diff --git a/include/darknet.h b/include/darknet.h index ef085960..940d9808 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -822,6 +822,7 @@ typedef struct load_args { int augment_speed; int letter_box; int show_imgs; + int dontuse_opencv; float jitter; int flip; int blur; diff --git a/scripts/README.md b/scripts/README.md index c38626ec..1cefec95 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -2,6 +2,8 @@ ### Datasets: +25 thousand datasets on Kaggle: https://www.kaggle.com/datasets + BDD100K - Diverse Driving Video: https://bair.berkeley.edu/blog/2018/05/30/bdd/ Pascal VOC: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html diff --git a/src/classifier.c b/src/classifier.c index 59ae3e94..e97e9cf4 100644 --- a/src/classifier.c +++ b/src/classifier.c @@ -27,7 +27,7 @@ float *get_regression_values(char **labels, int n) return v; } -void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int mjpeg_port, int calc_topk, int show_imgs) +void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dontuse_opencv, int dont_show, int mjpeg_port, int calc_topk, int show_imgs) { int i; @@ -81,6 +81,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, args.threads = 32; args.hierarchy = net.hierarchy; + args.dontuse_opencv = dontuse_opencv; args.min = net.min_crop; args.max = net.max_crop; args.flip = net.flip; @@ -112,7 +113,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int img_size = 1000; char windows_name[100]; sprintf(windows_name, "average loss (id:%d)", random_gen()); - img = draw_train_chart(windows_name, max_img_loss, net.max_batches, number_of_lines, img_size, dont_show); + if (!dontuse_opencv) img = draw_train_chart(windows_name, max_img_loss, net.max_batches, number_of_lines, img_size, dont_show); #endif //OPENCV data train; @@ -176,7 +177,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); #ifdef OPENCV - draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, i, net.max_batches, topk, draw_precision, topk_buff, dont_show, mjpeg_port); + if (!dontuse_opencv) draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, i, net.max_batches, topk, draw_precision, topk_buff, dont_show, mjpeg_port); #endif // OPENCV if (i >= (iter_save + 1000)) { @@ -1298,6 +1299,7 @@ void run_classifier(int argc, char **argv) } int dont_show = find_arg(argc, argv, "-dont_show"); + int dontuse_opencv = find_arg(argc, argv, "-dontuse_opencv"); int show_imgs = find_arg(argc, argv, "-show_imgs"); int calc_topk = find_arg(argc, argv, "-topk"); int cam_index = find_int_arg(argc, argv, "-c", 0); @@ -1311,7 +1313,7 @@ void run_classifier(int argc, char **argv) int layer = layer_s ? atoi(layer_s) : -1; if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top); else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s)); - else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear, dont_show, mjpeg_port, calc_topk, show_imgs); + else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear, dontuse_opencv, dont_show, mjpeg_port, calc_topk, show_imgs); else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename); else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename); else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename); diff --git a/src/data.c b/src/data.c index 2053030b..6fa37846 100644 --- a/src/data.c +++ b/src/data.c @@ -142,7 +142,7 @@ matrix load_image_paths(char **paths, int n, int w, int h) return X; } -matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure) +matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv) { int i; matrix X; @@ -152,7 +152,10 @@ matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int for(i = 0; i < n; ++i){ int size = w > h ? w : h; - image im = load_image_color(paths[i], 0, 0); + image im; + if(dontuse_opencv) im = load_image_stb_resize(paths[i], 0, 0, 3); + else im = load_image_color(paths[i], 0, 0); + image crop = random_augment_image(im, angle, aspect, min, max, size); int flip = use_flip ? random_gen() % 2 : 0; if (flip) @@ -1362,7 +1365,7 @@ void *load_thread(void *ptr) if (a.type == OLD_CLASSIFICATION_DATA){ *a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h); } else if (a.type == CLASSIFICATION_DATA){ - *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.flip, a.min, a.max, a.w, a.h, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.mixup, a.blur, a.show_imgs, a.label_smooth_eps); + *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.flip, a.min, a.max, a.w, a.h, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.mixup, a.blur, a.show_imgs, a.label_smooth_eps, a.dontuse_opencv); } else if (a.type == SUPER_DATA){ *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale); } else if (a.type == WRITING_DATA){ @@ -1507,20 +1510,20 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale) return d; } -data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps) +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps, int dontuse_opencv) { char **paths_stored = paths; if(m) paths = get_random_paths(paths, n, m); data d = {0}; d.shallow = 0; - d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure); + d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv); d.y = load_labels_paths(paths, n, labels, k, hierarchy, label_smooth_eps); if (use_mixup && rand_int(0, 1)) { char **paths_mix = get_random_paths(paths_stored, n, m); data d2 = { 0 }; d2.shallow = 0; - d2.X = load_image_augment_paths(paths_mix, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure); + d2.X = load_image_augment_paths(paths_mix, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv); d2.y = load_labels_paths(paths_mix, n, labels, k, hierarchy, label_smooth_eps); free(paths_mix); @@ -1530,12 +1533,12 @@ data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *h d4.shallow = 0; if (use_mixup >= 3) { char **paths_mix3 = get_random_paths(paths_stored, n, m); - d3.X = load_image_augment_paths(paths_mix3, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure); + d3.X = load_image_augment_paths(paths_mix3, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv); d3.y = load_labels_paths(paths_mix3, n, labels, k, hierarchy, label_smooth_eps); free(paths_mix3); char **paths_mix4 = get_random_paths(paths_stored, n, m); - d4.X = load_image_augment_paths(paths_mix4, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure); + d4.X = load_image_augment_paths(paths_mix4, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv); d4.y = load_labels_paths(paths_mix4, n, labels, k, hierarchy, label_smooth_eps); free(paths_mix4); } @@ -1706,7 +1709,7 @@ data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int d.w = w; d.h = h; d.shallow = 0; - d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure); + d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, 0); d.y = load_tags_paths(paths, n, k); if(m) free(paths); return d; diff --git a/src/data.h b/src/data.h index f675ae82..e430363d 100644 --- a/src/data.h +++ b/src/data.h @@ -89,9 +89,9 @@ data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, int use_mixup, float jitter, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs); data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure); -matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure); +matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv); data load_data_super(char **paths, int n, int m, int w, int h, int scale); -data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps); +data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps, int dontuse_opencv); data load_go(char *filename); box_label *read_boxes(char *filename, int *n); diff --git a/src/image.c b/src/image.c index c24cb5dc..d62dd2f6 100644 --- a/src/image.c +++ b/src/image.c @@ -1419,6 +1419,18 @@ image load_image_stb(char *filename, int channels) return im; } +image load_image_stb_resize(char *filename, int w, int h, int c) +{ + image out = load_image_stb(filename, c); // without OpenCV + + if ((h && w) && (h != out.h || w != out.w)) { + image resized = resize_image(out, w, h); + free_image(out); + out = resized; + } + return out; +} + image load_image(char *filename, int w, int h, int c) { #ifdef OPENCV diff --git a/src/image.h b/src/image.h index 14792c9b..58897794 100644 --- a/src/image.h +++ b/src/image.h @@ -83,6 +83,7 @@ image float_to_image_scaled(int w, int h, int c, float *data); image float_to_image(int w, int h, int c, float *data); image copy_image(image p); image load_image(char *filename, int w, int h, int c); +image load_image_stb_resize(char *filename, int w, int h, int c); //LIB_API image load_image_color(char *filename, int w, int h); image **load_alphabet(); diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp index afcd86ce..d678bd90 100644 --- a/src/image_opencv.cpp +++ b/src/image_opencv.cpp @@ -1210,15 +1210,15 @@ extern "C" image image_data_augmentation(mat_cv* mat, int w, int h, if (blur) { cv::Mat dst(sized.size(), sized.type()); if (blur == 1) { - //cv::GaussianBlur(sized, dst, cv::Size(31, 31), 0); - cv::bilateralFilter(sized, dst, 17, 75, 75); + cv::GaussianBlur(sized, dst, cv::Size(17, 17), 0); + //cv::bilateralFilter(sized, dst, 17, 75, 75); } else { int ksize = (blur / 2) * 2 + 1; cv::Size kernel_size = cv::Size(ksize, ksize); - //cv::GaussianBlur(sized, dst, kernel_size, 0); + cv::GaussianBlur(sized, dst, kernel_size, 0); //cv::medianBlur(sized, dst, ksize); - cv::bilateralFilter(sized, dst, ksize, 75, 75); + //cv::bilateralFilter(sized, dst, ksize, 75, 75); // sharpen //cv::Mat img_tmp; @@ -1274,7 +1274,9 @@ extern "C" image blur_image(image src_img, int ksize) { cv::Mat src = image_to_mat(src_img); cv::Mat dst; - cv::bilateralFilter(src, dst, ksize, 75, 75); + cv::Size kernel_size = cv::Size(ksize, ksize); + cv::GaussianBlur(src, dst, kernel_size, 0); + //cv::bilateralFilter(src, dst, ksize, 75, 75); image dst_img = mat_to_image(dst); return dst_img; }