From a66bc8d6ff118b8e2b5e3e2fbca0484b496e8ef2 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Thu, 9 Jan 2020 19:25:02 +0300 Subject: [PATCH] Resizing Detector: random=1 (/1.4 - x1.4), random=3 (/3.0 - x3.0), random=0 (no resizing). Added param: resize_step=32 to the [net] section: use resize_step=128 for 7 subsampling layers. --- include/darknet.h | 3 ++- src/detector.c | 19 +++++++++++-------- src/parser.c | 9 +++++---- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/include/darknet.h b/include/darknet.h index a6d2c4a9..74cea7be 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -305,7 +305,7 @@ struct layer { float mask_scale; float class_scale; int bias_match; - int random; + float random; float ignore_thresh; float truth_thresh; float iou_thresh; @@ -664,6 +664,7 @@ typedef struct network { int blur; int mixup; float label_smooth_eps; + int resize_step; int letter_box; float angle; float aspect; diff --git a/src/detector.c b/src/detector.c index bb059c77..3a74a431 100644 --- a/src/detector.c +++ b/src/detector.c @@ -168,19 +168,22 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i //while(i*imgs < N*120){ while (get_current_batch(net) < net.max_batches) { if (l.random && count++ % 10 == 0) { - printf("Resizing\n"); - float random_val = rand_scale(1.4); // *x or /x - int dim_w = roundl(random_val*init_w / 32 + 1) * 32; - int dim_h = roundl(random_val*init_h / 32 + 1) * 32; + float rand_coef = 1.4; + if (l.random != 1.0) rand_coef = l.random; + printf("Resizing, random_coef = %.2f \n", rand_coef); + float random_val = rand_scale(rand_coef); // *x or /x + int dim_w = roundl(random_val*init_w / net.resize_step + 1) * net.resize_step; + int dim_h = roundl(random_val*init_h / net.resize_step + 1) * net.resize_step; + if (random_val < 1 && (dim_w > init_w || dim_h > init_h)) dim_w = init_w, dim_h = init_h; // at the beginning if (avg_loss < 0) { - dim_w = roundl(1.4*init_w / 32 + 1) * 32; - dim_h = roundl(1.4*init_h / 32 + 1) * 32; + dim_w = roundl(rand_coef*init_w / net.resize_step + 1) * net.resize_step; + dim_h = roundl(rand_coef*init_h / net.resize_step + 1) * net.resize_step; } - if (dim_w < 32) dim_w = 32; - if (dim_h < 32) dim_h = 32; + if (dim_w < net.resize_step) dim_w = net.resize_step; + if (dim_h < net.resize_step) dim_h = net.resize_step; printf("%d x %d \n", dim_w, dim_h); args.w = dim_w; diff --git a/src/parser.c b/src/parser.c index c2d9098b..173a4d43 100644 --- a/src/parser.c +++ b/src/parser.c @@ -439,7 +439,7 @@ layer parse_yolo(list *options, size_params params) l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); l.truth_thresh = option_find_float(options, "truth_thresh", 1); l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo] - l.random = option_find_int_quiet(options, "random", 0); + l.random = option_find_float_quiet(options, "random", 0); char *map_file = option_find_str(options, "map", 0); if (map_file) l.map = read_map(map_file); @@ -541,7 +541,7 @@ layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3 l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); l.truth_thresh = option_find_float(options, "truth_thresh", 1); l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo] - l.random = option_find_int_quiet(options, "random", 0); + l.random = option_find_float_quiet(options, "random", 0); char *map_file = option_find_str(options, "map", 0); if (map_file) l.map = read_map(map_file); @@ -590,7 +590,7 @@ layer parse_region(list *options, size_params params) l.thresh = option_find_float(options, "thresh", .5); l.classfix = option_find_int_quiet(options, "classfix", 0); l.absolute = option_find_int_quiet(options, "absolute", 0); - l.random = option_find_int_quiet(options, "random", 0); + l.random = option_find_float_quiet(options, "random", 0); l.coord_scale = option_find_float(options, "coord_scale", 1); l.object_scale = option_find_float(options, "object_scale", 1); @@ -639,7 +639,7 @@ detection_layer parse_detection(list *options, size_params params) layer.noobject_scale = option_find_float(options, "noobject_scale", 1); layer.class_scale = option_find_float(options, "class_scale", 1); layer.jitter = option_find_float(options, "jitter", .2); - layer.random = option_find_int_quiet(options, "random", 0); + layer.random = option_find_float_quiet(options, "random", 0); layer.reorg = option_find_int_quiet(options, "reorg", 0); return layer; } @@ -1060,6 +1060,7 @@ void parse_net_options(list *options, network *net) else if (mosaic) net->mixup = 3; net->letter_box = option_find_int_quiet(options, "letter_box", 0); net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f); + net->resize_step = option_find_float_quiet(options, "resize_step", 32); net->angle = option_find_float_quiet(options, "angle", 0); net->aspect = option_find_float_quiet(options, "aspect", 1);