Detector resizing: random=1 scales the input size between /1.4 and x1.4, random=3 scales it between /3.0 and x3.0, and random=0 disables resizing. Added the resize_step parameter (default 32) to the [net] section; use resize_step=128 for 7 subsampling layers.

This commit is contained in:
AlexeyAB 2020-01-09 19:25:02 +03:00
parent 6878ecc2e2
commit a66bc8d6ff
3 changed files with 18 additions and 13 deletions

View File

@ -305,7 +305,7 @@ struct layer {
float mask_scale; float mask_scale;
float class_scale; float class_scale;
int bias_match; int bias_match;
int random; float random;
float ignore_thresh; float ignore_thresh;
float truth_thresh; float truth_thresh;
float iou_thresh; float iou_thresh;
@ -664,6 +664,7 @@ typedef struct network {
int blur; int blur;
int mixup; int mixup;
float label_smooth_eps; float label_smooth_eps;
int resize_step;
int letter_box; int letter_box;
float angle; float angle;
float aspect; float aspect;

View File

@ -168,19 +168,22 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
//while(i*imgs < N*120){ //while(i*imgs < N*120){
while (get_current_batch(net) < net.max_batches) { while (get_current_batch(net) < net.max_batches) {
if (l.random && count++ % 10 == 0) { if (l.random && count++ % 10 == 0) {
printf("Resizing\n"); float rand_coef = 1.4;
float random_val = rand_scale(1.4); // *x or /x if (l.random != 1.0) rand_coef = l.random;
int dim_w = roundl(random_val*init_w / 32 + 1) * 32; printf("Resizing, random_coef = %.2f \n", rand_coef);
int dim_h = roundl(random_val*init_h / 32 + 1) * 32; float random_val = rand_scale(rand_coef); // *x or /x
int dim_w = roundl(random_val*init_w / net.resize_step + 1) * net.resize_step;
int dim_h = roundl(random_val*init_h / net.resize_step + 1) * net.resize_step;
if (random_val < 1 && (dim_w > init_w || dim_h > init_h)) dim_w = init_w, dim_h = init_h;
// at the beginning // at the beginning
if (avg_loss < 0) { if (avg_loss < 0) {
dim_w = roundl(1.4*init_w / 32 + 1) * 32; dim_w = roundl(rand_coef*init_w / net.resize_step + 1) * net.resize_step;
dim_h = roundl(1.4*init_h / 32 + 1) * 32; dim_h = roundl(rand_coef*init_h / net.resize_step + 1) * net.resize_step;
} }
if (dim_w < 32) dim_w = 32; if (dim_w < net.resize_step) dim_w = net.resize_step;
if (dim_h < 32) dim_h = 32; if (dim_h < net.resize_step) dim_h = net.resize_step;
printf("%d x %d \n", dim_w, dim_h); printf("%d x %d \n", dim_w, dim_h);
args.w = dim_w; args.w = dim_w;

View File

@ -439,7 +439,7 @@ layer parse_yolo(list *options, size_params params)
l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
l.truth_thresh = option_find_float(options, "truth_thresh", 1); l.truth_thresh = option_find_float(options, "truth_thresh", 1);
l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo] l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo]
l.random = option_find_int_quiet(options, "random", 0); l.random = option_find_float_quiet(options, "random", 0);
char *map_file = option_find_str(options, "map", 0); char *map_file = option_find_str(options, "map", 0);
if (map_file) l.map = read_map(map_file); if (map_file) l.map = read_map(map_file);
@ -541,7 +541,7 @@ layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3
l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
l.truth_thresh = option_find_float(options, "truth_thresh", 1); l.truth_thresh = option_find_float(options, "truth_thresh", 1);
l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo] l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo]
l.random = option_find_int_quiet(options, "random", 0); l.random = option_find_float_quiet(options, "random", 0);
char *map_file = option_find_str(options, "map", 0); char *map_file = option_find_str(options, "map", 0);
if (map_file) l.map = read_map(map_file); if (map_file) l.map = read_map(map_file);
@ -590,7 +590,7 @@ layer parse_region(list *options, size_params params)
l.thresh = option_find_float(options, "thresh", .5); l.thresh = option_find_float(options, "thresh", .5);
l.classfix = option_find_int_quiet(options, "classfix", 0); l.classfix = option_find_int_quiet(options, "classfix", 0);
l.absolute = option_find_int_quiet(options, "absolute", 0); l.absolute = option_find_int_quiet(options, "absolute", 0);
l.random = option_find_int_quiet(options, "random", 0); l.random = option_find_float_quiet(options, "random", 0);
l.coord_scale = option_find_float(options, "coord_scale", 1); l.coord_scale = option_find_float(options, "coord_scale", 1);
l.object_scale = option_find_float(options, "object_scale", 1); l.object_scale = option_find_float(options, "object_scale", 1);
@ -639,7 +639,7 @@ detection_layer parse_detection(list *options, size_params params)
layer.noobject_scale = option_find_float(options, "noobject_scale", 1); layer.noobject_scale = option_find_float(options, "noobject_scale", 1);
layer.class_scale = option_find_float(options, "class_scale", 1); layer.class_scale = option_find_float(options, "class_scale", 1);
layer.jitter = option_find_float(options, "jitter", .2); layer.jitter = option_find_float(options, "jitter", .2);
layer.random = option_find_int_quiet(options, "random", 0); layer.random = option_find_float_quiet(options, "random", 0);
layer.reorg = option_find_int_quiet(options, "reorg", 0); layer.reorg = option_find_int_quiet(options, "reorg", 0);
return layer; return layer;
} }
@ -1060,6 +1060,7 @@ void parse_net_options(list *options, network *net)
else if (mosaic) net->mixup = 3; else if (mosaic) net->mixup = 3;
net->letter_box = option_find_int_quiet(options, "letter_box", 0); net->letter_box = option_find_int_quiet(options, "letter_box", 0);
net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f); net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
net->resize_step = option_find_float_quiet(options, "resize_step", 32);
net->angle = option_find_float_quiet(options, "angle", 0); net->angle = option_find_float_quiet(options, "angle", 0);
net->aspect = option_find_float_quiet(options, "aspect", 1); net->aspect = option_find_float_quiet(options, "aspect", 1);