Add [net] mosaic_bound=1 option to fix bboxes going out of mosaic bounds. Count rewritten bboxes. Change self-adversarial training.

This commit is contained in:
AlexeyAB 2020-06-07 04:41:01 +03:00
parent cde72f8e0b
commit 6c6f04a9b3
8 changed files with 50 additions and 16 deletions

View File

@ -669,6 +669,8 @@ typedef struct network {
float *output;
learning_rate_policy policy;
int benchmark_layers;
int *total_bbox;
int *rewritten_bbox;
float learning_rate;
float learning_rate_min;
@ -718,6 +720,7 @@ typedef struct network {
float adversarial_lr;
float max_chart_loss;
int letter_box;
int mosaic_bound;
float angle;
float aspect;
float exposure;
@ -894,6 +897,7 @@ typedef struct load_args {
int track;
int augment_speed;
int letter_box;
int mosaic_bound;
int show_imgs;
int dontuse_opencv;
float jitter;

View File

@ -848,7 +848,7 @@ void blend_truth(float *new_truth, int boxes, float *old_truth)
void blend_truth_mosaic(float *new_truth, int boxes, float *old_truth, int w, int h, float cut_x, float cut_y, int i_mixup,
int left_shift, int right_shift, int top_shift, int bot_shift,
int net_w, int net_h)
int net_w, int net_h, int mosaic_bound)
{
const float lowest_w = 1.F / net_w;
const float lowest_h = 1.F / net_h;
@ -900,7 +900,7 @@ void blend_truth_mosaic(float *new_truth, int boxes, float *old_truth, int w, in
int top = (yb - hb / 2)*h;
int bot = (yb + hb / 2)*h;
if(mosaic_bound)
{
// fix out of Mosaic-bound
float left_bound = 0, right_bound = 0, top_bound = 0, bot_bound = 0;
@ -947,8 +947,7 @@ void blend_truth_mosaic(float *new_truth, int boxes, float *old_truth, int w, in
yb = ((float)(bot + top) / 2) / h;
hb = ((float)(bot - top)) / h;
}
/*
else
{
// fix out of bound
if (left < 0) {
@ -980,7 +979,7 @@ void blend_truth_mosaic(float *new_truth, int boxes, float *old_truth, int w, in
top = (yb - hb / 2)*h;
bot = (yb + hb / 2)*h;
}
*/
// leave only within the image
if(left >= 0 && right <= w && top >= 0 && bot <= h &&
@ -1004,7 +1003,7 @@ void blend_truth_mosaic(float *new_truth, int boxes, float *old_truth, int w, in
#include "http_stream.h"
data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_gaussian_noise, int use_blur, int use_mixup,
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs)
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int show_imgs)
{
const int random_index = random_gen();
c = c ? c : 3;
@ -1263,7 +1262,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
}
}
blend_truth_mosaic(d.y.vals[i], boxes, truth, w, h, cut_x[i], cut_y[i], i_mixup, left_shift, right_shift, top_shift, bot_shift, w, h);
blend_truth_mosaic(d.y.vals[i], boxes, truth, w, h, cut_x[i], cut_y[i], i_mixup, left_shift, right_shift, top_shift, bot_shift, w, h, mosaic_bound);
free_image(ai);
ai.data = d.X.vals[i];
@ -1319,7 +1318,7 @@ void blend_images(image new_img, float alpha, image old_img, float beta)
}
data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int gaussian_noise, int use_blur, int use_mixup,
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs)
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int show_imgs)
{
const int random_index = random_gen();
c = c ? c : 3;
@ -1534,7 +1533,7 @@ void *load_thread(void *ptr)
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
} else if (a.type == DETECTION_DATA){
*a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.c, a.num_boxes, a.classes, a.flip, a.gaussian_noise, a.blur, a.mixup, a.jitter, a.resize,
a.hue, a.saturation, a.exposure, a.mini_batch, a.track, a.augment_speed, a.letter_box, a.show_imgs);
a.hue, a.saturation, a.exposure, a.mini_batch, a.track, a.augment_speed, a.letter_box, a.mosaic_bound, a.show_imgs);
} else if (a.type == SWAG_DATA){
*a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
} else if (a.type == COMPARE_DATA){

View File

@ -87,7 +87,7 @@ data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h);
data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int gaussian_noise, int use_blur, int use_mixup,
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs);
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int show_imgs);
data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure);
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv);
data load_data_super(char **paths, int n, int m, int w, int h, int scale);

View File

@ -145,6 +145,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
args.saturation = net.saturation;
args.hue = net.hue;
args.letter_box = net.letter_box;
args.mosaic_bound = net.mosaic_bound;
if (dont_show && show_imgs) show_imgs = 2;
args.show_imgs = show_imgs;

View File

@ -246,6 +246,9 @@ network make_network(int n)
net.layers = (layer*)xcalloc(net.n, sizeof(layer));
net.seen = (uint64_t*)xcalloc(1, sizeof(uint64_t));
net.cur_iteration = (int*)xcalloc(1, sizeof(int));
net.total_bbox = (int*)xcalloc(1, sizeof(int));
net.rewritten_bbox = (int*)xcalloc(1, sizeof(int));
*net.rewritten_bbox = *net.total_bbox = 0;
#ifdef GPU
net.input_gpu = (float**)xcalloc(1, sizeof(float*));
net.truth_gpu = (float**)xcalloc(1, sizeof(float*));
@ -366,6 +369,7 @@ float train_network_datum(network net, float *x, float *y)
backward_network(net, state);
float error = get_network_cost(net);
//if(((*net.seen)/net.batch)%net.subdivisions == 0) update_network(net);
printf(" total_bbox = %d, rewritten_bbox = %f %% \n", *(state.net.total_bbox), 100 * (float)*(state.net.rewritten_bbox) / *(state.net.total_bbox));
return error;
}
@ -1147,6 +1151,8 @@ void free_network(network net)
free(net.steps);
free(net.seen);
free(net.cur_iteration);
free(net.total_bbox);
free(net.rewritten_bbox);
#ifdef GPU
if (gpu_index >= 0) cuda_free(net.workspace);

View File

@ -348,6 +348,7 @@ void forward_backward_network_gpu(network net, float *x, float *y)
cuda_free(state.delta);
cuda_pull_array(*net.input_gpu, x, x_size);
}
printf(" total_bbox = %d, rewritten_bbox = %f %% \n", *(state.net.total_bbox), 100 * (float)*(state.net.rewritten_bbox) / *(state.net.total_bbox));
}
float train_network_datum_gpu(network net, float *x, float *y)
@ -356,7 +357,8 @@ float train_network_datum_gpu(network net, float *x, float *y)
if (net.adversarial_lr && rand_int(0, 1) == 1 && get_current_iteration(net) > net.burn_in) {
net.adversarial = 1;
float lr_old = net.learning_rate;
float scale = 1.0 - (get_current_iteration(net) / ((float)net.max_batches));
float scale = (get_current_iteration(net) / ((float)net.max_batches));
//scale = sin(scale * M_PI);
net.learning_rate = net.adversarial_lr * scale;
layer l = net.layers[net.n - 1];
int y_size = get_network_output_size(net)*net.batch;

View File

@ -1119,6 +1119,7 @@ void parse_net_options(list *options, network *net)
else if (cutmix) net->mixup = 2;
else if (mosaic) net->mixup = 3;
net->letter_box = option_find_int_quiet(options, "letter_box", 0);
net->mosaic_bound = option_find_int_quiet(options, "mosaic_bound", 0);
net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
net->resize_step = option_find_float_quiet(options, "resize_step", 32);
net->attention = option_find_int_quiet(options, "attention", 0);

View File

@ -153,8 +153,12 @@ static inline float clip_value(float val, const float max_val)
return val;
}
ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, int accumulate, float max_delta)
ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, int accumulate, float max_delta, int *rewritten_bbox)
{
if (delta[index + 0 * stride] || delta[index + 1 * stride] || delta[index + 2 * stride] || delta[index + 3 * stride]) {
(*rewritten_bbox)++;
}
ious all_ious = { 0 };
// i - step in layer width
// j - step in layer height
@ -422,9 +426,23 @@ void forward_yolo_layer(const layer l, network_state state)
if (scale > 0) scale = sqrt(scale);
l.delta[obj_index] = scale * l.cls_normalizer * (0 - l.output[obj_index]);
int cl_id;
int found_object = 0;
for (cl_id = 0; cl_id < l.classes; ++cl_id) {
if(l.output[class_index + stride*cl_id] * l.output[obj_index] > 0.25)
if (l.output[class_index + stride*cl_id] * l.output[obj_index] > 0.25) {
l.delta[class_index + stride*cl_id] = scale * (0 - l.output[class_index + stride*cl_id]);
found_object = 1;
}
}
if (found_object) {
// don't use this loop for adversarial attack drawing
for (cl_id = 0; cl_id < l.classes; ++cl_id)
if (l.output[class_index + stride*cl_id] * l.output[obj_index] < 0.25)
l.delta[class_index + stride*cl_id] = scale * (1 - l.output[class_index + stride*cl_id]);
l.delta[box_index + 0 * stride] += scale * (0 - l.output[box_index + 0 * stride]);
l.delta[box_index + 1 * stride] += scale * (0 - l.output[box_index + 1 * stride]);
l.delta[box_index + 2 * stride] += scale * (0 - l.output[box_index + 2 * stride]);
l.delta[box_index + 3 * stride] += scale * (0 - l.output[box_index + 3 * stride]);
}
}
if (best_iou > l.truth_thresh) {
@ -439,7 +457,8 @@ void forward_yolo_layer(const layer l, network_state state)
const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
if (l.objectness_smooth) l.delta[class_index + stride*class_id] = class_multiplier * (iou_multiplier - l.output[class_index + stride*class_id]);
box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1);
delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta);
delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta, state.net.rewritten_bbox);
(*state.net.total_bbox)++;
}
}
}
@ -481,7 +500,8 @@ void forward_yolo_layer(const layer l, network_state state)
int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta);
ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta, state.net.rewritten_bbox);
(*state.net.total_bbox)++;
// range is 0 <= 1
tot_iou += all_ious.iou;
@ -528,7 +548,8 @@ void forward_yolo_layer(const layer l, network_state state)
int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta);
ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta, state.net.rewritten_bbox);
(*state.net.total_bbox)++;
// range is 0 <= 1
tot_iou += all_ious.iou;