mirror of https://github.com/AlexeyAB/darknet.git
[net] mosaic_bound=1 fixed bboxes going out of bounds. Count rewritten bboxes. Changed self-adversarial-training.
This commit is contained in:
parent
cde72f8e0b
commit
6c6f04a9b3
|
@ -669,6 +669,8 @@ typedef struct network {
|
|||
float *output;
|
||||
learning_rate_policy policy;
|
||||
int benchmark_layers;
|
||||
int *total_bbox;
|
||||
int *rewritten_bbox;
|
||||
|
||||
float learning_rate;
|
||||
float learning_rate_min;
|
||||
|
@ -718,6 +720,7 @@ typedef struct network {
|
|||
float adversarial_lr;
|
||||
float max_chart_loss;
|
||||
int letter_box;
|
||||
int mosaic_bound;
|
||||
float angle;
|
||||
float aspect;
|
||||
float exposure;
|
||||
|
@ -894,6 +897,7 @@ typedef struct load_args {
|
|||
int track;
|
||||
int augment_speed;
|
||||
int letter_box;
|
||||
int mosaic_bound;
|
||||
int show_imgs;
|
||||
int dontuse_opencv;
|
||||
float jitter;
|
||||
|
|
17
src/data.c
17
src/data.c
|
@ -848,7 +848,7 @@ void blend_truth(float *new_truth, int boxes, float *old_truth)
|
|||
|
||||
void blend_truth_mosaic(float *new_truth, int boxes, float *old_truth, int w, int h, float cut_x, float cut_y, int i_mixup,
|
||||
int left_shift, int right_shift, int top_shift, int bot_shift,
|
||||
int net_w, int net_h)
|
||||
int net_w, int net_h, int mosaic_bound)
|
||||
{
|
||||
const float lowest_w = 1.F / net_w;
|
||||
const float lowest_h = 1.F / net_h;
|
||||
|
@ -900,7 +900,7 @@ void blend_truth_mosaic(float *new_truth, int boxes, float *old_truth, int w, in
|
|||
int top = (yb - hb / 2)*h;
|
||||
int bot = (yb + hb / 2)*h;
|
||||
|
||||
|
||||
if(mosaic_bound)
|
||||
{
|
||||
// fix out of Mosaic-bound
|
||||
float left_bound = 0, right_bound = 0, top_bound = 0, bot_bound = 0;
|
||||
|
@ -947,8 +947,7 @@ void blend_truth_mosaic(float *new_truth, int boxes, float *old_truth, int w, in
|
|||
yb = ((float)(bot + top) / 2) / h;
|
||||
hb = ((float)(bot - top)) / h;
|
||||
}
|
||||
|
||||
/*
|
||||
else
|
||||
{
|
||||
// fix out of bound
|
||||
if (left < 0) {
|
||||
|
@ -980,7 +979,7 @@ void blend_truth_mosaic(float *new_truth, int boxes, float *old_truth, int w, in
|
|||
top = (yb - hb / 2)*h;
|
||||
bot = (yb + hb / 2)*h;
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
// leave only within the image
|
||||
if(left >= 0 && right <= w && top >= 0 && bot <= h &&
|
||||
|
@ -1004,7 +1003,7 @@ void blend_truth_mosaic(float *new_truth, int boxes, float *old_truth, int w, in
|
|||
#include "http_stream.h"
|
||||
|
||||
data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_gaussian_noise, int use_blur, int use_mixup,
|
||||
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs)
|
||||
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int show_imgs)
|
||||
{
|
||||
const int random_index = random_gen();
|
||||
c = c ? c : 3;
|
||||
|
@ -1263,7 +1262,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
|
|||
}
|
||||
}
|
||||
|
||||
blend_truth_mosaic(d.y.vals[i], boxes, truth, w, h, cut_x[i], cut_y[i], i_mixup, left_shift, right_shift, top_shift, bot_shift, w, h);
|
||||
blend_truth_mosaic(d.y.vals[i], boxes, truth, w, h, cut_x[i], cut_y[i], i_mixup, left_shift, right_shift, top_shift, bot_shift, w, h, mosaic_bound);
|
||||
|
||||
free_image(ai);
|
||||
ai.data = d.X.vals[i];
|
||||
|
@ -1319,7 +1318,7 @@ void blend_images(image new_img, float alpha, image old_img, float beta)
|
|||
}
|
||||
|
||||
data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int gaussian_noise, int use_blur, int use_mixup,
|
||||
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs)
|
||||
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int show_imgs)
|
||||
{
|
||||
const int random_index = random_gen();
|
||||
c = c ? c : 3;
|
||||
|
@ -1534,7 +1533,7 @@ void *load_thread(void *ptr)
|
|||
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
|
||||
} else if (a.type == DETECTION_DATA){
|
||||
*a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.c, a.num_boxes, a.classes, a.flip, a.gaussian_noise, a.blur, a.mixup, a.jitter, a.resize,
|
||||
a.hue, a.saturation, a.exposure, a.mini_batch, a.track, a.augment_speed, a.letter_box, a.show_imgs);
|
||||
a.hue, a.saturation, a.exposure, a.mini_batch, a.track, a.augment_speed, a.letter_box, a.mosaic_bound, a.show_imgs);
|
||||
} else if (a.type == SWAG_DATA){
|
||||
*a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
|
||||
} else if (a.type == COMPARE_DATA){
|
||||
|
|
|
@ -87,7 +87,7 @@ data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
|
|||
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
|
||||
data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h);
|
||||
data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int gaussian_noise, int use_blur, int use_mixup,
|
||||
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs);
|
||||
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int show_imgs);
|
||||
data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure);
|
||||
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv);
|
||||
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
|
||||
|
|
|
@ -145,6 +145,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
|||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
args.letter_box = net.letter_box;
|
||||
args.mosaic_bound = net.mosaic_bound;
|
||||
if (dont_show && show_imgs) show_imgs = 2;
|
||||
args.show_imgs = show_imgs;
|
||||
|
||||
|
|
|
@ -246,6 +246,9 @@ network make_network(int n)
|
|||
net.layers = (layer*)xcalloc(net.n, sizeof(layer));
|
||||
net.seen = (uint64_t*)xcalloc(1, sizeof(uint64_t));
|
||||
net.cur_iteration = (int*)xcalloc(1, sizeof(int));
|
||||
net.total_bbox = (int*)xcalloc(1, sizeof(int));
|
||||
net.rewritten_bbox = (int*)xcalloc(1, sizeof(int));
|
||||
*net.rewritten_bbox = *net.total_bbox = 0;
|
||||
#ifdef GPU
|
||||
net.input_gpu = (float**)xcalloc(1, sizeof(float*));
|
||||
net.truth_gpu = (float**)xcalloc(1, sizeof(float*));
|
||||
|
@ -366,6 +369,7 @@ float train_network_datum(network net, float *x, float *y)
|
|||
backward_network(net, state);
|
||||
float error = get_network_cost(net);
|
||||
//if(((*net.seen)/net.batch)%net.subdivisions == 0) update_network(net);
|
||||
// total_bbox starts at 0 and is only incremented when forward_yolo_layer assigns a truth box,
// so report 0 % instead of dividing by zero (which would print nan) while nothing has been counted yet.
printf(" total_bbox = %d, rewritten_bbox = %f %% \n", *(state.net.total_bbox), *(state.net.total_bbox) ? 100 * (float)*(state.net.rewritten_bbox) / *(state.net.total_bbox) : 0.0f);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -1147,6 +1151,8 @@ void free_network(network net)
|
|||
free(net.steps);
|
||||
free(net.seen);
|
||||
free(net.cur_iteration);
|
||||
free(net.total_bbox);
|
||||
free(net.rewritten_bbox);
|
||||
|
||||
#ifdef GPU
|
||||
if (gpu_index >= 0) cuda_free(net.workspace);
|
||||
|
|
|
@ -348,6 +348,7 @@ void forward_backward_network_gpu(network net, float *x, float *y)
|
|||
cuda_free(state.delta);
|
||||
cuda_pull_array(*net.input_gpu, x, x_size);
|
||||
}
|
||||
// total_bbox starts at 0 and is only incremented when forward_yolo_layer assigns a truth box,
// so report 0 % instead of dividing by zero (which would print nan) while nothing has been counted yet.
printf(" total_bbox = %d, rewritten_bbox = %f %% \n", *(state.net.total_bbox), *(state.net.total_bbox) ? 100 * (float)*(state.net.rewritten_bbox) / *(state.net.total_bbox) : 0.0f);
|
||||
}
|
||||
|
||||
float train_network_datum_gpu(network net, float *x, float *y)
|
||||
|
@ -356,7 +357,8 @@ float train_network_datum_gpu(network net, float *x, float *y)
|
|||
if (net.adversarial_lr && rand_int(0, 1) == 1 && get_current_iteration(net) > net.burn_in) {
|
||||
net.adversarial = 1;
|
||||
float lr_old = net.learning_rate;
|
||||
float scale = 1.0 - (get_current_iteration(net) / ((float)net.max_batches));
|
||||
float scale = (get_current_iteration(net) / ((float)net.max_batches));
|
||||
//scale = sin(scale * M_PI);
|
||||
net.learning_rate = net.adversarial_lr * scale;
|
||||
layer l = net.layers[net.n - 1];
|
||||
int y_size = get_network_output_size(net)*net.batch;
|
||||
|
|
|
@ -1119,6 +1119,7 @@ void parse_net_options(list *options, network *net)
|
|||
else if (cutmix) net->mixup = 2;
|
||||
else if (mosaic) net->mixup = 3;
|
||||
net->letter_box = option_find_int_quiet(options, "letter_box", 0);
|
||||
net->mosaic_bound = option_find_int_quiet(options, "mosaic_bound", 0);
|
||||
net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
|
||||
net->resize_step = option_find_float_quiet(options, "resize_step", 32);
|
||||
net->attention = option_find_int_quiet(options, "attention", 0);
|
||||
|
|
|
@ -153,8 +153,12 @@ static inline float clip_value(float val, const float max_val)
|
|||
return val;
|
||||
}
|
||||
|
||||
ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, int accumulate, float max_delta)
|
||||
ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, int accumulate, float max_delta, int *rewritten_bbox)
|
||||
{
|
||||
if (delta[index + 0 * stride] || delta[index + 1 * stride] || delta[index + 2 * stride] || delta[index + 3 * stride]) {
|
||||
(*rewritten_bbox)++;
|
||||
}
|
||||
|
||||
ious all_ious = { 0 };
|
||||
// i - step in layer width
|
||||
// j - step in layer height
|
||||
|
@ -422,9 +426,23 @@ void forward_yolo_layer(const layer l, network_state state)
|
|||
if (scale > 0) scale = sqrt(scale);
|
||||
l.delta[obj_index] = scale * l.cls_normalizer * (0 - l.output[obj_index]);
|
||||
int cl_id;
|
||||
int found_object = 0;
|
||||
for (cl_id = 0; cl_id < l.classes; ++cl_id) {
|
||||
if(l.output[class_index + stride*cl_id] * l.output[obj_index] > 0.25)
|
||||
if (l.output[class_index + stride*cl_id] * l.output[obj_index] > 0.25) {
|
||||
l.delta[class_index + stride*cl_id] = scale * (0 - l.output[class_index + stride*cl_id]);
|
||||
found_object = 1;
|
||||
}
|
||||
}
|
||||
if (found_object) {
|
||||
// don't use this loop for adversarial attack drawing
|
||||
for (cl_id = 0; cl_id < l.classes; ++cl_id)
|
||||
if (l.output[class_index + stride*cl_id] * l.output[obj_index] < 0.25)
|
||||
l.delta[class_index + stride*cl_id] = scale * (1 - l.output[class_index + stride*cl_id]);
|
||||
|
||||
l.delta[box_index + 0 * stride] += scale * (0 - l.output[box_index + 0 * stride]);
|
||||
l.delta[box_index + 1 * stride] += scale * (0 - l.output[box_index + 1 * stride]);
|
||||
l.delta[box_index + 2 * stride] += scale * (0 - l.output[box_index + 2 * stride]);
|
||||
l.delta[box_index + 3 * stride] += scale * (0 - l.output[box_index + 3 * stride]);
|
||||
}
|
||||
}
|
||||
if (best_iou > l.truth_thresh) {
|
||||
|
@ -439,7 +457,8 @@ void forward_yolo_layer(const layer l, network_state state)
|
|||
const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
|
||||
if (l.objectness_smooth) l.delta[class_index + stride*class_id] = class_multiplier * (iou_multiplier - l.output[class_index + stride*class_id]);
|
||||
box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1);
|
||||
delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta);
|
||||
delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta, state.net.rewritten_bbox);
|
||||
(*state.net.total_bbox)++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -481,7 +500,8 @@ void forward_yolo_layer(const layer l, network_state state)
|
|||
|
||||
int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
|
||||
const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
|
||||
ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta);
|
||||
ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta, state.net.rewritten_bbox);
|
||||
(*state.net.total_bbox)++;
|
||||
|
||||
// range is 0 <= 1
|
||||
tot_iou += all_ious.iou;
|
||||
|
@ -528,7 +548,8 @@ void forward_yolo_layer(const layer l, network_state state)
|
|||
|
||||
int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
|
||||
const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
|
||||
ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta);
|
||||
ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta, state.net.rewritten_bbox);
|
||||
(*state.net.total_bbox)++;
|
||||
|
||||
// range is 0 <= 1
|
||||
tot_iou += all_ious.iou;
|
||||
|
|
Loading…
Reference in New Issue