From b8fe630119fea81200f6ca4641ce2514d893df04 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Wed, 25 Dec 2019 20:30:56 +0300 Subject: [PATCH] counters_per_class affects the bbox too --- src/gaussian_yolo_layer.c | 19 ++++++++++++------- src/yolo_layer.c | 19 ++++++++++++------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index 82053e25..999e7990 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -456,7 +456,8 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) int class_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 9); delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.label_smooth_eps, l.classes_multipliers); box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); - delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, l.uc_normalizer, 1, l.yolo_point); + const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; + delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point); } } } @@ -496,15 +497,17 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) int mask_n = int_index(l.mask, best_n, l.n); if(mask_n >= 0){ + int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + if (l.map) class_id = l.map[class_id]; + int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, l.uc_normalizer, 1, l.yolo_point); + const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; + float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point); int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); avg_obj += l.output[obj_index]; l.delta[obj_index] = l.cls_normalizer * (1 - l.output[obj_index]); - int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; - if (l.map) class_id = l.map[class_id]; int class_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 9); delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, &avg_cat, l.label_smooth_eps, l.classes_multipliers); @@ -527,15 +530,17 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) // iou, n if (iou > l.iou_thresh) { + int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + if (l.map) class_id = l.map[class_id]; + int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, l.uc_normalizer, 1, l.yolo_point); + const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; + float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point); int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); avg_obj += l.output[obj_index]; l.delta[obj_index] = l.cls_normalizer * (1 - l.output[obj_index]); - int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; - if (l.map) class_id = l.map[class_id]; int class_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 9); delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, &avg_cat, l.label_smooth_eps, l.classes_multipliers); diff --git a/src/yolo_layer.c b/src/yolo_layer.c index 919c26f0..7c7e7144 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -363,7 +363,8 @@ void forward_yolo_layer(const layer l, network_state state) int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.focal_loss, l.label_smooth_eps, l.classes_multipliers); box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); - delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, 1); + const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; + delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1); } } } @@ -400,8 +401,12 @@ void forward_yolo_layer(const layer l, network_state state) int mask_n = int_index(l.mask, best_n, l.n); if (mask_n >= 0) { + int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + if (l.map) class_id = l.map[class_id]; + int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, 1); + const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; + ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1); // range is 0 <= 1 tot_iou += all_ious.iou; @@ -420,8 +425,6 @@ void forward_yolo_layer(const layer l, network_state state) avg_obj += l.output[obj_index]; l.delta[obj_index] = l.cls_normalizer * (1 - l.output[obj_index]); - int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; - if (l.map) class_id = l.map[class_id]; int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1); delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, &avg_cat, l.focal_loss, l.label_smooth_eps, l.classes_multipliers); @@ -442,8 +445,12 @@ void forward_yolo_layer(const layer l, network_state state) // iou, n if (iou > l.iou_thresh) { + int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + if (l.map) class_id = l.map[class_id]; + int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer, l.iou_loss, 1); + const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; + ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1); // range is 0 <= 1 tot_iou += all_ious.iou; @@ -462,8 +469,6 @@ void forward_yolo_layer(const layer l, network_state state) avg_obj += l.output[obj_index]; l.delta[obj_index] = l.cls_normalizer * (1 - l.output[obj_index]); - int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; - if (l.map) class_id = l.map[class_id]; int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1); delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, &avg_cat, l.focal_loss, l.label_smooth_eps, l.classes_multipliers);