Added mosaic=1 data augmentation for Detector. Fixed blur=. Fixed [scale_channels] for ASFF.

2019-12-04 17:42:32 +03:00 · 2019-12-04 17:42:32 +03:00 · 5ddf9c74a5
parent c6f0f7b22c
commit 5ddf9c74a5
2 changed files with 151 additions and 33 deletions
--- a/src/data.c
+++ b/src/data.c
@ -811,6 +811,73 @@ void blend_truth(float *new_truth, int boxes, float *old_truth)
    //printf("\n was %d bboxes, now %d bboxes \n", count_new_truth, t);
 }

+
+void blend_truth_mosaic(float *new_truth, int boxes, float *old_truth, int w, int h, int cut_x, int cut_y, int i_mixup)
+{
+    const int t_size = 4 + 1;
+    int count_new_truth = 0;
+    int t;
+    for (t = 0; t < boxes; ++t) {
+        float x = new_truth[t*(4 + 1)];
+        if (!x) break;
+        count_new_truth++;
+
+    }
+    int new_t = count_new_truth;
+    for (t = count_new_truth; t < boxes; ++t) {
+        float *new_truth_ptr = new_truth + new_t*t_size;
+        float *old_truth_ptr = old_truth + (t - count_new_truth)*t_size;
+        float x = old_truth_ptr[0];
+        if (!x) break;
+
+        float xb = old_truth_ptr[0];
+        float yb = old_truth_ptr[1];
+        float wb = old_truth_ptr[2];
+        float hb = old_truth_ptr[3];
+
+        int left = (xb - wb / 2)*w;
+        int right = (xb + wb / 2)*w;
+        int top = (yb - hb / 2)*h;
+        int bot = (yb + hb / 2)*h;
+
+        if ((i_mixup == 0 && left < cut_x && top < cut_y) ||
+            (i_mixup == 1 && right > cut_x && top < cut_y) ||
+            (i_mixup == 2 && left < cut_x && bot > cut_y) ||
+            (i_mixup == 3 && right > cut_x && bot > cut_y))
+        {
+            if ((i_mixup == 0 || i_mixup == 2) && right > cut_x) {
+                float diff_x = (float)(right - cut_x) / w;
+                xb = xb - diff_x / 2;
+                wb = wb - diff_x;
+            }
+            if ((i_mixup == 1 || i_mixup == 3) && left < cut_x) {
+                float diff_x = (float)(cut_x - left) / w;
+                xb = xb + diff_x / 2;
+                wb = wb - diff_x;
+            }
+
+            if ((i_mixup == 0 || i_mixup == 1) && bot > cut_y) {
+                float diff_y = (float)(bot - cut_y) / h;
+                yb = yb - diff_y / 2;
+                hb = hb - diff_y;
+            }
+            if ((i_mixup == 2 || i_mixup == 3) && top < cut_y) {
+                float diff_y = (float)(cut_y - top) / h;
+                yb = yb + diff_y / 2;
+                hb = hb - diff_y;
+            }
+
+            new_truth_ptr[0] = xb;
+            new_truth_ptr[1] = yb;
+            new_truth_ptr[2] = wb;
+            new_truth_ptr[3] = hb;
+            new_truth_ptr[4] = old_truth_ptr[4];
+            new_t++;
+        }
+    }
+    //printf("\n was %d bboxes, now %d bboxes \n", count_new_truth, t);
+}
+
 #ifdef OPENCV

 #include "http_stream.h"
@ -820,18 +887,21 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
 {
    const int random_index = random_gen();
    c = c ? c : 3;
-    char **random_paths;
-    char **mixup_random_paths = NULL;
-    if (track) random_paths = get_sequential_paths(paths, n, m, mini_batch, augment_speed);
-    else random_paths = get_random_paths(paths, n, m);

-    int mixup = use_mixup ? random_gen() % 2 : 0;
-    //printf("\n mixup = %d \n", mixup);
-    if (mixup) {
-        if (track) mixup_random_paths = get_sequential_paths(paths, n, m, mini_batch, augment_speed);
-        else mixup_random_paths = get_random_paths(paths, n, m);
-    }
+    if (random_gen() % 2 == 0) use_mixup = 0;
    int i;
+
+    int *cut_x = NULL, *cut_y = NULL;
+    if (use_mixup == 3) {
+        cut_x = (int*)calloc(n, sizeof(int));
+        cut_y = (int*)calloc(n, sizeof(int));
+        const float min_offset = 0.2; // 20%
+        for (i = 0; i < n; ++i) {
+            cut_x[i] = rand_int(w*min_offset, w*(1 - min_offset));
+            cut_y[i] = rand_int(h*min_offset, h*(1 - min_offset));
+        }
+    }
+
    data d = {0};
    d.shallow = 0;

@ -845,12 +915,16 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo

    d.y = make_matrix(n, 5*boxes);
    int i_mixup = 0;
-    for (i_mixup = 0; i_mixup <= mixup; i_mixup++) {
+    for (i_mixup = 0; i_mixup <= use_mixup; i_mixup++) {
        if (i_mixup) augmentation_calculated = 0;   // recalculate augmentation for the 2nd sequence if(track==1)

+        char **random_paths;
+        if (track) random_paths = get_sequential_paths(paths, n, m, mini_batch, augment_speed);
+        else random_paths = get_random_paths(paths, n, m);
+
        for (i = 0; i < n; ++i) {
            float *truth = (float*)calloc(5 * boxes, sizeof(float));
-            const char *filename = (i_mixup) ? mixup_random_paths[i] : random_paths[i];
+            const char *filename = random_paths[i];

            int flag = (c >= 3);
            mat_cv *src;
@ -882,10 +956,12 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo

                flip = use_flip ? random_gen() % 2 : 0;

-                int tmp_blur = rand_int(0, 2);  // 0 - disable, 1 - blur background, 2 - blur the whole image
-                if (tmp_blur == 0) blur = 0;
-                else if (tmp_blur == 1) blur = 1;
-                else blur = use_blur;
+                if (use_blur) {
+                    int tmp_blur = rand_int(0, 2);  // 0 - disable, 1 - blur background, 2 - blur the whole image
+                    if (tmp_blur == 0) blur = 0;
+                    else if (tmp_blur == 1) blur = 1;
+                    else blur = use_blur;
+                }
            }

            int pleft = rand_precalc_random(-dw, dw, r1);
@ -935,27 +1011,67 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
            if (min_w_h / 8 < blur && blur > 1) blur = min_w_h / 8;   // disable blur if one of the objects is too small

            image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, dhue, dsat, dexp,
-                blur, boxes, truth);
+                blur, boxes, d.y.vals[i]);

-            if (i_mixup) {
-                image old_img = ai;
-                old_img.data = d.X.vals[i];
-                //show_image(ai, "new");
-                //show_image(old_img, "old");
-                //wait_until_press_key_cv();
-                blend_images_cv(ai, 0.5, old_img, 0.5);
-                blend_truth(truth, boxes, d.y.vals[i]);
-                free_image(old_img);
+            if (use_mixup == 0) {
+                d.X.vals[i] = ai.data;
+                memcpy(d.y.vals[i], truth, 5 * boxes * sizeof(float));
+            }
+            else if (use_mixup == 1) {
+                if (i_mixup == 0) {
+                    d.X.vals[i] = ai.data;
+                    memcpy(d.y.vals[i], truth, 5 * boxes * sizeof(float));
+                }
+                else if (i_mixup == 1) {
+                    image old_img = make_empty_image(w, h, c);
+                    old_img.data = d.X.vals[i];
+                    //show_image(ai, "new");
+                    //show_image(old_img, "old");
+                    //wait_until_press_key_cv();
+                    blend_images_cv(ai, 0.5, old_img, 0.5);
+                    blend_truth(d.y.vals[i], boxes, truth);
+                    free_image(old_img);
+                    d.X.vals[i] = ai.data;
+                }
+            }
+            else if (use_mixup == 3) {
+                if (i_mixup == 0) {
+                    image tmp_img = make_image(w, h, c);
+                    d.X.vals[i] = tmp_img.data;
+                }
+
+                int k, x, y;
+                for (k = 0; k < c; ++k) {
+                    for (y = 0; y < h; ++y) {
+                        int j = y*w + k*w*h;
+                        if (i_mixup == 0 && y < cut_y[i]) {
+                            memcpy(&d.X.vals[i][j + 0], &ai.data[j + 0], cut_x[i] * sizeof(float));
+                        }
+                        if (i_mixup == 1 && y < cut_y[i]) {
+                            memcpy(&d.X.vals[i][j + cut_x[i]], &ai.data[j + cut_x[i]], (w-cut_x[i]) * sizeof(float));
+                        }
+                        if (i_mixup == 2 && y >= cut_y[i]) {
+                            memcpy(&d.X.vals[i][j + 0], &ai.data[j + 0], cut_x[i] * sizeof(float));
+                        }
+                        if (i_mixup == 3 && y >= cut_y[i]) {
+                            memcpy(&d.X.vals[i][j + cut_x[i]], &ai.data[j + cut_x[i]], (w - cut_x[i]) * sizeof(float));
+                        }
+                    }
+                }
+
+                blend_truth_mosaic(d.y.vals[i], boxes, truth, w, h, cut_x[i], cut_y[i], i_mixup);
+
+                free_image(ai);
+                ai.data = d.X.vals[i];
            }

-            d.X.vals[i] = ai.data;
-            memcpy(d.y.vals[i], truth, 5*boxes * sizeof(float));

-            if (show_imgs)// && i_mixup)   // delete i_mixup
+            if (show_imgs && i_mixup == use_mixup)   // delete i_mixup
            {
                image tmp_ai = copy_image(ai);
                char buff[1000];
-                sprintf(buff, "aug_%d_%d_%s_%d", random_index, i, basecfg((char*)filename), random_gen());
+                //sprintf(buff, "aug_%d_%d_%s_%d", random_index, i, basecfg((char*)filename), random_gen());
+                sprintf(buff, "aug_%d_%d_%d", random_index, i, random_gen());
                int t;
                for (t = 0; t < boxes; ++t) {
                    box b = float_to_box_stride(d.y.vals[i] + t*(4 + 1), 1);
@ -982,9 +1098,10 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
            release_mat(&src);
            free(truth);
        }
+        if (random_paths) free(random_paths);
    }
-    free(random_paths);
-    if(mixup_random_paths) free(mixup_random_paths);
+
+
    return d;
 }
 #else    // OPENCV
--- a/src/scale_channels_layer.c
+++ b/src/scale_channels_layer.c
@ -20,7 +20,8 @@ layer make_scale_channels_layer(int batch, int index, int w, int h, int c, int w
    l.out_w = w2;
    l.out_h = h2;
    l.out_c = c2;
-    assert(l.out_c == l.c);
+    if (!l.scale_wh) assert(l.out_c == l.c);
+    else assert(l.out_w == l.w && l.out_h == l.h);

    l.outputs = l.out_w*l.out_h*l.out_c;
    l.inputs = l.outputs;