From ce2ae2c5c3849cdb6f9b66680c2ef0c2f9d2dad2 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Tue, 7 Jul 2020 02:49:43 +0300
Subject: [PATCH] Put embeddings into the detections

---
 include/darknet.h |  3 ++-
 src/layer.c       |  1 +
 src/network.c     | 22 +++++++++++++++++++---
 src/parser.c      |  6 ++++--
 src/yolo_layer.c  | 15 +++++++++------
 5 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/include/darknet.h b/include/darknet.h
index 9cfd2ac4..c278c2ff 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -300,7 +300,8 @@ struct layer {
     int softmax;
     int classes;
     int detection;
-    layer *embedding_layer;
+    int embedding_layer_id;
+    float *embedding_output;
     int embedding_size;
     int coords;
     int background;
diff --git a/src/layer.c b/src/layer.c
index 3dcef7ac..ae60254b 100644
--- a/src/layer.c
+++ b/src/layer.c
@@ -78,6 +78,7 @@ void free_layer_custom(layer l, int keep_cudnn_desc)
     if (l.cos_sim)            free(l.cos_sim);
     if (l.exp_cos_sim)        free(l.exp_cos_sim);
     if (l.p_constrastive)     free(l.p_constrastive);
+    if (l.embedding_output)   free(l.embedding_output);
     if (l.state)              free(l.state);
     if (l.prev_state)         free(l.prev_state);
     if (l.forgot_state)       free(l.forgot_state);
diff --git a/src/network.c b/src/network.c
index aa68a642..81349aba 100644
--- a/src/network.c
+++ b/src/network.c
@@ -768,8 +768,16 @@ int num_detections_batch(network *net, float thresh, int batch)
 
 detection *make_network_boxes(network *net, float thresh, int *num)
 {
-    layer l = net->layers[net->n - 1];
     int i;
+    layer l = net->layers[net->n - 1];
+    for (i = 0; i < net->n; ++i) {
+        layer l_tmp = net->layers[i];
+        if (l_tmp.type == YOLO || l_tmp.type == GAUSSIAN_YOLO || l_tmp.type == DETECTION || l_tmp.type == REGION) {
+            l = l_tmp;
+            break;
+        }
+    }
+
     int nboxes = num_detections(net, thresh);
     if (num) *num = nboxes;
     detection* dets = (detection*)xcalloc(nboxes, sizeof(detection));
@@ -782,7 +790,7 @@ detection *make_network_boxes(network *net, float thresh, int *num)
         if (l.coords > 4) dets[i].mask = (float*)xcalloc(l.coords - 4, sizeof(float));
         else dets[i].mask = NULL;
 
-        if(l.embedding_layer) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float));
+        if(l.embedding_output) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float));
         else dets[i].embeddings = NULL;
     }
     return dets;
@@ -792,6 +800,14 @@ detection *make_network_boxes_batch(network *net, float thresh, int *num, int ba
 {
     int i;
     layer l = net->layers[net->n - 1];
+    for (i = 0; i < net->n; ++i) {
+        layer l_tmp = net->layers[i];
+        if (l_tmp.type == YOLO || l_tmp.type == GAUSSIAN_YOLO || l_tmp.type == DETECTION || l_tmp.type == REGION) {
+            l = l_tmp;
+            break;
+        }
+    }
+
     int nboxes = num_detections_batch(net, thresh, batch);
     assert(num != NULL);
     *num = nboxes;
@@ -805,7 +821,7 @@ detection *make_network_boxes_batch(network *net, float thresh, int *num, int ba
         if (l.coords > 4) dets[i].mask = (float*)xcalloc(l.coords - 4, sizeof(float));
         else dets[i].mask = NULL;
 
-        if (l.embedding_layer) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float));
+        if (l.embedding_output) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float));
         else dets[i].embeddings = NULL;
     }
     return dets;
diff --git a/src/parser.c b/src/parser.c
index b11997d7..1e5dec4a 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -491,8 +491,10 @@ layer parse_yolo(list *options, size_params params)
     if (embedding_layer_id < 0) embedding_layer_id = params.index + embedding_layer_id;
     if (embedding_layer_id != 999999) {
         printf(" embedding_layer_id = %d, ", embedding_layer_id);
-        l.embedding_layer = &params.net.layers[embedding_layer_id];
-        l.embedding_size = l.embedding_layer->n / l.n;
+        layer le = params.net.layers[embedding_layer_id];
+        l.embedding_layer_id = embedding_layer_id;
+        l.embedding_output = (float*)xcalloc(le.batch * le.outputs, sizeof(float));
+        l.embedding_size = le.n / l.n;
         printf(" embedding_size = %d \n", l.embedding_size);
     }
 
diff --git a/src/yolo_layer.c b/src/yolo_layer.c
index 970a3897..82bd451f 100644
--- a/src/yolo_layer.c
+++ b/src/yolo_layer.c
@@ -841,9 +841,8 @@ int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh,
                 dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h);
                 dets[count].objectness = objectness;
                 dets[count].classes = l.classes;
-                if (l.embedding_layer) {
-                    layer le = *l.embedding_layer;
-                    get_embedding(le.output, le.w, le.h, le.n, l.embedding_size, col, row, n, 0, dets[count].embeddings);
+                if (l.embedding_output) {
+                    get_embedding(l.embedding_output, l.w, l.h, l.n*l.embedding_size, l.embedding_size, col, row, n, 0, dets[count].embeddings);
                 }
 
                 for (j = 0; j < l.classes; ++j) {
@@ -878,9 +877,8 @@ int get_yolo_detections_batch(layer l, int w, int h, int netw, int neth, float t
                 dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h);
                 dets[count].objectness = objectness;
                 dets[count].classes = l.classes;
-                if (l.embedding_layer) {
-                    layer le = *l.embedding_layer;
-                    get_embedding(le.output, le.w, le.h, le.n, l.embedding_size, col, row, n, batch, dets[count].embeddings);
+                if (l.embedding_output) {
+                    get_embedding(l.embedding_output, l.w, l.h, l.n*l.embedding_size, l.embedding_size, col, row, n, batch, dets[count].embeddings);
                 }
 
                 for (j = 0; j < l.classes; ++j) {
@@ -900,6 +898,11 @@ int get_yolo_detections_batch(layer l, int w, int h, int netw, int neth, float t
 
 void forward_yolo_layer_gpu(const layer l, network_state state)
 {
+    if (l.embedding_output) {
+        layer le = state.net.layers[l.embedding_layer_id];
+        cuda_pull_array_async(le.output_gpu, l.embedding_output, le.batch*le.outputs);
+    }
+
     //copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1);
     simple_copy_ongpu(l.batch*l.inputs, state.input, l.output_gpu);
     int b, n;