put embeddings to the detections

2020-07-07 02:49:43 +03:00 · 2020-07-07 02:49:43 +03:00 · ce2ae2c5c3
parent 2fca3c399b
commit ce2ae2c5c3
5 changed files with 35 additions and 12 deletions
--- a/include/darknet.h
+++ b/include/darknet.h
@ -300,7 +300,8 @@ struct layer {
    int softmax;
    int classes;
    int detection;
-    layer *embedding_layer;
+    int embedding_layer_id;
+    float *embedding_output;
    int embedding_size;
    int coords;
    int background;
--- a/src/layer.c
+++ b/src/layer.c
@ -78,6 +78,7 @@ void free_layer_custom(layer l, int keep_cudnn_desc)
    if (l.cos_sim)            free(l.cos_sim);
    if (l.exp_cos_sim)        free(l.exp_cos_sim);
    if (l.p_constrastive)     free(l.p_constrastive);
+    if (l.embedding_output)   free(l.embedding_output);
    if (l.state)              free(l.state);
    if (l.prev_state)         free(l.prev_state);
    if (l.forgot_state)       free(l.forgot_state);
--- a/src/network.c
+++ b/src/network.c
@ -768,8 +768,16 @@ int num_detections_batch(network *net, float thresh, int batch)

 detection *make_network_boxes(network *net, float thresh, int *num)
 {
-    layer l = net->layers[net->n - 1];
    int i;
+    layer l = net->layers[net->n - 1];
+    for (i = 0; i < net->n; ++i) {
+        layer l_tmp = net->layers[i];
+        if (l_tmp.type == YOLO || l_tmp.type == GAUSSIAN_YOLO || l_tmp.type == DETECTION || l_tmp.type == REGION) {
+            l = l_tmp;
+            break;
+        }
+    }
+
    int nboxes = num_detections(net, thresh);
    if (num) *num = nboxes;
    detection* dets = (detection*)xcalloc(nboxes, sizeof(detection));
@ -782,7 +790,7 @@ detection *make_network_boxes(network *net, float thresh, int *num)
        if (l.coords > 4) dets[i].mask = (float*)xcalloc(l.coords - 4, sizeof(float));
        else dets[i].mask = NULL;

-        if(l.embedding_layer) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float));
+        if(l.embedding_output) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float));
        else dets[i].embeddings = NULL;
    }
    return dets;
@ -792,6 +800,14 @@ detection *make_network_boxes_batch(network *net, float thresh, int *num, int ba
 {
    int i;
    layer l = net->layers[net->n - 1];
+    for (i = 0; i < net->n; ++i) {
+        layer l_tmp = net->layers[i];
+        if (l_tmp.type == YOLO || l_tmp.type == GAUSSIAN_YOLO || l_tmp.type == DETECTION || l_tmp.type == REGION) {
+            l = l_tmp;
+            break;
+        }
+    }
+
    int nboxes = num_detections_batch(net, thresh, batch);
    assert(num != NULL);
    *num = nboxes;
@ -805,7 +821,7 @@ detection *make_network_boxes_batch(network *net, float thresh, int *num, int ba
        if (l.coords > 4) dets[i].mask = (float*)xcalloc(l.coords - 4, sizeof(float));
        else dets[i].mask = NULL;

-        if (l.embedding_layer) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float));
+        if (l.embedding_output) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float));
        else dets[i].embeddings = NULL;
    }
    return dets;
--- a/src/parser.c
+++ b/src/parser.c
@ -491,8 +491,10 @@ layer parse_yolo(list *options, size_params params)
    if (embedding_layer_id < 0) embedding_layer_id = params.index + embedding_layer_id;
    if (embedding_layer_id != 999999) {
        printf(" embedding_layer_id = %d, ", embedding_layer_id);
-        l.embedding_layer = &params.net.layers[embedding_layer_id];
-        l.embedding_size = l.embedding_layer->n / l.n;
+        layer le = params.net.layers[embedding_layer_id];
+        l.embedding_layer_id = embedding_layer_id;
+        l.embedding_output = (float*)xcalloc(le.batch * le.outputs, sizeof(float));
+        l.embedding_size = le.n / l.n;
        printf(" embedding_size = %d \n", l.embedding_size);
    }

--- a/src/yolo_layer.c
+++ b/src/yolo_layer.c
@ -841,9 +841,8 @@ int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh,
                dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h);
                dets[count].objectness = objectness;
                dets[count].classes = l.classes;
-                if (l.embedding_layer) {
-                    layer le = *l.embedding_layer;
-                    get_embedding(le.output, le.w, le.h, le.n, l.embedding_size, col, row, n, 0, dets[count].embeddings);
+                if (l.embedding_output) {
+                    get_embedding(l.embedding_output, l.w, l.h, l.n*l.embedding_size, l.embedding_size, col, row, n, 0, dets[count].embeddings);
                }

                for (j = 0; j < l.classes; ++j) {
@ -878,9 +877,8 @@ int get_yolo_detections_batch(layer l, int w, int h, int netw, int neth, float t
                dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h);
                dets[count].objectness = objectness;
                dets[count].classes = l.classes;
-                if (l.embedding_layer) {
-                    layer le = *l.embedding_layer;
-                    get_embedding(le.output, le.w, le.h, le.n, l.embedding_size, col, row, n, batch, dets[count].embeddings);
+                if (l.embedding_output) {
+                    get_embedding(l.embedding_output, l.w, l.h, l.n*l.embedding_size, l.embedding_size, col, row, n, batch, dets[count].embeddings);
                }

                for (j = 0; j < l.classes; ++j) {
@ -900,6 +898,11 @@ int get_yolo_detections_batch(layer l, int w, int h, int netw, int neth, float t

 void forward_yolo_layer_gpu(const layer l, network_state state)
 {
+    if (l.embedding_output) {
+        layer le = state.net.layers[l.embedding_layer_id];
+        cuda_pull_array_async(le.output_gpu, l.embedding_output, le.batch*le.outputs);
+    }
+
    //copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1);
    simple_copy_ongpu(l.batch*l.inputs, state.input, l.output_gpu);
    int b, n;