From ce2ae2c5c3849cdb6f9b66680c2ef0c2f9d2dad2 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 7 Jul 2020 02:49:43 +0300 Subject: [PATCH] put embeddings to the detections --- include/darknet.h | 3 ++- src/layer.c | 1 + src/network.c | 22 +++++++++++++++++++--- src/parser.c | 6 ++++-- src/yolo_layer.c | 15 +++++++++------ 5 files changed, 35 insertions(+), 12 deletions(-) diff --git a/include/darknet.h b/include/darknet.h index 9cfd2ac4..c278c2ff 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -300,7 +300,8 @@ struct layer { int softmax; int classes; int detection; - layer *embedding_layer; + int embedding_layer_id; + float *embedding_output; int embedding_size; int coords; int background; diff --git a/src/layer.c b/src/layer.c index 3dcef7ac..ae60254b 100644 --- a/src/layer.c +++ b/src/layer.c @@ -78,6 +78,7 @@ void free_layer_custom(layer l, int keep_cudnn_desc) if (l.cos_sim) free(l.cos_sim); if (l.exp_cos_sim) free(l.exp_cos_sim); if (l.p_constrastive) free(l.p_constrastive); + if (l.embedding_output) free(l.embedding_output); if (l.state) free(l.state); if (l.prev_state) free(l.prev_state); if (l.forgot_state) free(l.forgot_state); diff --git a/src/network.c b/src/network.c index aa68a642..81349aba 100644 --- a/src/network.c +++ b/src/network.c @@ -768,8 +768,16 @@ int num_detections_batch(network *net, float thresh, int batch) detection *make_network_boxes(network *net, float thresh, int *num) { - layer l = net->layers[net->n - 1]; int i; + layer l = net->layers[net->n - 1]; + for (i = 0; i < net->n; ++i) { + layer l_tmp = net->layers[i]; + if (l_tmp.type == YOLO || l_tmp.type == GAUSSIAN_YOLO || l_tmp.type == DETECTION || l_tmp.type == REGION) { + l = l_tmp; + break; + } + } + int nboxes = num_detections(net, thresh); if (num) *num = nboxes; detection* dets = (detection*)xcalloc(nboxes, sizeof(detection)); @@ -782,7 +790,7 @@ detection *make_network_boxes(network *net, float thresh, int *num) if (l.coords > 4) dets[i].mask = 
(float*)xcalloc(l.coords - 4, sizeof(float)); else dets[i].mask = NULL; - if(l.embedding_layer) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float)); + if(l.embedding_output) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float)); else dets[i].embeddings = NULL; } return dets; @@ -792,6 +800,14 @@ detection *make_network_boxes_batch(network *net, float thresh, int *num, int ba { int i; layer l = net->layers[net->n - 1]; + for (i = 0; i < net->n; ++i) { + layer l_tmp = net->layers[i]; + if (l_tmp.type == YOLO || l_tmp.type == GAUSSIAN_YOLO || l_tmp.type == DETECTION || l_tmp.type == REGION) { + l = l_tmp; + break; + } + } + int nboxes = num_detections_batch(net, thresh, batch); assert(num != NULL); *num = nboxes; @@ -805,7 +821,7 @@ detection *make_network_boxes_batch(network *net, float thresh, int *num, int ba if (l.coords > 4) dets[i].mask = (float*)xcalloc(l.coords - 4, sizeof(float)); else dets[i].mask = NULL; - if (l.embedding_layer) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float)); + if (l.embedding_output) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float)); else dets[i].embeddings = NULL; } return dets; diff --git a/src/parser.c b/src/parser.c index b11997d7..1e5dec4a 100644 --- a/src/parser.c +++ b/src/parser.c @@ -491,8 +491,10 @@ layer parse_yolo(list *options, size_params params) if (embedding_layer_id < 0) embedding_layer_id = params.index + embedding_layer_id; if (embedding_layer_id != 999999) { printf(" embedding_layer_id = %d, ", embedding_layer_id); - l.embedding_layer = &params.net.layers[embedding_layer_id]; - l.embedding_size = l.embedding_layer->n / l.n; + layer le = params.net.layers[embedding_layer_id]; + l.embedding_layer_id = embedding_layer_id; + l.embedding_output = (float*)xcalloc(le.batch * le.outputs, sizeof(float)); + l.embedding_size = le.n / l.n; printf(" embedding_size = %d \n", l.embedding_size); } diff --git a/src/yolo_layer.c b/src/yolo_layer.c index 
970a3897..82bd451f 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -841,9 +841,8 @@ int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); dets[count].objectness = objectness; dets[count].classes = l.classes; - if (l.embedding_layer) { - layer le = *l.embedding_layer; - get_embedding(le.output, le.w, le.h, le.n, l.embedding_size, col, row, n, 0, dets[count].embeddings); + if (l.embedding_output) { + get_embedding(l.embedding_output, l.w, l.h, l.n*l.embedding_size, l.embedding_size, col, row, n, 0, dets[count].embeddings); } for (j = 0; j < l.classes; ++j) { @@ -878,9 +877,8 @@ int get_yolo_detections_batch(layer l, int w, int h, int netw, int neth, float t dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); dets[count].objectness = objectness; dets[count].classes = l.classes; - if (l.embedding_layer) { - layer le = *l.embedding_layer; - get_embedding(le.output, le.w, le.h, le.n, l.embedding_size, col, row, n, batch, dets[count].embeddings); + if (l.embedding_output) { + get_embedding(l.embedding_output, l.w, l.h, l.n*l.embedding_size, l.embedding_size, col, row, n, batch, dets[count].embeddings); } for (j = 0; j < l.classes; ++j) { @@ -900,6 +898,11 @@ int get_yolo_detections_batch(layer l, int w, int h, int netw, int neth, float t void forward_yolo_layer_gpu(const layer l, network_state state) { + if (l.embedding_output) { + layer le = state.net.layers[l.embedding_layer_id]; + cuda_pull_array_async(le.output_gpu, l.embedding_output, le.batch*le.outputs); + } + //copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1); simple_copy_ongpu(l.batch*l.inputs, state.input, l.output_gpu); int b, n;