put embeddings to the detections

This commit is contained in:
AlexeyAB 2020-07-07 02:49:43 +03:00
parent 2fca3c399b
commit ce2ae2c5c3
5 changed files with 35 additions and 12 deletions

View File

@@ -300,7 +300,8 @@ struct layer {
int softmax; int softmax;
int classes; int classes;
int detection; int detection;
layer *embedding_layer; int embedding_layer_id;
float *embedding_output;
int embedding_size; int embedding_size;
int coords; int coords;
int background; int background;

View File

@@ -78,6 +78,7 @@ void free_layer_custom(layer l, int keep_cudnn_desc)
if (l.cos_sim) free(l.cos_sim); if (l.cos_sim) free(l.cos_sim);
if (l.exp_cos_sim) free(l.exp_cos_sim); if (l.exp_cos_sim) free(l.exp_cos_sim);
if (l.p_constrastive) free(l.p_constrastive); if (l.p_constrastive) free(l.p_constrastive);
if (l.embedding_output) free(l.embedding_output);
if (l.state) free(l.state); if (l.state) free(l.state);
if (l.prev_state) free(l.prev_state); if (l.prev_state) free(l.prev_state);
if (l.forgot_state) free(l.forgot_state); if (l.forgot_state) free(l.forgot_state);

View File

@@ -768,8 +768,16 @@ int num_detections_batch(network *net, float thresh, int batch)
detection *make_network_boxes(network *net, float thresh, int *num) detection *make_network_boxes(network *net, float thresh, int *num)
{ {
layer l = net->layers[net->n - 1];
int i; int i;
layer l = net->layers[net->n - 1];
for (i = 0; i < net->n; ++i) {
layer l_tmp = net->layers[i];
if (l_tmp.type == YOLO || l_tmp.type == GAUSSIAN_YOLO || l_tmp.type == DETECTION || l_tmp.type == REGION) {
l = l_tmp;
break;
}
}
int nboxes = num_detections(net, thresh); int nboxes = num_detections(net, thresh);
if (num) *num = nboxes; if (num) *num = nboxes;
detection* dets = (detection*)xcalloc(nboxes, sizeof(detection)); detection* dets = (detection*)xcalloc(nboxes, sizeof(detection));
@@ -782,7 +790,7 @@ detection *make_network_boxes(network *net, float thresh, int *num)
if (l.coords > 4) dets[i].mask = (float*)xcalloc(l.coords - 4, sizeof(float)); if (l.coords > 4) dets[i].mask = (float*)xcalloc(l.coords - 4, sizeof(float));
else dets[i].mask = NULL; else dets[i].mask = NULL;
if(l.embedding_layer) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float)); if(l.embedding_output) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float));
else dets[i].embeddings = NULL; else dets[i].embeddings = NULL;
} }
return dets; return dets;
@@ -792,6 +800,14 @@ detection *make_network_boxes_batch(network *net, float thresh, int *num, int ba
{ {
int i; int i;
layer l = net->layers[net->n - 1]; layer l = net->layers[net->n - 1];
for (i = 0; i < net->n; ++i) {
layer l_tmp = net->layers[i];
if (l_tmp.type == YOLO || l_tmp.type == GAUSSIAN_YOLO || l_tmp.type == DETECTION || l_tmp.type == REGION) {
l = l_tmp;
break;
}
}
int nboxes = num_detections_batch(net, thresh, batch); int nboxes = num_detections_batch(net, thresh, batch);
assert(num != NULL); assert(num != NULL);
*num = nboxes; *num = nboxes;
@@ -805,7 +821,7 @@ detection *make_network_boxes_batch(network *net, float thresh, int *num, int ba
if (l.coords > 4) dets[i].mask = (float*)xcalloc(l.coords - 4, sizeof(float)); if (l.coords > 4) dets[i].mask = (float*)xcalloc(l.coords - 4, sizeof(float));
else dets[i].mask = NULL; else dets[i].mask = NULL;
if (l.embedding_layer) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float)); if (l.embedding_output) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float));
else dets[i].embeddings = NULL; else dets[i].embeddings = NULL;
} }
return dets; return dets;

View File

@@ -491,8 +491,10 @@ layer parse_yolo(list *options, size_params params)
if (embedding_layer_id < 0) embedding_layer_id = params.index + embedding_layer_id; if (embedding_layer_id < 0) embedding_layer_id = params.index + embedding_layer_id;
if (embedding_layer_id != 999999) { if (embedding_layer_id != 999999) {
printf(" embedding_layer_id = %d, ", embedding_layer_id); printf(" embedding_layer_id = %d, ", embedding_layer_id);
l.embedding_layer = &params.net.layers[embedding_layer_id]; layer le = params.net.layers[embedding_layer_id];
l.embedding_size = l.embedding_layer->n / l.n; l.embedding_layer_id = embedding_layer_id;
l.embedding_output = (float*)xcalloc(le.batch * le.outputs, sizeof(float));
l.embedding_size = le.n / l.n;
printf(" embedding_size = %d \n", l.embedding_size); printf(" embedding_size = %d \n", l.embedding_size);
} }

View File

@@ -841,9 +841,8 @@ int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh,
dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h);
dets[count].objectness = objectness; dets[count].objectness = objectness;
dets[count].classes = l.classes; dets[count].classes = l.classes;
if (l.embedding_layer) { if (l.embedding_output) {
layer le = *l.embedding_layer; get_embedding(l.embedding_output, l.w, l.h, l.n*l.embedding_size, l.embedding_size, col, row, n, 0, dets[count].embeddings);
get_embedding(le.output, le.w, le.h, le.n, l.embedding_size, col, row, n, 0, dets[count].embeddings);
} }
for (j = 0; j < l.classes; ++j) { for (j = 0; j < l.classes; ++j) {
@@ -878,9 +877,8 @@ int get_yolo_detections_batch(layer l, int w, int h, int netw, int neth, float t
dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h);
dets[count].objectness = objectness; dets[count].objectness = objectness;
dets[count].classes = l.classes; dets[count].classes = l.classes;
if (l.embedding_layer) { if (l.embedding_output) {
layer le = *l.embedding_layer; get_embedding(l.embedding_output, l.w, l.h, l.n*l.embedding_size, l.embedding_size, col, row, n, batch, dets[count].embeddings);
get_embedding(le.output, le.w, le.h, le.n, l.embedding_size, col, row, n, batch, dets[count].embeddings);
} }
for (j = 0; j < l.classes; ++j) { for (j = 0; j < l.classes; ++j) {
@@ -900,6 +898,11 @@ int get_yolo_detections_batch(layer l, int w, int h, int netw, int neth, float t
void forward_yolo_layer_gpu(const layer l, network_state state) void forward_yolo_layer_gpu(const layer l, network_state state)
{ {
if (l.embedding_output) {
layer le = state.net.layers[l.embedding_layer_id];
cuda_pull_array_async(le.output_gpu, l.embedding_output, le.batch*le.outputs);
}
//copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1); //copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1);
simple_copy_ongpu(l.batch*l.inputs, state.input, l.output_gpu); simple_copy_ongpu(l.batch*l.inputs, state.input, l.output_gpu);
int b, n; int b, n;