diff --git a/include/darknet.h b/include/darknet.h
index 32077ccf..e7063e30 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -727,6 +727,7 @@ typedef struct network {
     int letter_box;
     int mosaic_bound;
     int contrastive;
+    int unsupervised;
     float angle;
     float aspect;
     float exposure;
diff --git a/src/blas.c b/src/blas.c
index adcce984..7bfc7528 100644
--- a/src/blas.c
+++ b/src/blas.c
@@ -590,9 +590,10 @@ void grad_contrastive_loss_positive(int i, int *labels, int num_of_samples, floa
         fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. N=%f, temperature=%f, vec_len=%f \n", N, temperature, vec_len);
         getchar();
     }
-    const float mult = 1 / ((2 * N - 1) * temperature * vec_len);
+    const float mult = 1 / ((N - 1) * temperature * vec_len);

     for (j = 0; j < num_of_samples; ++j) {
+        //if (i != j && (i/2) == (j/2)) {
         if (i != j && labels[i] == labels[j]) {
             const float sim = cos_sim[i*num_of_samples + j];    // cosine_similarity(z[i], z[j], feature_size);
             const float P = p_constrastive[i*num_of_samples + j];   // P_constrastive(i, j, labels, num_of_samples, z, feature_size, temperature, cos_sim);
@@ -600,8 +601,8 @@ void grad_contrastive_loss_positive(int i, int *labels, int num_of_samples, floa

             int m;
             for (m = 0; m < feature_size; ++m) {
-                //const float d = mult*(sim * z[i][m] - z[j][m]) * (1 - P); // bad
-                const float d = mult*(sim * z[j][m] - z[j][m]) * (1 - P); // good
+                const float d = mult*(sim * z[i][m] - z[j][m]) * (1 - P); // good
+                //const float d = mult*(sim * z[j][m] - z[j][m]) * (1 - P); // bad
                 // printf(" pos: z[j][m] = %f, z[i][m] = %f, d = %f, sim = %f \n", z[j][m], z[i][m], d, sim);
                 delta[m] -= d;
             }
@@ -626,9 +627,10 @@ void grad_contrastive_loss_negative(int i, int *labels, int num_of_samples, floa
         fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. N=%f, temperature=%f, vec_len=%f \n", N, temperature, vec_len);
         getchar();
     }
-    const float mult = 1 / ((2 * N - 1) * temperature * vec_len);
+    const float mult = 1 / ((N - 1) * temperature * vec_len);

     for (j = 0; j < num_of_samples; ++j) {
+        //if (i != j && (i/2) == (j/2)) {
         if (i != j && labels[i] == labels[j]) {

             int k;
@@ -641,8 +643,8 @@ void grad_contrastive_loss_negative(int i, int *labels, int num_of_samples, floa

                 int m;
                 for (m = 0; m < feature_size; ++m) {
-                    //const float d = mult*(z[k][m] - sim * z[i][m]) * P; // bad
-                    const float d = mult*(z[k][m] - sim * z[k][m]) * P; // good
+                    const float d = mult*(z[k][m] - sim * z[i][m]) * P; // good
+                    //const float d = mult*(z[k][m] - sim * z[k][m]) * P; // bad
                     //printf(" neg: z[k][m] = %f, z[i][m] = %f, d = %f, sim = %f \n", z[k][m], z[i][m], d, sim);
                     delta[m] -= d;
                 }
diff --git a/src/classifier.c b/src/classifier.c
index e340ed96..915be9cd 100644
--- a/src/classifier.c
+++ b/src/classifier.c
@@ -69,13 +69,18 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
     int topk_data = option_find_int(options, "top", 5);
     char topk_buff[10];
     sprintf(topk_buff, "top%d", topk_data);
-    if (classes != net.layers[net.n - 1].inputs) {
+    layer l = net.layers[net.n - 1];
+    if (classes != l.outputs && (l.type == SOFTMAX || l.type == COST)) {
         printf("\n Error: num of filters = %d in the last conv-layer in cfg-file doesn't match to classes = %d in data-file \n",
-            net.layers[net.n - 1].inputs, classes);
+            l.outputs, classes);
         getchar();
     }

     char **labels = get_labels(label_list);
+    if (net.unsupervised) {
+        free(labels);
+        labels = NULL;
+    }
     list *plist = get_paths(train_list);
     char **paths = (char **)list_to_array(plist);
     printf("%d\n", plist->size);
@@ -184,8 +189,16 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
         int draw_precision = 0;
         if (calc_topk && (i >= calc_topk_for_each || i == net.max_batches)) {
             iter_topk = i;
-            topk = validate_classifier_single(datacfg, cfgfile, weightfile, &net, topk_data); // calc TOP-n
-            printf("\n accuracy %s = %f \n", topk_buff, topk);
+            if (net.contrastive && l.type != SOFTMAX && l.type != COST) {
+                int k;
+                for (k = 0; k < net.n; ++k) if (net.layers[k].type == CONTRASTIVE) break;
+                topk = *(net.layers[k].loss) / 100;
+                sprintf(topk_buff, "Contr");
+            }
+            else {
+                topk = validate_classifier_single(datacfg, cfgfile, weightfile, &net, topk_data); // calc TOP-n
+                printf("\n accuracy %s = %f \n", topk_buff, topk);
+            }
             draw_precision = 1;
         }

@@ -240,7 +253,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
     free(nets);

     //free_ptrs((void**)labels, classes);
-    free(labels);
+    if(labels) free(labels);
     free_ptrs((void**)paths, plist->size);
     free_list(plist);
     free(nets);
@@ -820,9 +833,10 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
     if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
     int classes = option_find_int(options, "classes", 2);
     printf(" classes = %d, output in cfg = %d \n", classes, net.layers[net.n - 1].c);
-    if (classes != net.layers[net.n - 1].inputs) {
+    layer l = net.layers[net.n - 1];
+    if (classes != l.outputs && (l.type == SOFTMAX || l.type == COST)) {
         printf("\n Error: num of filters = %d in the last conv-layer in cfg-file doesn't match to classes = %d in data-file \n",
-            net.layers[net.n - 1].inputs, classes);
+            l.outputs, classes);
         getchar();
     }
     if (top == 0) top = option_find_int(options, "top", 1);
diff --git a/src/data.c b/src/data.c
index eb1759db..d1fa4c12 100644
--- a/src/data.c
+++ b/src/data.c
@@ -613,7 +613,9 @@ matrix load_labels_paths(char **paths, int n, char **labels, int k, tree *hierar
     }
     else {
         // unsupervised learning
         for (i = 0; i < n; ++i) {
-            const int class_id = i / 2;
+            const int img_index = (contrastive) ? (i / 2) : i;
+            const uintptr_t path_p = (uintptr_t)paths[img_index];// abs(random_gen());
+            const int class_id = path_p % k;
             int l;
             for (l = 0; l < k; ++l) y.vals[i][l] = 0;
             y.vals[i][class_id] = 1;
diff --git a/src/parser.c b/src/parser.c
index 1306a773..c43ef678 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1130,6 +1130,7 @@ void parse_net_options(list *options, network *net)
     net->letter_box = option_find_int_quiet(options, "letter_box", 0);
     net->mosaic_bound = option_find_int_quiet(options, "mosaic_bound", 0);
     net->contrastive = option_find_int_quiet(options, "contrastive", 0);
+    net->unsupervised = option_find_int_quiet(options, "unsupervised", 0);
     net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
     net->resize_step = option_find_float_quiet(options, "resize_step", 32);
     net->attention = option_find_int_quiet(options, "attention", 0);
diff --git a/src/softmax_layer.c b/src/softmax_layer.c
index fb4e91bd..ffbefffc 100644
--- a/src/softmax_layer.c
+++ b/src/softmax_layer.c
@@ -141,7 +141,7 @@ contrastive_layer make_contrastive_layer(int batch, int w, int h, int n, int cla
     l.n = n;
     l.classes = classes;
     l.temperature = 1;
-    //l.loss = (float*)xcalloc(inputs * batch, sizeof(float));
+    l.loss = (float*)xcalloc(1, sizeof(float));
     l.output = (float*)xcalloc(inputs * batch, sizeof(float));
     l.delta = (float*)xcalloc(inputs * batch, sizeof(float));
     l.cost = (float*)xcalloc(1, sizeof(float));
@@ -156,7 +156,6 @@ contrastive_layer make_contrastive_layer(int batch, int w, int h, int n, int cla
     l.backward_gpu = backward_contrastive_layer_gpu;

     l.output_gpu = cuda_make_array(l.output, inputs*batch);
-    //l.loss_gpu = cuda_make_array(l.loss, inputs*batch);
     l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
     //l.cos_sim_gpu = cuda_make_array(l.cos_sim, l.batch*l.batch);
 #endif
@@ -164,10 +163,10 @@ contrastive_layer make_contrastive_layer(int batch, int w, int h, int n, int cla
 }


-void forward_contrastive_layer(const contrastive_layer l, network_state state)
+void forward_contrastive_layer(contrastive_layer l, network_state state)
 {
     if (!state.train) return;
-    const float truth_thresh = 0.2;
+    const float truth_thresh = state.net.label_smooth_eps;

     memset(l.delta, 0, l.batch*l.inputs * sizeof(float));

@@ -183,7 +182,8 @@ void forward_contrastive_layer(const contrastive_layer l, network_state state)
         for (n = 0; n < l.classes; ++n) {
             const float truth_prob = state.truth[b*l.classes + n];
             //printf(" truth_prob = %f, ", truth_prob);
-            if (truth_prob > max_truth)
+            //if (truth_prob > max_truth)
+            if (truth_prob > truth_thresh)
             {
                 max_truth = truth_prob;
                 l.labels[b] = n;
@@ -228,7 +228,8 @@ void forward_contrastive_layer(const contrastive_layer l, network_state state)
         //printf(" l.labels[b] = %d, l.labels[b+1] = %d, l.labels[b+2] = %d, b = %d \n", l.labels[b], l.labels[b + 1], l.labels[b + 2], b);
         //printf(" same = %f, aug = %f, diff = %f, (aug > diff) = %d \n", same, aug, diff, (aug > diff));
     }
-    printf("good contrast = %f %% \n", 100 * good_contrast / (l.batch/2));
+    *l.loss = 100 * good_contrast / (l.batch / 2);
+    printf(" Contrast accuracy = %f %% \n", *l.loss);

     // precalculate P_contrastive
     for (b = 0; b < l.batch; ++b) {
@@ -251,10 +252,10 @@ void forward_contrastive_layer(const contrastive_layer l, network_state state)
             {
                 //printf(" b = %d, ", b);
                 // positive
-                grad_contrastive_loss_positive(b, l.labels, l.batch, z, l.n, l.temperature, l.cos_sim, l.p_constrastive, l.delta);
+                grad_contrastive_loss_positive(b, l.labels, l.batch, z, l.n, l.temperature, l.cos_sim, l.p_constrastive, l.delta + b*l.inputs);

                 // negative
-                grad_contrastive_loss_negative(b, l.labels, l.batch, z, l.n, l.temperature, l.cos_sim, l.p_constrastive, l.delta);
+                grad_contrastive_loss_negative(b, l.labels, l.batch, z, l.n, l.temperature, l.cos_sim, l.p_constrastive, l.delta + b*l.inputs);
             }
         }
     }
@@ -264,9 +265,9 @@ void forward_contrastive_layer(const contrastive_layer l, network_state state)
     free(z);
 }

-void backward_contrastive_layer(const contrastive_layer l, network_state net)
+void backward_contrastive_layer(contrastive_layer l, network_state state)
 {
-    axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1);
+    axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, state.delta, 1);
 }

@@ -283,10 +284,10 @@ void push_contrastive_layer_output(const contrastive_layer l)
 }


-void forward_contrastive_layer_gpu(const contrastive_layer l, network_state state)
+void forward_contrastive_layer_gpu(contrastive_layer l, network_state state)
 {
-    if (!state.train) return;
     simple_copy_ongpu(l.batch*l.inputs, state.input, l.output_gpu);
+    if (!state.train) return;

     float *in_cpu = (float *)xcalloc(l.batch*l.inputs, sizeof(float));
     cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs);
@@ -311,7 +312,7 @@ void forward_contrastive_layer_gpu(const contrastive_layer l, network_state stat
     if (cpu_state.truth) free(cpu_state.truth);
 }

-void backward_contrastive_layer_gpu(const contrastive_layer layer, network_state state)
+void backward_contrastive_layer_gpu(contrastive_layer layer, network_state state)
 {
     axpy_ongpu(layer.batch*layer.inputs, 1, layer.delta_gpu, 1, state.delta, 1);
 }
diff --git a/src/softmax_layer.h b/src/softmax_layer.h
index d49bddc4..f1985a99 100644
--- a/src/softmax_layer.h
+++ b/src/softmax_layer.h
@@ -23,14 +23,14 @@ void backward_softmax_layer_gpu(const softmax_layer l, network_state state);

 //-----------------------
 contrastive_layer make_contrastive_layer(int batch, int w, int h, int n, int classes, int inputs);
-void forward_contrastive_layer(const contrastive_layer l, network_state state);
-void backward_contrastive_layer(const contrastive_layer l, network_state net);
+void forward_contrastive_layer(contrastive_layer l, network_state state);
+void backward_contrastive_layer(contrastive_layer l, network_state net);

 #ifdef GPU
 void pull_contrastive_layer_output(const contrastive_layer l);
 void push_contrastive_layer_output(const contrastive_layer l);
-void forward_contrastive_layer_gpu(const contrastive_layer l, network_state state);
-void backward_contrastive_layer_gpu(const contrastive_layer layer, network_state state);
+void forward_contrastive_layer_gpu(contrastive_layer l, network_state state);
+void backward_contrastive_layer_gpu(contrastive_layer layer, network_state state);
 #endif

 #ifdef __cplusplus
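
For readers checking the math behind the blas.c change: the derivative of the cosine similarity between z[i] and z[j] with respect to z[i][m] mixes z[j][m] with the projection term sim * z[i][m], so the variant previously marked "good", which used z[j][m] in both places, collapses to something proportional to z[j][m] * (sim - 1) and vanishes as a positive pair converges. Below is a minimal standalone C sketch of the corrected positive-pair update, not part of the patch; the embeddings, N, and the placeholder P are toy values, not anything darknet produces.

/* Minimal sketch of the corrected positive-pair gradient from
 * grad_contrastive_loss_positive() above. Toy data only; darknet's real code
 * loops over all positive pairs and computes P via P_constrastive(). */
#include <math.h>
#include <stdio.h>

static float vec_len(const float *v, int n) {
    float s = 0; int m;
    for (m = 0; m < n; ++m) s += v[m] * v[m];
    return sqrtf(s);
}

static float cos_sim(const float *a, const float *b, int n) {
    float dot = 0; int m;
    for (m = 0; m < n; ++m) dot += a[m] * b[m];
    return dot / (vec_len(a, n) * vec_len(b, n));
}

int main(void) {
    enum { FEATURE_SIZE = 4 };
    float zi[FEATURE_SIZE] = { 0.5f, -0.1f, 0.8f, 0.2f };  /* anchor embedding (toy) */
    float zj[FEATURE_SIZE] = { 0.4f,  0.0f, 0.7f, 0.3f };  /* its positive pair (toy) */
    float delta[FEATURE_SIZE] = { 0 };

    const float temperature = 1.0f;
    const float N = 2;      /* number of samples sharing this label (toy) */
    const float P = 0.5f;   /* placeholder for p_constrastive[i*num_of_samples + j] */
    const float sim = cos_sim(zi, zj, FEATURE_SIZE);
    /* mult mirrors the patched normalization 1 / ((N - 1) * temperature * vec_len) */
    const float mult = 1 / ((N - 1) * temperature * vec_len(zi, FEATURE_SIZE));

    int m;
    for (m = 0; m < FEATURE_SIZE; ++m) {
        /* Corrected term: mixes z[j][m] with the projection sim * z[i][m].
         * The old line used z[j][m] in both places, so the whole expression
         * reduced to z[j][m] * (sim - 1), going to zero as the pair becomes
         * similar and never pointing along the true similarity gradient. */
        const float d = mult * (sim * zi[m] - zj[m]) * (1 - P);
        delta[m] -= d;
    }

    for (m = 0; m < FEATURE_SIZE; ++m) printf("delta[%d] = %f\n", m, delta[m]);
    return 0;
}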
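The delta indexing fix in softmax_layer.c is easier to see with the buffer layout spelled out: grad_contrastive_loss_positive() and grad_contrastive_loss_negative() write feature_size floats starting at whatever pointer they receive, so the old calls, which passed the unoffset l.delta for every b, accumulated the whole batch into sample 0's slice. A sketch with hypothetical sizes:

/* Why forward_contrastive_layer now passes l.delta + b*l.inputs: l.delta is
 * one flat buffer for the whole batch, laid out [sample 0 | sample 1 | ...],
 * each slice l.inputs floats wide. BATCH and INPUTS here are made up. */
#include <stdio.h>
#include <stdlib.h>

#define BATCH  4
#define INPUTS 8   /* stands in for feature_size of the contrastive layer */

int main(void) {
    float *delta = (float *)calloc(BATCH * INPUTS, sizeof(float));
    int b, m;
    for (b = 0; b < BATCH; ++b) {
        float *delta_b = delta + b * INPUTS;   /* sample b's own slice */
        /* Stand-in for the grad_contrastive_loss_* calls, which write exactly
         * INPUTS floats starting at the pointer they receive. */
        for (m = 0; m < INPUTS; ++m) delta_b[m] -= (float)(b + 1);
    }
    /* With the unoffset pointer, all four samples would have summed into
     * delta[0..INPUTS-1] and samples 1..3 would backpropagate nothing. */
    for (b = 0; b < BATCH; ++b) printf("delta[%d][0] = %f\n", b, delta[b * INPUTS]);
    free(delta);
    return 0;
}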
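Finally, the data.c change works together with the new unsupervised option parsed in parser.c: with unsupervised=1 in the [net] section, train_classifier() frees the label list, and load_labels_paths() falls back to a pseudo-label hashed from the image path pointer. The label is arbitrary but consistent within a run, and in contrastive mode rows 2t and 2t+1 (two augmentations of image t) share one pointer and therefore one class. A sketch with made-up paths:

/* Sketch of the pseudo-label assignment from load_labels_paths() above.
 * Each path pointer is reduced modulo k into a class id; in contrastive mode
 * rows 2t and 2t+1 index the same path and so get the same pseudo-class.
 * The paths and sizes here are made up. */
#include <stdint.h>
#include <stdio.h>

int main(void) {
    char *paths[2] = { "data/img0.jpg", "data/img1.jpg" };
    const int n = 4;            /* rows: 2 images x 2 augmentations */
    const int k = 1000;         /* number of pseudo-classes */
    const int contrastive = 1;  /* with contrastive=0, img_index would be i */
    int i;
    for (i = 0; i < n; ++i) {
        const int img_index = (contrastive) ? (i / 2) : i;
        const uintptr_t path_p = (uintptr_t)paths[img_index];
        const int class_id = (int)(path_p % k);
        printf("row %d -> image %d -> pseudo-class %d\n", i, img_index, class_id);
    }
    return 0;
}

The absolute class_id differs from run to run with the allocator, which appears harmless here: only the agreement between the two augmented rows of each image matters to the contrastive layer.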