diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index 1590fe7b..2376835e 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -82,9 +82,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
     if(l.xnor){
         binarize_filters_gpu(l.filters_gpu, l.n, l.c*l.size*l.size, l.binary_filters_gpu);
         swap_binary(&l);
-        for(i = 0; i < l.batch; ++i){
-            binarize_input_gpu(state.input + i*l.inputs, l.c, l.h*l.w, l.binary_input_gpu + i*l.inputs);
-        }
+        binarize_gpu(state.input, l.c*l.h*l.w*l.batch, l.binary_input_gpu);
         state.input = l.binary_input_gpu;
     }
 
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index c88cb0ad..4014a24d 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -45,6 +45,18 @@ void binarize_filters(float *filters, int n, int size, float *binary)
     }
 }
 
+/*
+ * Element-wise sign binarization: binary[i] is set to 1 where input[i] > 0
+ * and to -1 otherwise (so zero maps to -1), for i in [0, n).
+ */
+void binarize_cpu(float *input, int n, float *binary)
+{
+    int i;
+    for(i = 0; i < n; ++i){
+        binary[i] = (input[i] > 0) ? 1 : -1;
+    }
+}
+
 void binarize_input(float *input, int n, int size, float *binary)
 {
     int i, s;
@@ -426,12 +438,10 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
        }
      */
 
-    if(l.xnor && (l.c%32 != 0 || !AI2)){
+    if(l.xnor){
         binarize_filters(l.filters, l.n, l.c*l.size*l.size, l.binary_filters);
         swap_binary(&l);
-        for(i = 0; i < l.batch; ++i){
-            binarize_input(state.input + i*l.inputs, l.c, l.h*l.w, l.binary_input + i*l.inputs);
-        }
+        binarize_cpu(state.input, l.c*l.h*l.w*l.batch, l.binary_input);
         state.input = l.binary_input;
     }
 
diff --git a/src/darknet.c b/src/darknet.c
index a9b24332..aee9521e 100644
--- a/src/darknet.c
+++ b/src/darknet.c
@@ -88,6 +88,35 @@ void average(int argc, char *argv[])
     save_weights(sum, outfile);
 }
 
+/*
+ * Parse the network in cfgfile and print an operation count summed over
+ * its CONVOLUTIONAL and CONNECTED layers; every other layer type is
+ * ignored.  The factor of 2 per weight presumably counts one multiply
+ * and one add.
+ */
+void operations(char *cfgfile)
+{
+    if(!cfgfile){
+        fprintf(stderr, "usage: darknet ops <cfgfile>\n");
+        return;
+    }
+    gpu_index = -1;
+    network net = parse_network_cfg(cfgfile);
+    int i;
+    /* Accumulate in 64 bits and widen with 2LL before multiplying: the
+     * per-layer products overflow int for ordinary layer sizes. */
+    long long ops = 0;
+    for(i = 0; i < net.n; ++i){
+        layer l = net.layers[i];
+        if(l.type == CONVOLUTIONAL){
+            ops += 2LL * l.n * l.size*l.size*l.c * l.out_h*l.out_w;
+        } else if(l.type == CONNECTED){
+            ops += 2LL * l.inputs * l.outputs;
+        }
+    }
+    printf("Floating Point Operations: %lld\n", ops);
+}
+
 void partial(char *cfgfile, char *weightfile, char *outfile, int max)
 {
     gpu_index = -1;
@@ -288,8 +317,12 @@ int main(int argc, char **argv)
         normalize_net(argv[2], argv[3], argv[4]);
     } else if (0 == strcmp(argv[1], "rescale")){
         rescale_net(argv[2], argv[3], argv[4]);
+    } else if (0 == strcmp(argv[1], "ops")){
+        operations(argv[2]);
     } else if (0 == strcmp(argv[1], "partial")){
         partial(argv[2], argv[3], argv[4], atoi(argv[5]));
+    } else if (0 == strcmp(argv[1], "average")){
+        average(argc, argv);
     } else if (0 == strcmp(argv[1], "stacked")){
         stacked(argv[2], argv[3], argv[4]);
     } else if (0 == strcmp(argv[1], "visualize")){
diff --git a/src/detection_layer.c b/src/detection_layer.c
index f7019ef2..1b0f1268 100644
--- a/src/detection_layer.c
+++ b/src/detection_layer.c
@@ -133,9 +133,11 @@ void forward_detection_layer(const detection_layer l, network_state state)
                             best_index = 0;
                         }
                     }
+                    /*
                     if(1 && *(state.net.seen) < 100000){
                         best_index = rand()%l.n;
                     }
+                    */
 
                     int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords;
                     int tbox_index = truth_index + 1 + l.classes;
diff --git a/src/xnor_layer.c b/src/xnor_layer.c
index 6c5e9b8a..e2fca7e8 100644
--- a/src/xnor_layer.c
+++ b/src/xnor_layer.c
@@ -66,7 +66,7 @@ void forward_xnor_layer(const layer l, network_state state)
     ai2_bin_conv_layer al = ai2_make_bin_conv_layer(b, c, ix, iy, wx, wy, s, pad);
 
     // OPTIONAL: You need to set the real-valued input like:
-    ai2_setFltInput(&al, state.input);
+    ai2_setFltInput_unpadded(&al, state.input);
     // The above function will automatically binarize the input for the layer (channel wise).
     // If commented: using the default 0-valued input.
 