Midway through lots of fixes, checkpoint

Joseph Redmon 2014-07-17 09:05:07 -07:00
parent 70d622ea54
commit 1b94df24fd
6 changed files with 33 additions and 27 deletions

View File

@@ -43,7 +43,7 @@ float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
 float activate(float x, ACTIVATION a, float dropout)
 {
-    if((float)rand()/RAND_MAX < dropout) return 0;
+    if(dropout && (float)rand()/RAND_MAX < dropout) return 0;
     switch(a){
         case LINEAR:
             return linear_activate(x)/(1-dropout);
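
The change above makes the dropout test short-circuit: when dropout is 0, rand() is never consumed and nothing is zeroed, while survivors are still scaled by 1/(1-dropout) so the expected activation is unchanged (inverted dropout). A minimal standalone sketch of that convention, using a hypothetical dropout_scale() helper rather than the darknet API:

#include <stdio.h>
#include <stdlib.h>

/* Drop with probability `dropout`, scale survivors by 1/(1-dropout). */
float dropout_scale(float x, float dropout)
{
    /* Short-circuit keeps rand() untouched when dropout is disabled. */
    if(dropout && (float)rand()/RAND_MAX < dropout) return 0;
    return x/(1-dropout);
}

int main()
{
    srand(0);
    int i, n = 100000;
    float sum = 0;
    for(i = 0; i < n; ++i) sum += dropout_scale(1.0, 0.5);
    /* Mean stays near 1.0 because survivors are rescaled. */
    printf("mean activation: %f\n", sum/n);
    return 0;
}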

View File

@@ -300,7 +300,6 @@ void test_vince()
 void test_nist()
 {
-    srand(444444);
     srand(222222);
     network net = parse_network_cfg("cfg/nist.cfg");
     data train = load_categorical_data_csv("data/mnist/mnist_train.csv", 0, 10);
@@ -309,18 +308,18 @@ void test_nist()
     normalize_data_rows(test);
     //randomize_data(train);
     int count = 0;
-    float lr = .000075;
+    float lr = .0001;
     float momentum = .9;
     float decay = 0.0001;
     decay = 0;
     //clock_t start = clock(), end;
-    int iters = 100;
+    int iters = 1000;
     while(++count <= 10){
         clock_t start = clock(), end;
         float loss = train_network_sgd(net, train, iters, lr, momentum, decay);
         end = clock();
-        float test_acc = network_accuracy(net, test);
-        printf("%d: %f %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay);
+        //float test_acc = network_accuracy(net, test);
+        float test_acc = 0;
+        printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay);
         //printf("%5d Training Loss: %lf, Params: %f %f %f, ",count*1000, loss, lr, momentum, decay);
         //end = clock();
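
The reworked printout above labels each field and times one call to train_network_sgd with clock(). The pattern is just two clock() reads around the work, with the tick delta divided by CLOCKS_PER_SEC; a minimal sketch, with train_step() as a hypothetical stand-in for the real training call:

#include <stdio.h>
#include <time.h>

float train_step(void){ return 0.5; } /* placeholder for train_network_sgd */

int main()
{
    int count = 0;
    while(++count <= 10){
        clock_t start = clock(), end;
        float loss = train_step();
        end = clock();
        printf("%d: Loss: %f, Time: %lf seconds\n",
                count, loss, (double)(end-start)/CLOCKS_PER_SEC);
    }
    return 0;
}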

View File

@@ -77,7 +77,7 @@ void backward_connected_layer(connected_layer layer, float *input, float *delta)
     int i;
     for(i = 0; i < layer.outputs*layer.batch; ++i){
         layer.delta[i] *= gradient(layer.output[i], layer.activation);
-        layer.bias_updates[i%layer.batch] += layer.delta[i];
+        layer.bias_updates[i%layer.outputs] += layer.delta[i];
     }
     int m = layer.inputs;
     int k = layer.batch;
@@ -85,7 +85,7 @@ void backward_connected_layer(connected_layer layer, float *input, float *delta)
     float *a = input;
     float *b = layer.delta;
     float *c = layer.weight_updates;
-    gemm(1,0,m,n,k,1,a,k,b,n,1,c,n);
+    gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);
     m = layer.batch;
     k = layer.outputs;
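
Both fixes above follow from delta being stored row-major as `batch` rows of `outputs` columns: element i belongs to output unit i % layer.outputs, so that is the bias slot it must accumulate into, and the transposed input passed to gemm has row length m = layer.inputs, so m (not the row count k = layer.batch) is its leading dimension. A tiny sketch of the bias accumulation under that layout assumption:

#include <stdio.h>

int main()
{
    int batch = 2, outputs = 3;
    float delta[6] = {1,2,3, 4,5,6};   /* two batch rows of three outputs */
    float bias_updates[3] = {0};
    int i;
    for(i = 0; i < outputs*batch; ++i){
        /* i%outputs picks the column (output unit), summing over the batch */
        bias_updates[i%outputs] += delta[i];
    }
    for(i = 0; i < outputs; ++i) printf("%f ", bias_updates[i]);
    printf("\n");   /* prints 5 7 9: one column sum per bias */
    return 0;
}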

View File

@@ -86,7 +86,6 @@ convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, in
     layer->activation = activation;
     fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
-    srand(0);
     return layer;
 }
@@ -133,10 +132,15 @@ void forward_convolutional_layer(const convolutional_layer layer, float *in)
     float *a = layer.filters;
     float *b = layer.col_image;
     float *c = layer.output;
-    im2col_cpu(in,layer.batch, layer.c, layer.h, layer.w,
+    im2col_cpu(in, layer.batch, layer.c, layer.h, layer.w,
            layer.size, layer.stride, layer.pad, b);
     bias_output(layer);
     gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
+    /*
+    int i;
+    for(i = 0; i < m*n; ++i) printf("%f, ", layer.output[i]);
+    printf("\n");
+    */
     activate_array(layer.output, m*n, layer.activation, 0.);
 }
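
forward_convolutional_layer treats convolution as im2col plus a single gemm: a holds the filter matrix (one row of c*size*size weights per filter), b the column-ized image, and c the output, with one column per output location. A small sketch of the shape bookkeeping, assuming the usual out = (in + 2*pad - size)/stride + 1 convention (convolutional_out_size() is a hypothetical helper):

#include <stdio.h>

int convolutional_out_size(int in, int size, int stride, int pad)
{
    return (in + 2*pad - size)/stride + 1;
}

int main()
{
    int h = 28, w = 28, c = 1, n = 16;   /* MNIST-sized input, 16 filters */
    int size = 5, stride = 1, pad = 0;
    int out_h = convolutional_out_size(h, size, stride, pad);
    int out_w = convolutional_out_size(w, size, stride, pad);
    printf("Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n",
            h, w, c, n, out_h, out_w, n);
    /* gemm shapes: M = filters, K = c*size*size, N = out_h*out_w */
    printf("gemm: M=%d K=%d N=%d\n", n, c*size*size, out_h*out_w);
    return 0;
}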

View File

@@ -1,4 +1,5 @@
 #include "mini_blas.h"
+#include <stdio.h>
 
 inline float im2col_get_pixel(float *im, int height, int width, int channels,
         int row, int col, int channel, int pad)
@@ -27,7 +28,7 @@ void im2col_cpu(float* data_im,
     }
     int channels_col = channels * ksize * ksize;
     int im_size = height*width*channels;
-    int col_size = height_col*width_col*channels_col;
+    //int col_size = height_col*width_col*channels_col;
     for (b = 0; b < batch; ++b) {
         for (c = 0; c < channels_col; ++c) {
             int w_offset = c % ksize;
@@ -37,14 +38,14 @@ void im2col_cpu(float* data_im,
                 for (w = 0; w < width_col; ++w) {
                     int im_row = h_offset + h * stride;
                     int im_col = w_offset + w * stride;
-                    data_col[(c * height_col + h) * width_col + w] =
-                        im2col_get_pixel(data_im, height, width, channels,
+                    int col_index = (c * height_col + h) * width_col + w + (batch-1) * c * height_col*width_col;
+                    data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,
                             im_row, im_col, c_im, pad);
                 }
             }
         }
         data_im += im_size;
-        data_col+= col_size;
+        data_col+= channels_col;
     }
 }
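
To see what the indexing above builds, here is a minimal single-image im2col (one channel, no padding, no batch loop) using the same (c * height_col + h) * width_col + w formula. Each output row gathers one kernel offset across every patch position, which is exactly what lets the convolution collapse into a single gemm:

#include <stdio.h>

int main()
{
    int height = 3, width = 3, ksize = 2, stride = 1;
    float im[9] = {1,2,3, 4,5,6, 7,8,9};
    int height_col = (height - ksize)/stride + 1;   /* 2 patch rows */
    int width_col  = (width  - ksize)/stride + 1;   /* 2 patch cols */
    int channels_col = ksize*ksize;                 /* 4 kernel offsets */
    float col[16];
    int c, h, w;
    for(c = 0; c < channels_col; ++c){
        int w_offset = c % ksize;
        int h_offset = c / ksize;
        for(h = 0; h < height_col; ++h){
            for(w = 0; w < width_col; ++w){
                int im_row = h_offset + h*stride;
                int im_col = w_offset + w*stride;
                col[(c*height_col + h)*width_col + w] = im[im_row*width + im_col];
            }
        }
    }
    for(c = 0; c < channels_col; ++c){      /* one row per kernel offset */
        for(w = 0; w < height_col*width_col; ++w)
            printf("%g ", col[c*height_col*width_col + w]);
        printf("\n");
    }
    return 0;
}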

View File

@@ -272,7 +272,9 @@ float calculate_error_network(network net, float *truth)
     for(i = 0; i < get_network_output_size(net)*net.batch; ++i){
         //if(i %get_network_output_size(net) == 0) printf("\n");
         //printf("%5.2f %5.2f, ", out[i], truth[i]);
+        //if(i == get_network_output_size(net)) printf("\n");
         delta[i] = truth[i] - out[i];
+        //printf("%f, ", delta[i]);
         sum += delta[i]*delta[i];
     }
     //printf("\n");
@@ -382,20 +384,20 @@ float train_network_sgd(network net, data d, int n, float step, float momentum,float decay)
 }
 float train_network_batch(network net, data d, int n, float step, float momentum,float decay)
 {
-    int i;
-    int correct = 0;
+    int i,j;
+    float sum = 0;
     int batch = 2;
     for(i = 0; i < n; ++i){
+        for(j = 0; j < batch; ++j){
         int index = rand()%d.X.rows;
         float *x = d.X.vals[index];
         float *y = d.y.vals[index];
         forward_network(net, x, 1);
-        int class = get_predicted_class_network(net);
-        backward_network(net, x, y);
-        correct += (y[class]?1:0);
+        sum += backward_network(net, x, y);
+        }
         update_network(net, step, momentum, decay);
-        return (float)correct/n;
     }
+    return (float)sum/(n*batch);
 }
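
The rewrite of train_network_batch also removes a return that sat inside the loop body, so the old version gave up after a single iteration. The new shape accumulates the loss backward_network returns over each mini-batch of `batch` samples, updates once per mini-batch, and reports the mean over all n*batch samples. A toy sketch of that loop, with fake_backward() standing in for the real forward/backward pass:

#include <stdio.h>
#include <stdlib.h>

float fake_backward(void){ return (float)rand()/RAND_MAX; } /* per-sample loss */

int main()
{
    srand(0);
    int n = 100, batch = 2;
    float sum = 0;
    int i, j;
    for(i = 0; i < n; ++i){
        for(j = 0; j < batch; ++j){
            sum += fake_backward();     /* accumulate over the mini-batch */
        }
        /* update_network(...) would apply the summed gradients here */
    }
    printf("mean loss: %f\n", sum/(n*batch));
    return 0;
}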