Midway through lots of fixes, checkpoint

Joseph Redmon 2014-07-17 09:05:07 -07:00
parent 70d622ea54
commit 1b94df24fd
6 changed files with 33 additions and 27 deletions

View File

@@ -43,7 +43,7 @@ float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
 float activate(float x, ACTIVATION a, float dropout)
 {
-    if((float)rand()/RAND_MAX < dropout) return 0;
+    if(dropout && (float)rand()/RAND_MAX < dropout) return 0;
     switch(a){
         case LINEAR:
             return linear_activate(x)/(1-dropout);
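
The change above makes the dropout test short-circuit: when dropout is 0, rand() is never consumed and nothing is zeroed, while survivors are still scaled by 1/(1-dropout) so the expected activation is unchanged (inverted dropout). A minimal standalone sketch of that convention, using a hypothetical dropout_scale() helper rather than the darknet API:

#include <stdio.h>
#include <stdlib.h>

/* Drop with probability `dropout`, scale survivors by 1/(1-dropout). */
float dropout_scale(float x, float dropout)
{
    /* Short-circuit keeps rand() untouched when dropout is disabled. */
    if(dropout && (float)rand()/RAND_MAX < dropout) return 0;
    return x/(1-dropout);
}

int main()
{
    srand(0);
    int i, n = 100000;
    float sum = 0;
    for(i = 0; i < n; ++i) sum += dropout_scale(1.0, 0.5);
    /* Mean stays near 1.0 because survivors are rescaled. */
    printf("mean activation: %f\n", sum/n);
    return 0;
}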

View File

@@ -300,7 +300,6 @@ void test_vince()
 void test_nist()
 {
-    srand(444444);
     srand(222222);
     network net = parse_network_cfg("cfg/nist.cfg");
     data train = load_categorical_data_csv("data/mnist/mnist_train.csv", 0, 10);
@@ -309,18 +308,18 @@ void test_nist()
     normalize_data_rows(test);
     //randomize_data(train);
     int count = 0;
-    float lr = .000075;
+    float lr = .0001;
     float momentum = .9;
     float decay = 0.0001;
     decay = 0;
     //clock_t start = clock(), end;
-    int iters = 100;
+    int iters = 1000;
     while(++count <= 10){
         clock_t start = clock(), end;
         float loss = train_network_sgd(net, train, iters, lr, momentum, decay);
         end = clock();
-        float test_acc = network_accuracy(net, test);
-        printf("%d: %f %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay);
+        //float test_acc = network_accuracy(net, test);
+        float test_acc = 0;
+        printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay);
         //printf("%5d Training Loss: %lf, Params: %f %f %f, ",count*1000, loss, lr, momentum, decay);
         //end = clock();
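
The reworked printout above labels each field and times one call to train_network_sgd with clock(). The pattern is just two clock() reads around the work, with the tick delta divided by CLOCKS_PER_SEC; a minimal sketch, with train_step() as a hypothetical stand-in for the real training call:

#include <stdio.h>
#include <time.h>

float train_step(void){ return 0.5; } /* placeholder for train_network_sgd */

int main()
{
    int count = 0;
    while(++count <= 10){
        clock_t start = clock(), end;
        float loss = train_step();
        end = clock();
        printf("%d: Loss: %f, Time: %lf seconds\n",
                count, loss, (double)(end-start)/CLOCKS_PER_SEC);
    }
    return 0;
}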

View File

@@ -77,7 +77,7 @@ void backward_connected_layer(connected_layer layer, float *input, float *delta)
     int i;
     for(i = 0; i < layer.outputs*layer.batch; ++i){
         layer.delta[i] *= gradient(layer.output[i], layer.activation);
-        layer.bias_updates[i%layer.batch] += layer.delta[i];
+        layer.bias_updates[i%layer.outputs] += layer.delta[i];
     }
     int m = layer.inputs;
     int k = layer.batch;
@@ -85,7 +85,7 @@ void backward_connected_layer(connected_layer layer, float *input, float *delta)
     float *a = input;
     float *b = layer.delta;
     float *c = layer.weight_updates;
-    gemm(1,0,m,n,k,1,a,k,b,n,1,c,n);
+    gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);
     m = layer.batch;
     k = layer.outputs;
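
Both fixes above follow from delta being stored row-major as `batch` rows of `outputs` columns: element i belongs to output unit i % layer.outputs, so that is the bias slot it must accumulate into, and the transposed input passed to gemm has row length m = layer.inputs, so m (not the row count k = layer.batch) is its leading dimension. A tiny sketch of the bias accumulation under that layout assumption:

#include <stdio.h>

int main()
{
    int batch = 2, outputs = 3;
    float delta[6] = {1,2,3, 4,5,6};   /* two batch rows of three outputs */
    float bias_updates[3] = {0};
    int i;
    for(i = 0; i < outputs*batch; ++i){
        /* i%outputs picks the column (output unit), summing over the batch */
        bias_updates[i%outputs] += delta[i];
    }
    for(i = 0; i < outputs; ++i) printf("%f ", bias_updates[i]);
    printf("\n");   /* prints 5 7 9: one column sum per bias */
    return 0;
}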

View File

@@ -86,7 +86,6 @@ convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, in
     layer->activation = activation;
     fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
-    srand(0);
     return layer;
 }
@@ -133,10 +132,15 @@ void forward_convolutional_layer(const convolutional_layer layer, float *in)
     float *a = layer.filters;
     float *b = layer.col_image;
     float *c = layer.output;
-    im2col_cpu(in,layer.batch, layer.c, layer.h, layer.w,
+    im2col_cpu(in, layer.batch, layer.c, layer.h, layer.w,
            layer.size, layer.stride, layer.pad, b);
     bias_output(layer);
     gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
+    /*
+    int i;
+    for(i = 0; i < m*n; ++i) printf("%f, ", layer.output[i]);
+    printf("\n");
+    */
     activate_array(layer.output, m*n, layer.activation, 0.);
 }
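
forward_convolutional_layer treats convolution as im2col plus a single gemm: a holds the filter matrix (one row of c*size*size weights per filter), b the column-ized image, and c the output, with one column per output location. A small sketch of the shape bookkeeping, assuming the usual out = (in + 2*pad - size)/stride + 1 convention (convolutional_out_size() is a hypothetical helper):

#include <stdio.h>

int convolutional_out_size(int in, int size, int stride, int pad)
{
    return (in + 2*pad - size)/stride + 1;
}

int main()
{
    int h = 28, w = 28, c = 1, n = 16;   /* MNIST-sized input, 16 filters */
    int size = 5, stride = 1, pad = 0;
    int out_h = convolutional_out_size(h, size, stride, pad);
    int out_w = convolutional_out_size(w, size, stride, pad);
    printf("Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n",
            h, w, c, n, out_h, out_w, n);
    /* gemm shapes: M = filters, K = c*size*size, N = out_h*out_w */
    printf("gemm: M=%d K=%d N=%d\n", n, c*size*size, out_h*out_w);
    return 0;
}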

View File

@@ -1,4 +1,5 @@
 #include "mini_blas.h"
+#include <stdio.h>
 
 inline float im2col_get_pixel(float *im, int height, int width, int channels,
         int row, int col, int channel, int pad)
@@ -27,7 +28,7 @@ void im2col_cpu(float* data_im,
     }
     int channels_col = channels * ksize * ksize;
     int im_size = height*width*channels;
-    int col_size = height_col*width_col*channels_col;
+    //int col_size = height_col*width_col*channels_col;
     for (b = 0; b < batch; ++b) {
         for (c = 0; c < channels_col; ++c) {
             int w_offset = c % ksize;
@@ -37,14 +38,14 @@ void im2col_cpu(float* data_im,
                 for (w = 0; w < width_col; ++w) {
                     int im_row = h_offset + h * stride;
                     int im_col = w_offset + w * stride;
-                    data_col[(c * height_col + h) * width_col + w] =
-                        im2col_get_pixel(data_im, height, width, channels,
+                    int col_index = (c * height_col + h) * width_col + w + (batch-1) * c * height_col*width_col;
+                    data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,
                             im_row, im_col, c_im, pad);
                 }
             }
         }
         data_im += im_size;
-        data_col+= col_size;
+        data_col+= channels_col;
     }
 }
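
To see what the indexing above builds, here is a minimal single-image im2col (one channel, no padding, no batch loop) using the same (c * height_col + h) * width_col + w formula. Each output row gathers one kernel offset across every patch position, which is exactly what lets the convolution collapse into a single gemm:

#include <stdio.h>

int main()
{
    int height = 3, width = 3, ksize = 2, stride = 1;
    float im[9] = {1,2,3, 4,5,6, 7,8,9};
    int height_col = (height - ksize)/stride + 1;   /* 2 patch rows */
    int width_col  = (width  - ksize)/stride + 1;   /* 2 patch cols */
    int channels_col = ksize*ksize;                 /* 4 kernel offsets */
    float col[16];
    int c, h, w;
    for(c = 0; c < channels_col; ++c){
        int w_offset = c % ksize;
        int h_offset = c / ksize;
        for(h = 0; h < height_col; ++h){
            for(w = 0; w < width_col; ++w){
                int im_row = h_offset + h*stride;
                int im_col = w_offset + w*stride;
                col[(c*height_col + h)*width_col + w] = im[im_row*width + im_col];
            }
        }
    }
    for(c = 0; c < channels_col; ++c){      /* one row per kernel offset */
        for(w = 0; w < height_col*width_col; ++w)
            printf("%g ", col[c*height_col*width_col + w]);
        printf("\n");
    }
    return 0;
}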

View File

@@ -272,7 +272,9 @@ float calculate_error_network(network net, float *truth)
     for(i = 0; i < get_network_output_size(net)*net.batch; ++i){
         //if(i %get_network_output_size(net) == 0) printf("\n");
         //printf("%5.2f %5.2f, ", out[i], truth[i]);
+        //if(i == get_network_output_size(net)) printf("\n");
         delta[i] = truth[i] - out[i];
+        //printf("%f, ", delta[i]);
         sum += delta[i]*delta[i];
     }
     //printf("\n");
@@ -382,20 +384,20 @@ float train_network_sgd(network net, data d, int n, float step, float momentum,float decay)
 }
 float train_network_batch(network net, data d, int n, float step, float momentum,float decay)
 {
-    int i;
-    int correct = 0;
+    int i,j;
+    float sum = 0;
     int batch = 2;
     for(i = 0; i < n; ++i){
+        for(j = 0; j < batch; ++j){
         int index = rand()%d.X.rows;
         float *x = d.X.vals[index];
         float *y = d.y.vals[index];
         forward_network(net, x, 1);
-        int class = get_predicted_class_network(net);
-        backward_network(net, x, y);
-        correct += (y[class]?1:0);
+        sum += backward_network(net, x, y);
+        }
         update_network(net, step, momentum, decay);
-        return (float)correct/n;
     }
+    return (float)sum/(n*batch);
 }
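
The rewrite of train_network_batch also removes a return that sat inside the loop body, so the old version gave up after a single iteration. The new shape accumulates the loss backward_network returns over each mini-batch of `batch` samples, updates once per mini-batch, and reports the mean over all n*batch samples. A toy sketch of that loop, with fake_backward() standing in for the real forward/backward pass:

#include <stdio.h>
#include <stdlib.h>

float fake_backward(void){ return (float)rand()/RAND_MAX; } /* per-sample loss */

int main()
{
    srand(0);
    int n = 100, batch = 2;
    float sum = 0;
    int i, j;
    for(i = 0; i < n; ++i){
        for(j = 0; j < batch; ++j){
            sum += fake_backward();     /* accumulate over the mini-batch */
        }
        /* update_network(...) would apply the summed gradients here */
    }
    printf("mean loss: %f\n", sum/(n*batch));
    return 0;
}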