mirror of https://github.com/AlexeyAB/darknet.git
Midway through lots of fixes, checkpoint
This commit is contained in:
parent
70d622ea54
commit
1b94df24fd
|
@ -43,7 +43,7 @@ float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
|
|||
|
||||
float activate(float x, ACTIVATION a, float dropout)
|
||||
{
|
||||
if((float)rand()/RAND_MAX < dropout) return 0;
|
||||
if(dropout && (float)rand()/RAND_MAX < dropout) return 0;
|
||||
switch(a){
|
||||
case LINEAR:
|
||||
return linear_activate(x)/(1-dropout);
|
||||
|
|
11
src/cnn.c
11
src/cnn.c
|
@ -300,7 +300,6 @@ void test_vince()
|
|||
|
||||
void test_nist()
|
||||
{
|
||||
srand(444444);
|
||||
srand(222222);
|
||||
network net = parse_network_cfg("cfg/nist.cfg");
|
||||
data train = load_categorical_data_csv("data/mnist/mnist_train.csv", 0, 10);
|
||||
|
@ -309,18 +308,18 @@ void test_nist()
|
|||
normalize_data_rows(test);
|
||||
//randomize_data(train);
|
||||
int count = 0;
|
||||
float lr = .000075;
|
||||
float lr = .0001;
|
||||
float momentum = .9;
|
||||
float decay = 0.0001;
|
||||
decay = 0;
|
||||
//clock_t start = clock(), end;
|
||||
int iters = 100;
|
||||
int iters = 1000;
|
||||
while(++count <= 10){
|
||||
clock_t start = clock(), end;
|
||||
float loss = train_network_sgd(net, train, iters, lr, momentum, decay);
|
||||
end = clock();
|
||||
float test_acc = network_accuracy(net, test);
|
||||
printf("%d: %f %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay);
|
||||
//float test_acc = network_accuracy(net, test);
|
||||
float test_acc = 0;
|
||||
printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, lr, momentum, decay);
|
||||
|
||||
//printf("%5d Training Loss: %lf, Params: %f %f %f, ",count*1000, loss, lr, momentum, decay);
|
||||
//end = clock();
|
||||
|
|
|
@ -77,7 +77,7 @@ void backward_connected_layer(connected_layer layer, float *input, float *delta)
|
|||
int i;
|
||||
for(i = 0; i < layer.outputs*layer.batch; ++i){
|
||||
layer.delta[i] *= gradient(layer.output[i], layer.activation);
|
||||
layer.bias_updates[i%layer.batch] += layer.delta[i];
|
||||
layer.bias_updates[i%layer.outputs] += layer.delta[i];
|
||||
}
|
||||
int m = layer.inputs;
|
||||
int k = layer.batch;
|
||||
|
@ -85,7 +85,7 @@ void backward_connected_layer(connected_layer layer, float *input, float *delta)
|
|||
float *a = input;
|
||||
float *b = layer.delta;
|
||||
float *c = layer.weight_updates;
|
||||
gemm(1,0,m,n,k,1,a,k,b,n,1,c,n);
|
||||
gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);
|
||||
|
||||
m = layer.batch;
|
||||
k = layer.outputs;
|
||||
|
|
|
@ -86,7 +86,6 @@ convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, in
|
|||
layer->activation = activation;
|
||||
|
||||
fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
|
||||
srand(0);
|
||||
|
||||
return layer;
|
||||
}
|
||||
|
@ -133,10 +132,15 @@ void forward_convolutional_layer(const convolutional_layer layer, float *in)
|
|||
float *a = layer.filters;
|
||||
float *b = layer.col_image;
|
||||
float *c = layer.output;
|
||||
im2col_cpu(in,layer.batch, layer.c, layer.h, layer.w,
|
||||
im2col_cpu(in, layer.batch, layer.c, layer.h, layer.w,
|
||||
layer.size, layer.stride, layer.pad, b);
|
||||
bias_output(layer);
|
||||
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
||||
/*
|
||||
int i;
|
||||
for(i = 0; i < m*n; ++i) printf("%f, ", layer.output[i]);
|
||||
printf("\n");
|
||||
*/
|
||||
activate_array(layer.output, m*n, layer.activation, 0.);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include "mini_blas.h"
|
||||
#include <stdio.h>
|
||||
|
||||
inline float im2col_get_pixel(float *im, int height, int width, int channels,
|
||||
int row, int col, int channel, int pad)
|
||||
|
@ -27,7 +28,7 @@ void im2col_cpu(float* data_im,
|
|||
}
|
||||
int channels_col = channels * ksize * ksize;
|
||||
int im_size = height*width*channels;
|
||||
int col_size = height_col*width_col*channels_col;
|
||||
//int col_size = height_col*width_col*channels_col;
|
||||
for (b = 0; b < batch; ++b) {
|
||||
for (c = 0; c < channels_col; ++c) {
|
||||
int w_offset = c % ksize;
|
||||
|
@ -37,14 +38,14 @@ void im2col_cpu(float* data_im,
|
|||
for (w = 0; w < width_col; ++w) {
|
||||
int im_row = h_offset + h * stride;
|
||||
int im_col = w_offset + w * stride;
|
||||
data_col[(c * height_col + h) * width_col + w] =
|
||||
im2col_get_pixel(data_im, height, width, channels,
|
||||
int col_index = (c * height_col + h) * width_col + w + (batch-1) * c * height_col*width_col;
|
||||
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,
|
||||
im_row, im_col, c_im, pad);
|
||||
}
|
||||
}
|
||||
}
|
||||
data_im += im_size;
|
||||
data_col+= col_size;
|
||||
data_col+= channels_col;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -272,7 +272,9 @@ float calculate_error_network(network net, float *truth)
|
|||
for(i = 0; i < get_network_output_size(net)*net.batch; ++i){
|
||||
//if(i %get_network_output_size(net) == 0) printf("\n");
|
||||
//printf("%5.2f %5.2f, ", out[i], truth[i]);
|
||||
//if(i == get_network_output_size(net)) printf("\n");
|
||||
delta[i] = truth[i] - out[i];
|
||||
//printf("%f, ", delta[i]);
|
||||
sum += delta[i]*delta[i];
|
||||
}
|
||||
//printf("\n");
|
||||
|
@ -382,20 +384,20 @@ float train_network_sgd(network net, data d, int n, float step, float momentum,f
|
|||
}
|
||||
float train_network_batch(network net, data d, int n, float step, float momentum,float decay)
|
||||
{
|
||||
int i;
|
||||
int correct = 0;
|
||||
int i,j;
|
||||
float sum = 0;
|
||||
int batch = 2;
|
||||
for(i = 0; i < n; ++i){
|
||||
for(j = 0; j < batch; ++j){
|
||||
int index = rand()%d.X.rows;
|
||||
float *x = d.X.vals[index];
|
||||
float *y = d.y.vals[index];
|
||||
forward_network(net, x, 1);
|
||||
int class = get_predicted_class_network(net);
|
||||
backward_network(net, x, y);
|
||||
correct += (y[class]?1:0);
|
||||
sum += backward_network(net, x, y);
|
||||
}
|
||||
update_network(net, step, momentum, decay);
|
||||
return (float)correct/n;
|
||||
|
||||
}
|
||||
return (float)sum/(n*batch);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue