mirror of https://github.com/AlexeyAB/darknet.git
Fixed LSTM-layer
This commit is contained in:
parent
85b99872cb
commit
110b5240a4
|
@ -0,0 +1,34 @@
|
|||
[net]
|
||||
subdivisions=8
|
||||
inputs=256
|
||||
batch = 128
|
||||
momentum=0.9
|
||||
decay=0.001
|
||||
max_batches = 2000
|
||||
time_steps=576
|
||||
learning_rate=1.0
|
||||
policy=steps
|
||||
steps=1000,1500
|
||||
scales=.1,.1
|
||||
|
||||
[lstm]
|
||||
batch_normalize=1
|
||||
output = 1024
|
||||
|
||||
[lstm]
|
||||
batch_normalize=1
|
||||
output = 1024
|
||||
|
||||
[lstm]
|
||||
batch_normalize=1
|
||||
output = 1024
|
||||
|
||||
[connected]
|
||||
output=256
|
||||
activation=leaky
|
||||
|
||||
[softmax]
|
||||
|
||||
[cost]
|
||||
type=sse
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
[net]
|
||||
subdivisions=1
|
||||
subdivisions=8
|
||||
inputs=256
|
||||
batch = 128
|
||||
momentum=0.9
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
rem Create your own text.txt file with some text.
|
||||
|
||||
|
||||
darknet.exe rnn train cfg/lstm.train.cfg -file text.txt
|
||||
|
||||
|
||||
rem darknet.exe rnn train cfg/lstm.train.cfg backup/lstm.backup -file text.txt
|
||||
|
||||
|
||||
pause
|
||||
|
||||
darknet.exe rnn generate cfg/lstm.train.cfg backup/lstm.backup -srand 2 -len 500 -seed apple
|
||||
|
||||
darknet.exe rnn generate cfg/lstm.train.cfg backup/lstm.backup -srand 2 -len 500 -seed apple > text_gen.txt
|
||||
|
||||
pause
|
|
@ -0,0 +1,34 @@
|
|||
[net]
|
||||
subdivisions=8
|
||||
inputs=256
|
||||
batch = 128
|
||||
momentum=0.9
|
||||
decay=0.001
|
||||
max_batches = 2000
|
||||
time_steps=576
|
||||
learning_rate=1.0
|
||||
policy=steps
|
||||
steps=1000,1500
|
||||
scales=.1,.1
|
||||
|
||||
[lstm]
|
||||
batch_normalize=1
|
||||
output = 1024
|
||||
|
||||
[lstm]
|
||||
batch_normalize=1
|
||||
output = 1024
|
||||
|
||||
[lstm]
|
||||
batch_normalize=1
|
||||
output = 1024
|
||||
|
||||
[connected]
|
||||
output=256
|
||||
activation=leaky
|
||||
|
||||
[softmax]
|
||||
|
||||
[cost]
|
||||
type=sse
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
[net]
|
||||
subdivisions=1
|
||||
subdivisions=8
|
||||
inputs=256
|
||||
batch = 128
|
||||
momentum=0.9
|
||||
|
|
|
@ -630,7 +630,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
|
|||
|
||||
void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num)
|
||||
{
|
||||
network net = parse_network_cfg_custom(cfgfile, 1);
|
||||
network net = parse_network_cfg_custom(cfgfile, 1, 0);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
|
@ -713,7 +713,7 @@ void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filena
|
|||
|
||||
void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top)
|
||||
{
|
||||
network net = parse_network_cfg_custom(cfgfile, 1);
|
||||
network net = parse_network_cfg_custom(cfgfile, 1, 0);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
|
@ -1109,7 +1109,7 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
|
|||
{
|
||||
#ifdef OPENCV
|
||||
printf("Classifier Demo\n");
|
||||
network net = parse_network_cfg_custom(cfgfile, 1);
|
||||
network net = parse_network_cfg_custom(cfgfile, 1, 0);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
|
|
|
@ -152,7 +152,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
|
|||
demo_ext_output = ext_output;
|
||||
demo_json_port = json_port;
|
||||
printf("Demo\n");
|
||||
net = parse_network_cfg_custom(cfgfile, 1); // set batch=1
|
||||
net = parse_network_cfg_custom(cfgfile, 1, 0); // set batch=1
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
|
|
|
@ -62,7 +62,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
|||
|
||||
cuda_set_device(gpus[0]);
|
||||
printf(" Prepare additional network for mAP calculation...\n");
|
||||
net_map = parse_network_cfg_custom(cfgfile, 1);
|
||||
net_map = parse_network_cfg_custom(cfgfile, 1, 0);
|
||||
int k; // free memory unnecessary arrays
|
||||
for (k = 0; k < net_map.n; ++k) {
|
||||
free_layer(net_map.layers[k]);
|
||||
|
@ -424,7 +424,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
|
|||
int *map = 0;
|
||||
if (mapf) map = read_map(mapf);
|
||||
|
||||
network net = parse_network_cfg_custom(cfgfile, 1); // set batch=1
|
||||
network net = parse_network_cfg_custom(cfgfile, 1, 0); // set batch=1
|
||||
if (weightfile) {
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
|
@ -548,7 +548,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
|
|||
|
||||
void validate_detector_recall(char *datacfg, char *cfgfile, char *weightfile)
|
||||
{
|
||||
network net = parse_network_cfg_custom(cfgfile, 1); // set batch=1
|
||||
network net = parse_network_cfg_custom(cfgfile, 1, 0); // set batch=1
|
||||
if (weightfile) {
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
|
@ -662,7 +662,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
|
|||
net = *existing_net;
|
||||
}
|
||||
else {
|
||||
net = parse_network_cfg_custom(cfgfile, 1); // set batch=1
|
||||
net = parse_network_cfg_custom(cfgfile, 1, 0); // set batch=1
|
||||
if (weightfile) {
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
|
@ -1235,7 +1235,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
|
|||
char **names = get_labels_custom(name_list, &names_size); //get_labels(name_list);
|
||||
|
||||
image **alphabet = load_alphabet();
|
||||
network net = parse_network_cfg_custom(cfgfile, 1); // set batch=1
|
||||
network net = parse_network_cfg_custom(cfgfile, 1, 0); // set batch=1
|
||||
if (weightfile) {
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
|
|
|
@ -106,6 +106,8 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
|
|||
l.backward_gpu = backward_lstm_layer_gpu;
|
||||
l.update_gpu = update_lstm_layer_gpu;
|
||||
|
||||
//l.state_gpu = cuda_make_array(l.state, batch*l.outputs);
|
||||
|
||||
l.output_gpu = cuda_make_array(0, batch*outputs*steps);
|
||||
l.delta_gpu = cuda_make_array(0, batch*l.outputs*steps);
|
||||
|
||||
|
@ -125,6 +127,7 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
|
|||
l.dc_gpu = cuda_make_array(0, batch*outputs);
|
||||
l.dh_gpu = cuda_make_array(0, batch*outputs);
|
||||
#ifdef CUDNN
|
||||
/*
|
||||
cudnnSetTensor4dDescriptor(l.wf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wf->out_c, l.wf->out_h, l.wf->out_w);
|
||||
cudnnSetTensor4dDescriptor(l.wi->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wi->out_c, l.wi->out_h, l.wi->out_w);
|
||||
cudnnSetTensor4dDescriptor(l.wg->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wg->out_c, l.wg->out_h, l.wg->out_w);
|
||||
|
@ -134,6 +137,7 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
|
|||
cudnnSetTensor4dDescriptor(l.ui->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ui->out_c, l.ui->out_h, l.ui->out_w);
|
||||
cudnnSetTensor4dDescriptor(l.ug->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ug->out_c, l.ug->out_h, l.ug->out_w);
|
||||
cudnnSetTensor4dDescriptor(l.uo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uo->out_c, l.uo->out_h, l.uo->out_w);
|
||||
*/
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -424,7 +428,7 @@ void forward_lstm_layer_gpu(layer l, network_state state)
|
|||
}
|
||||
|
||||
for (i = 0; i < l.steps; ++i) {
|
||||
s.input = l.state_gpu;
|
||||
s.input = l.h_gpu;
|
||||
forward_connected_layer_gpu(wf, s);
|
||||
forward_connected_layer_gpu(wi, s);
|
||||
forward_connected_layer_gpu(wg, s);
|
||||
|
|
|
@ -716,10 +716,10 @@ int is_network(section *s)
|
|||
|
||||
network parse_network_cfg(char *filename)
|
||||
{
|
||||
return parse_network_cfg_custom(filename, 0);
|
||||
return parse_network_cfg_custom(filename, 0, 0);
|
||||
}
|
||||
|
||||
network parse_network_cfg_custom(char *filename, int batch)
|
||||
network parse_network_cfg_custom(char *filename, int batch, int time_steps)
|
||||
{
|
||||
list *sections = read_cfg(filename);
|
||||
node *n = sections->front;
|
||||
|
@ -738,6 +738,7 @@ network parse_network_cfg_custom(char *filename, int batch)
|
|||
params.c = net.c;
|
||||
params.inputs = net.inputs;
|
||||
if (batch > 0) net.batch = batch;
|
||||
if (time_steps > 0) net.time_steps = time_steps;
|
||||
params.batch = net.batch;
|
||||
params.time_steps = net.time_steps;
|
||||
params.net = net;
|
||||
|
@ -1300,7 +1301,7 @@ network *load_network_custom(char *cfg, char *weights, int clear, int batch)
|
|||
{
|
||||
printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear);
|
||||
network *net = calloc(1, sizeof(network));
|
||||
*net = parse_network_cfg_custom(cfg, batch);
|
||||
*net = parse_network_cfg_custom(cfg, batch, 0);
|
||||
if (weights && weights[0] != 0) {
|
||||
load_weights(net, weights);
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#include "network.h"
|
||||
|
||||
network parse_network_cfg(char *filename);
|
||||
network parse_network_cfg_custom(char *filename, int batch);
|
||||
network parse_network_cfg_custom(char *filename, int batch, int time_steps);
|
||||
void save_network(network net, char *filename);
|
||||
void save_weights(network net, char *filename);
|
||||
void save_weights_upto(network net, char *filename, int cutoff);
|
||||
|
|
10
src/rnn.c
10
src/rnn.c
|
@ -163,11 +163,15 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear,
|
|||
int i = (*net.seen)/net.batch;
|
||||
|
||||
int streams = batch/steps;
|
||||
printf("\n batch = %d, steps = %d, streams = %d, subdivisions = %d, text_size = %d \n", batch, steps, streams, net.subdivisions, size);
|
||||
printf(" global_batch = %d \n", batch*net.subdivisions);
|
||||
size_t *offsets = calloc(streams, sizeof(size_t));
|
||||
int j;
|
||||
for(j = 0; j < streams; ++j){
|
||||
offsets[j] = rand_size_t()%size;
|
||||
//printf(" offset[%d] = %d, ", j, offsets[j]);
|
||||
}
|
||||
//printf("\n");
|
||||
|
||||
clock_t time;
|
||||
while(get_current_batch(net) < net.max_batches){
|
||||
|
@ -234,7 +238,7 @@ void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float t
|
|||
char *base = basecfg(cfgfile);
|
||||
fprintf(stderr, "%s\n", base);
|
||||
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
network net = parse_network_cfg_custom(cfgfile, 1, 1); // batch=1, time_steps=1
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
|
@ -273,7 +277,9 @@ void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float t
|
|||
for(j = 0; j < inputs; ++j){
|
||||
if (out[j] < .0001) out[j] = 0;
|
||||
}
|
||||
c = sample_array(out, inputs);
|
||||
//c = sample_array(out, inputs);
|
||||
c = sample_array_custom(out, inputs);
|
||||
//c = max_index(out, inputs);
|
||||
print_symbol(c, tokens);
|
||||
}
|
||||
printf("\n");
|
||||
|
|
18
src/utils.c
18
src/utils.c
|
@ -616,13 +616,27 @@ void scale_array(float *a, int n, float s)
|
|||
int sample_array(float *a, int n)
|
||||
{
|
||||
float sum = sum_array(a, n);
|
||||
scale_array(a, n, 1./sum);
|
||||
scale_array(a, n, 1. / sum);
|
||||
float r = rand_uniform(0, 1);
|
||||
int i;
|
||||
for(i = 0; i < n; ++i){
|
||||
for (i = 0; i < n; ++i) {
|
||||
r = r - a[i];
|
||||
if (r <= 0) return i;
|
||||
}
|
||||
return n - 1;
|
||||
}
|
||||
|
||||
/*
 * Pick a random index from `a`, using its entries as selection weights.
 *
 * a: array of n weights. It is rescaled in place by 1/sum via scale_array()
 *    unless the sum reported by sum_array() is zero.
 * n: number of entries in `a`.
 *
 * Returns the index of the entry at which the running remainder of a uniform
 * draw from rand_uniform(0, 1) drops to zero or below, or n-1 if that never
 * happens (e.g. because of rounding, or because every weight is zero).
 *
 * NOTE(review): sum_array, scale_array, rand_uniform and rand_int are defined
 * elsewhere; their exact semantics (in particular whether rand_int's upper
 * bound is inclusive) should be confirmed against their definitions.
 */
int sample_array_custom(float *a, int n)
{
    float sum = sum_array(a, n);
    /* Skip normalization for a zero sum: scaling by 1/0 would turn the
       weights into NaN/inf and corrupt the caller's array. */
    if (sum != 0) scale_array(a, n, 1./sum);
    float r = rand_uniform(0, 1);
    /* Presumably always 0 with these bounds, i.e. the scan starts at the
       first entry -- TODO confirm against rand_int. */
    int start_index = rand_int(0, 0);
    int i;
    for(i = 0; i < n; ++i){
        /* Scan the array circularly from start_index and report the entry
           that was actually tested, not the loop counter, so the result
           stays correct if start_index is ever non-zero. */
        int index = (i + start_index) % n;
        r = r - a[index];
        if (r <= 0) return index;
    }
    return n-1;
}
|
||||
|
||||
|
|
|
@ -80,6 +80,7 @@ float find_float_arg(int argc, char **argv, char *arg, float def);
|
|||
int find_arg(int argc, char* argv[], char *arg);
|
||||
char *find_char_arg(int argc, char **argv, char *arg, char *def);
|
||||
int sample_array(float *a, int n);
|
||||
int sample_array_custom(float *a, int n);
|
||||
void print_statistics(float *a, int n);
|
||||
unsigned int random_gen();
|
||||
float random_float();
|
||||
|
|
Loading…
Reference in New Issue