From d64693eb7733975d113d6353706328aaf6c6f05a Mon Sep 17 00:00:00 2001 From: Josh Veitch-Michaelis Date: Mon, 18 Mar 2019 23:26:04 +0000 Subject: [PATCH 1/3] add SGDR policy --- include/darknet.h | 5 ++++- src/network.c | 6 ++++++ src/parser.c | 4 ++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/include/darknet.h b/include/darknet.h index 0a1451e3..fb62fc2a 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -518,7 +518,7 @@ struct layer { // network.h typedef enum { - CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM + CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM, SGDR } learning_rate_policy; // network.h @@ -534,6 +534,9 @@ typedef struct network { learning_rate_policy policy; float learning_rate; + float learning_rate_min; + float learning_rate_max; + int batches_per_cycle; float momentum; float decay; float gamma; diff --git a/src/network.c b/src/network.c index cfc747cb..32e5e96e 100644 --- a/src/network.c +++ b/src/network.c @@ -117,6 +117,12 @@ float get_current_rate(network net) return net.learning_rate * pow(rand_uniform(0,1), net.power); case SIG: return net.learning_rate * (1./(1.+exp(net.gamma*(batch_num - net.step)))); + case SGDR: + rate = net.learning_rate_min + + 0.5*(net.learning_rate_max-net.learning_rate_min) + * (1. + cos( (float) (batch_num % net.batches_per_cycle)*3.14159265 / net.batches_per_cycle)); + + return rate; default: fprintf(stderr, "Policy is weird!\n"); return net.learning_rate; diff --git a/src/parser.c b/src/parser.c index 11cc4bfc..8e3af25f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -629,6 +629,7 @@ learning_rate_policy get_policy(char *s) if (strcmp(s, "exp")==0) return EXP; if (strcmp(s, "sigmoid")==0) return SIG; if (strcmp(s, "steps")==0) return STEPS; + if (strcmp(s, "sgdr")==0) return SGDR; fprintf(stderr, "Couldn't find policy %s, going with constant\n", s); return CONSTANT; } @@ -637,6 +638,9 @@ void parse_net_options(list *options, network *net) { net->batch = option_find_int(options, "batch",1); net->learning_rate = option_find_float(options, "learning_rate", .001); + net->learning_rate_min = option_find_float_quiet(options, "learning_rate_min", .00001); + net->learning_rate_max = option_find_float_quiet(options, "learning_rate_max", .001); + net->batches_per_cycle = option_find_int(options, "sgdr_cycle", 500); net->momentum = option_find_float(options, "momentum", .9); net->decay = option_find_float(options, "decay", .0001); int subdivs = option_find_int(options, "subdivisions",1); From 53081900e5c740c75a1bba50068aa48123082e05 Mon Sep 17 00:00:00 2001 From: Josh Veitch-Michaelis Date: Mon, 18 Mar 2019 23:37:37 +0000 Subject: [PATCH 2/3] make option quiet --- src/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index 8e3af25f..77cac714 100644 --- a/src/parser.c +++ b/src/parser.c @@ -640,7 +640,7 @@ void parse_net_options(list *options, network *net) net->learning_rate = option_find_float(options, "learning_rate", .001); net->learning_rate_min = option_find_float_quiet(options, "learning_rate_min", .00001); net->learning_rate_max = option_find_float_quiet(options, "learning_rate_max", .001); - net->batches_per_cycle = option_find_int(options, "sgdr_cycle", 500); + net->batches_per_cycle = option_find_int_quiet(options, "sgdr_cycle", 500); net->momentum = option_find_float(options, "momentum", .9); net->decay = option_find_float(options, "decay", .0001); int subdivs = option_find_int(options, "subdivisions",1); From 6cbf08321a5c2f81da6120d70b15167d9fc7f7e3 Mon Sep 17 00:00:00 2001 From: Josh Veitch-Michaelis Date: Tue, 19 Mar 2019 00:02:01 +0000 Subject: [PATCH 3/3] get rid of learning_rate_max --- src/network.c | 2 +- src/parser.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/network.c b/src/network.c index 32e5e96e..fd7cddfb 100644 --- a/src/network.c +++ b/src/network.c @@ -119,7 +119,7 @@ float get_current_rate(network net) return net.learning_rate * (1./(1.+exp(net.gamma*(batch_num - net.step)))); case SGDR: rate = net.learning_rate_min + - 0.5*(net.learning_rate_max-net.learning_rate_min) + 0.5*(net.learning_rate-net.learning_rate_min) * (1. + cos( (float) (batch_num % net.batches_per_cycle)*3.14159265 / net.batches_per_cycle)); return rate; diff --git a/src/parser.c b/src/parser.c index 77cac714..557e07ba 100644 --- a/src/parser.c +++ b/src/parser.c @@ -639,7 +639,6 @@ void parse_net_options(list *options, network *net) net->batch = option_find_int(options, "batch",1); net->learning_rate = option_find_float(options, "learning_rate", .001); net->learning_rate_min = option_find_float_quiet(options, "learning_rate_min", .00001); - net->learning_rate_max = option_find_float_quiet(options, "learning_rate_max", .001); net->batches_per_cycle = option_find_int_quiet(options, "sgdr_cycle", 500); net->momentum = option_find_float(options, "momentum", .9); net->decay = option_find_float(options, "decay", .0001);