From d64693eb7733975d113d6353706328aaf6c6f05a Mon Sep 17 00:00:00 2001
From: Josh Veitch-Michaelis <j.veitchmichaelis@gmail.com>
Date: Mon, 18 Mar 2019 23:26:04 +0000
Subject: [PATCH 1/3] add SGDR policy

---
 include/darknet.h | 5 ++++-
 src/network.c     | 6 ++++++
 src/parser.c      | 4 ++++
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/include/darknet.h b/include/darknet.h
index 0a1451e3..fb62fc2a 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -518,7 +518,7 @@ struct layer {
 
 // network.h
 typedef enum {
-    CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM
+    CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM, SGDR
 } learning_rate_policy;
 
 // network.h
@@ -534,6 +534,9 @@ typedef struct network {
     learning_rate_policy policy;
 
     float learning_rate;
+    float learning_rate_min;
+    float learning_rate_max;
+    int batches_per_cycle;
     float momentum;
     float decay;
     float gamma;
diff --git a/src/network.c b/src/network.c
index cfc747cb..32e5e96e 100644
--- a/src/network.c
+++ b/src/network.c
@@ -117,6 +117,12 @@ float get_current_rate(network net)
             return net.learning_rate * pow(rand_uniform(0,1), net.power);
         case SIG:
             return net.learning_rate * (1./(1.+exp(net.gamma*(batch_num - net.step))));
+        case SGDR:
+            rate = net.learning_rate_min + 
+                        0.5*(net.learning_rate_max-net.learning_rate_min)
+                        * (1. + cos( (float) (batch_num % net.batches_per_cycle)*3.14159265 / net.batches_per_cycle));
+            
+            return rate;
         default:
             fprintf(stderr, "Policy is weird!\n");
             return net.learning_rate;
diff --git a/src/parser.c b/src/parser.c
index 11cc4bfc..8e3af25f 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -629,6 +629,7 @@ learning_rate_policy get_policy(char *s)
     if (strcmp(s, "exp")==0) return EXP;
     if (strcmp(s, "sigmoid")==0) return SIG;
     if (strcmp(s, "steps")==0) return STEPS;
+    if (strcmp(s, "sgdr")==0) return SGDR;
     fprintf(stderr, "Couldn't find policy %s, going with constant\n", s);
     return CONSTANT;
 }
@@ -637,6 +638,9 @@ void parse_net_options(list *options, network *net)
 {
     net->batch = option_find_int(options, "batch",1);
     net->learning_rate = option_find_float(options, "learning_rate", .001);
+    net->learning_rate_min = option_find_float_quiet(options, "learning_rate_min", .00001);
+    net->learning_rate_max = option_find_float_quiet(options, "learning_rate_max", .001);
+    net->batches_per_cycle = option_find_int(options, "sgdr_cycle", 500);
     net->momentum = option_find_float(options, "momentum", .9);
     net->decay = option_find_float(options, "decay", .0001);
     int subdivs = option_find_int(options, "subdivisions",1);

From 53081900e5c740c75a1bba50068aa48123082e05 Mon Sep 17 00:00:00 2001
From: Josh Veitch-Michaelis <j.veitchmichaelis@gmail.com>
Date: Mon, 18 Mar 2019 23:37:37 +0000
Subject: [PATCH 2/3] make sgdr_cycle option lookup quiet

---
 src/parser.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/parser.c b/src/parser.c
index 8e3af25f..77cac714 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -640,7 +640,7 @@ void parse_net_options(list *options, network *net)
     net->learning_rate = option_find_float(options, "learning_rate", .001);
     net->learning_rate_min = option_find_float_quiet(options, "learning_rate_min", .00001);
     net->learning_rate_max = option_find_float_quiet(options, "learning_rate_max", .001);
-    net->batches_per_cycle = option_find_int(options, "sgdr_cycle", 500);
+    net->batches_per_cycle = option_find_int_quiet(options, "sgdr_cycle", 500);
     net->momentum = option_find_float(options, "momentum", .9);
     net->decay = option_find_float(options, "decay", .0001);
     int subdivs = option_find_int(options, "subdivisions",1);

From 6cbf08321a5c2f81da6120d70b15167d9fc7f7e3 Mon Sep 17 00:00:00 2001
From: Josh Veitch-Michaelis <j.veitchmichaelis@gmail.com>
Date: Tue, 19 Mar 2019 00:02:01 +0000
Subject: [PATCH 3/3] use learning_rate as SGDR maximum, stop parsing learning_rate_max

---
 src/network.c | 2 +-
 src/parser.c  | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/network.c b/src/network.c
index 32e5e96e..fd7cddfb 100644
--- a/src/network.c
+++ b/src/network.c
@@ -119,7 +119,7 @@ float get_current_rate(network net)
             return net.learning_rate * (1./(1.+exp(net.gamma*(batch_num - net.step))));
         case SGDR:
             rate = net.learning_rate_min + 
-                        0.5*(net.learning_rate_max-net.learning_rate_min)
+                        0.5*(net.learning_rate-net.learning_rate_min)
                         * (1. + cos( (float) (batch_num % net.batches_per_cycle)*3.14159265 / net.batches_per_cycle));
             
             return rate;
diff --git a/src/parser.c b/src/parser.c
index 77cac714..557e07ba 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -639,7 +639,6 @@ void parse_net_options(list *options, network *net)
     net->batch = option_find_int(options, "batch",1);
     net->learning_rate = option_find_float(options, "learning_rate", .001);
     net->learning_rate_min = option_find_float_quiet(options, "learning_rate_min", .00001);
-    net->learning_rate_max = option_find_float_quiet(options, "learning_rate_max", .001);
     net->batches_per_cycle = option_find_int_quiet(options, "sgdr_cycle", 500);
     net->momentum = option_find_float(options, "momentum", .9);
     net->decay = option_find_float(options, "decay", .0001);