mirror of https://github.com/AlexeyAB/darknet.git
Fixed Loss scale
parent 8f900493c6
commit 17b806f0bd
@@ -1264,18 +1264,12 @@ void update_convolutional_layer_gpu(layer l, int batch, float learning_rate_init
 
     }
 
-    float learning_rate = learning_rate_init*l.learning_rate_scale;
+    // Loss scale for Mixed-Precision on Tensor-Cores
+    float learning_rate = learning_rate_init*l.learning_rate_scale / loss_scale;
     //float momentum = a.momentum;
     //float decay = a.decay;
     //int batch = a.batch;
 
-    // Loss scale for Mixed-Precision on Tensor-Cores
-    if (loss_scale != 1.0) {
-        if (l.weight_updates_gpu && l.nweights > 0) scal_ongpu(l.nweights, 1.0 / loss_scale, l.weight_updates_gpu, 1);
-        if (l.bias_updates_gpu && l.n > 0) scal_ongpu(l.n, 1.0 / loss_scale, l.bias_updates_gpu, 1);
-        if (l.scale_updates_gpu && l.n > 0) scal_ongpu(l.n, 1.0 / loss_scale, l.scale_updates_gpu, 1);
-    }
-
     reset_nan_and_inf(l.weight_updates_gpu, l.nweights);
     fix_nan_and_inf(l.weights_gpu, l.nweights);
 
@@ -1301,7 +1295,7 @@ void update_convolutional_layer_gpu(layer l, int batch, float learning_rate_init
 
     //axpy_ongpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
     //axpy_ongpu(l.nweights, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
     //scal_ongpu(l.nweights, momentum, l.weight_updates_gpu, 1);
-    axpy_ongpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
+    axpy_ongpu(l.nweights, -decay*batch*loss_scale, l.weights_gpu, 1, l.weight_updates_gpu, 1);
     axpy_ongpu(l.nweights, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
     scal_ongpu(l.nweights, momentum, l.weight_updates_gpu, 1);
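Note on the change above: weight_updates_gpu now stays in loss-scaled units instead of being divided by loss_scale on every step. The division moves into the learning rate, and the weight-decay axpy picks up a compensating *loss_scale factor, so every term accumulated into the buffer carries the same scale and the applied update is unchanged. A minimal scalar sketch of that equivalence in plain C (step_old/step_new are hypothetical stand-ins for the removed and the new code paths, with axpy_ongpu/scal_ongpu spelled out on a single weight):

#include <stdio.h>

/* One SGD step for a single weight w. The mixed-precision backward
   pass fills the update buffer with s*g, where s is the loss scale
   and g the true gradient. */
static float step_old(float w, float g, float s,
                      float lr, float decay, int batch)
{
    float upd = s * g;              /* buffer as filled by backward()    */
    upd *= 1.0f / s;                /* removed code: unscale the buffer  */
    upd += -decay * batch * w;      /* axpy: weight decay                */
    return w + (lr / batch) * upd;  /* axpy: apply the update            */
}

static float step_new(float w, float g, float s,
                      float lr, float decay, int batch)
{
    float upd = s * g;              /* buffer stays loss-scaled          */
    float lr2 = lr / s;             /* scale folded into learning rate   */
    upd += -decay * batch * s * w;  /* decay compensated by *loss_scale  */
    return w + (lr2 / batch) * upd; /* same update, applied once         */
}

int main(void)
{
    float a = step_old(0.5f, 0.01f, 128.0f, 0.001f, 0.0005f, 64);
    float b = step_new(0.5f, 0.01f, 128.0f, 0.001f, 0.0005f, 64);
    printf("old=%.9f new=%.9f\n", a, b);  /* prints matching values */
    return 0;
}

Since the loss scale is typically a power of two, the two paths agree bit-for-bit; the refactor also drops up to three scal_ongpu launches per layer per update (weight, bias, and scale update buffers).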

@@ -243,16 +243,11 @@ void backward_shortcut_layer_gpu(const layer l, network_state state)
 void update_shortcut_layer_gpu(layer l, int batch, float learning_rate_init, float momentum, float decay, float loss_scale)
 {
     if (l.nweights > 0) {
-        float learning_rate = learning_rate_init*l.learning_rate_scale;
+        float learning_rate = learning_rate_init*l.learning_rate_scale / loss_scale;
         //float momentum = a.momentum;
         //float decay = a.decay;
         //int batch = a.batch;
 
-        // Loss scale for Mixed-Precision on Tensor-Cores
-        if (loss_scale != 1.0) {
-            if(l.weight_updates_gpu && l.nweights > 0) scal_ongpu(l.nweights, 1.0 / loss_scale, l.weight_updates_gpu, 1);
-        }
-
         reset_nan_and_inf(l.weight_updates_gpu, l.nweights);
         fix_nan_and_inf(l.weights_gpu, l.nweights);
 
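The shortcut layer gets the same treatment. One subtlety the momentum update makes visible: scal_ongpu leaves the momentum state in the buffer still multiplied by loss_scale, which is only consistent because the next backward pass accumulates gradients carrying the same factor. A two-iteration scalar check in plain C (hypothetical names; decay omitted for brevity; assumes loss_scale is constant across steps, which a dynamic loss-scaling schedule would briefly violate when it changes the scale):

#include <stdio.h>
#include <math.h>

/* Two SGD-with-momentum steps for one weight, with and without a
   constant loss scale s kept inside the update buffer, mirroring
   update_shortcut_layer_gpu. */
int main(void)
{
    const float s = 64.0f, lr = 0.01f, momentum = 0.9f;
    const int batch = 32;
    const float g[2] = { 0.2f, -0.1f };   /* raw gradients, two iterations */

    float w_ref = 1.0f, u_ref = 0.0f;     /* unscaled reference            */
    float w_ls  = 1.0f, u_ls  = 0.0f;     /* loss-scaled variant           */

    for (int t = 0; t < 2; ++t) {
        /* reference: unscaled buffer, plain learning rate */
        u_ref += g[t];
        w_ref += (lr / batch) * u_ref;
        u_ref *= momentum;

        /* loss-scaled: buffer holds s*gradient, lr divided by s */
        u_ls += s * g[t];
        w_ls += ((lr / s) / batch) * u_ls;
        u_ls *= momentum;                 /* state stays in scaled units */
    }
    printf("ref=%.9f loss-scaled=%.9f diff=%g\n",
           w_ref, w_ls, (double)fabsf(w_ref - w_ls));
    return 0;
}

Both weights match up to float rounding; with a power-of-two loss scale the agreement is exact, since scaling by 2^k only shifts exponents.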