Added flag -cuda_debug_sync for checking CUDA errors

AlexeyAB 2020-03-11 17:12:07 +03:00
parent e70b70db8f
commit 1a56ef588a
4 changed files with 33 additions and 11 deletions
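
A minimal usage sketch of the new flag (the detector test subcommand and the cfg/weights/image paths are illustrative placeholders; only the -cuda_debug_sync flag itself comes from this commit):

./darknet detector test cfg/coco.data cfg/yolov3.cfg yolov3.weights data/dog.jpg -cuda_debug_sync

The flag is read in main() with find_arg() (see the hunk below), so it can be appended to any darknet command line; when present, the extended CUDA/cuDNN error checks also call cudaDeviceSynchronize() after every checked call.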

View File

@@ -29,6 +29,8 @@ int gpu_index = 0;
 #error "If you set CUDNN_HALF=1 then you must set CUDNN=1"
 #endif
+extern int cuda_debug_sync;
 void cuda_set_device(int n)
 {
     gpu_index = n;
@@ -86,10 +88,13 @@ void check_error_extended(cudaError_t status, const char *file, int line, const
         check_error(status);
     }
 #if defined(DEBUG) || defined(CUDA_DEBUG)
-    status = cudaDeviceSynchronize();
-    if (status != cudaSuccess)
-        printf("CUDA status = cudaDeviceSynchronize() Error: file: %s() : line: %d : build time: %s \n", file, line, date_time);
+    cuda_debug_sync = 1;
 #endif
+    if (cuda_debug_sync) {
+        status = cudaDeviceSynchronize();
+        if (status != cudaSuccess)
+            printf("CUDA status = cudaDeviceSynchronize() Error: file: %s() : line: %d : build time: %s \n", file, line, date_time);
+    }
     check_error(status);
 }
@@ -173,6 +178,9 @@ void cudnn_check_error(cudnnStatus_t status)
 #if defined(DEBUG) || defined(CUDA_DEBUG)
     cudaDeviceSynchronize();
 #endif
+    if (cuda_debug_sync) {
+        cudaDeviceSynchronize();
+    }
     cudnnStatus_t status2 = CUDNN_STATUS_SUCCESS;
 #ifdef CUDNN_ERRQUERY_RAWCODE
     cudnnStatus_t status_tmp = cudnnQueryRuntimeError(cudnn_handle(), &status2, CUDNN_ERRQUERY_RAWCODE, NULL);
@@ -208,10 +216,13 @@ void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line
         cudnn_check_error(status);
     }
 #if defined(DEBUG) || defined(CUDA_DEBUG)
-    status = cudaDeviceSynchronize();
-    if (status != CUDNN_STATUS_SUCCESS)
-        printf("\n cuDNN status = cudaDeviceSynchronize() Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time);
+    cuda_debug_sync = 1;
 #endif
+    if (cuda_debug_sync) {
+        status = cudaDeviceSynchronize();
+        if (status != CUDNN_STATUS_SUCCESS)
+            printf("\n cuDNN status = cudaDeviceSynchronize() Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time);
+    }
     cudnn_check_error(status);
 }
 #endif
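
For context, a standalone sketch of the pattern this commit adds to check_error_extended() and cudnn_check_error_extended(): a runtime flag that forces a device synchronization after each checked call, so asynchronous kernel errors surface at the failing call site. The names below (debug_sync, check_cuda, CHECK) are illustrative stand-ins, not darknet's own API:

/* Sketch: runtime-gated sync after each CUDA call; compile with nvcc. */
#include <stdio.h>
#include <string.h>
#include <cuda_runtime.h>

static int debug_sync = 0;                /* stands in for darknet's cuda_debug_sync */

static void check_cuda(cudaError_t status, const char *file, int line)
{
    if (status != cudaSuccess)
        printf("CUDA error: %s (%s:%d)\n", cudaGetErrorString(status), file, line);
    if (debug_sync) {                     /* only when the -cuda_debug_sync-style flag is set */
        status = cudaDeviceSynchronize(); /* flush pending kernels, report async failures here */
        if (status != cudaSuccess)
            printf("cudaDeviceSynchronize error: %s (%s:%d)\n", cudaGetErrorString(status), file, line);
    }
}
#define CHECK(X) check_cuda((X), __FILE__, __LINE__)

int main(int argc, char **argv)
{
    for (int i = 1; i < argc; ++i)
        if (strcmp(argv[i], "-cuda_debug_sync") == 0) debug_sync = 1;

    float *p;
    CHECK(cudaMalloc((void **)&p, 1024 * sizeof(float)));
    CHECK(cudaFree(p));
    return 0;
}

Synchronizing after every call serializes GPU work and costs throughput, which is why the commit gates it behind a command-line flag instead of requiring a DEBUG/CUDA_DEBUG build.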

View File

@@ -12,6 +12,9 @@
 #include "blas.h"
 #include "connected_layer.h"
+int check_mistakes = 0;
+int cuda_debug_sync = 0;
 extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top);
 extern void run_voxel(int argc, char **argv);
@@ -459,14 +462,20 @@ int main(int argc, char **argv)
     gpu_index = -1;
     printf(" GPU isn't used \n");
     init_cpu();
-#else
+#else // GPU
     if(gpu_index >= 0){
         cuda_set_device(gpu_index);
         CHECK_CUDA(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
     }
     show_cuda_cudnn_info();
-#endif
+    cuda_debug_sync = find_arg(argc, argv, "-cuda_debug_sync");
 #ifdef CUDNN_HALF
     printf(" CUDNN_HALF=1 \n");
 #endif // CUDNN_HALF
+#endif // GPU
     show_opencv_info();

View File

@@ -18,7 +18,7 @@ typedef __compar_fn_t comparison_fn_t;
 #include "http_stream.h"
-int check_mistakes = 0;
+extern int check_mistakes;
 static int coco_ids[] = { 1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90 };

View File

@@ -1054,6 +1054,7 @@ void parse_net_options(list *options, network *net)
     net->batch *= net->time_steps;
     net->subdivisions = subdivs;
     *net->seen = 0;
+    *net->cur_iteration = 0;
     net->dynamic_minibatch = option_find_int_quiet(options, "dynamic_minibatch", 0);
     net->optimized_memory = option_find_int_quiet(options, "optimized_memory", 0);
@@ -1096,7 +1097,7 @@ void parse_net_options(list *options, network *net)
     char *policy_s = option_find_str(options, "policy", "constant");
     net->policy = get_policy(policy_s);
     net->burn_in = option_find_int_quiet(options, "burn_in", 0);
-#ifdef CUDNN_HALF
+#ifdef GPU
     if (net->gpu_index >= 0) {
         int compute_capability = get_gpu_compute_capability(net->gpu_index);
         if (get_gpu_compute_capability(net->gpu_index) >= 700) net->cudnn_half = 1;
@@ -1104,7 +1105,7 @@ void parse_net_options(list *options, network *net)
         fprintf(stderr, " compute_capability = %d, cudnn_half = %d \n", compute_capability, net->cudnn_half);
     }
     else fprintf(stderr, " GPU isn't used \n");
-#endif
+#endif// GPU
     if(net->policy == STEP){
         net->step = option_find_int(options, "step", 1);
         net->scale = option_find_float(options, "scale", 1);
@@ -1630,6 +1631,7 @@ void save_weights_upto(network net, char *filename, int cutoff)
     fwrite(&major, sizeof(int), 1, fp);
     fwrite(&minor, sizeof(int), 1, fp);
     fwrite(&revision, sizeof(int), 1, fp);
+    (*net.seen) = (*net.cur_iteration) * net.batch * net.subdivisions;
     fwrite(net.seen, sizeof(uint64_t), 1, fp);
     int i;
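
With the added assignment, the saved image counter is recomputed from the iteration counter at save time. A worked example with illustrative cfg values: batch=64 and subdivisions=16 give net.batch = 64/16 = 4 after parse_net_options (net->batch is divided by subdivisions earlier in that function; time_steps=1 assumed), so at cur_iteration = 1000 the weights file records seen = 1000 * 4 * 16 = 64000 images, i.e. iterations times the full configured batch size.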