minor fix

This commit is contained in:
AlexeyAB 2017-07-13 14:36:54 +03:00
parent 5bc62b14e0
commit 9920410ba9
3 changed files with 6 additions and 1 deletion

View File

@@ -132,7 +132,7 @@
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>C:\opencv_2.4.9\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include</AdditionalIncludeDirectories>
<PreprocessorDefinitions>OPENCV;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>CUDNN;OPENCV;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<CLanguageStandard>c11</CLanguageStandard>
<CppLanguageStandard>c++1y</CppLanguageStandard>
<PrecompiledHeaderCompileAs>CompileAsCpp</PrecompiledHeaderCompileAs>

View File

@@ -223,6 +223,7 @@ __global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch,
local[id] += (i+id < spatial) ? delta[index] : 0;
}
}
__syncthreads();
if(id == 0){
mean_delta[filter] = 0;
@@ -251,6 +252,7 @@ __global__ void fast_variance_delta_kernel(float *x, float *delta, float *mean,
local[id] += (i+id < spatial) ? delta[index]*(x[index] - mean[filter]) : 0;
}
}
__syncthreads();
if(id == 0){
variance_delta[filter] = 0;
@@ -446,6 +448,7 @@ __global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial,
local[id] += (i+id < spatial) ? x[index] : 0;
}
}
__syncthreads();
if(id == 0){
mean[filter] = 0;
@@ -474,6 +477,7 @@ __global__ void fast_variance_kernel(float *x, float *mean, int batch, int filt
local[id] += (i+id < spatial) ? pow((x[index] - mean[filter]), 2) : 0;
}
}
__syncthreads();
if(id == 0){
variance[filter] = 0;

View File

@@ -127,6 +127,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
//if(l.dot > 0) dot_error_gpu(l);
if(l.binary || l.xnor) swap_binary(&l);
//cudaDeviceSynchronize(); // for correct profiling of performance
}
void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)