mirror of https://github.com/AlexeyAB/darknet.git
Make the structures in darknet.h have the same size in CPU and GPU builds
This commit is contained in:
parent
10a586140c
commit
ef979a1fd2
|
@ -34,6 +34,8 @@
|
||||||
|
|
||||||
#define SECRET_NUM -1234
|
#define SECRET_NUM -1234
|
||||||
|
|
||||||
|
typedef enum { UNUSED_DEF_VAL } UNUSED_ENUM_TYPE;
|
||||||
|
|
||||||
#ifdef GPU
|
#ifdef GPU
|
||||||
|
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
@ -42,8 +44,8 @@
|
||||||
|
|
||||||
#ifdef CUDNN
|
#ifdef CUDNN
|
||||||
#include <cudnn.h>
|
#include <cudnn.h>
|
||||||
#endif
|
#endif // CUDNN
|
||||||
#endif
|
#endif // GPU
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
@ -495,7 +497,7 @@ struct layer {
|
||||||
|
|
||||||
size_t workspace_size;
|
size_t workspace_size;
|
||||||
|
|
||||||
#ifdef GPU
|
//#ifdef GPU
|
||||||
int *indexes_gpu;
|
int *indexes_gpu;
|
||||||
|
|
||||||
float *z_gpu;
|
float *z_gpu;
|
||||||
|
@ -610,8 +612,21 @@ struct layer {
|
||||||
cudnnConvolutionBwdDataAlgo_t bd_algo, bd_algo16;
|
cudnnConvolutionBwdDataAlgo_t bd_algo, bd_algo16;
|
||||||
cudnnConvolutionBwdFilterAlgo_t bf_algo, bf_algo16;
|
cudnnConvolutionBwdFilterAlgo_t bf_algo, bf_algo16;
|
||||||
cudnnPoolingDescriptor_t poolingDesc;
|
cudnnPoolingDescriptor_t poolingDesc;
|
||||||
|
#else // CUDNN
|
||||||
|
void* srcTensorDesc, *dstTensorDesc;
|
||||||
|
void* srcTensorDesc16, *dstTensorDesc16;
|
||||||
|
void* dsrcTensorDesc, *ddstTensorDesc;
|
||||||
|
void* dsrcTensorDesc16, *ddstTensorDesc16;
|
||||||
|
void* normTensorDesc, *normDstTensorDesc, *normDstTensorDescF16;
|
||||||
|
void* weightDesc, *weightDesc16;
|
||||||
|
void* dweightDesc, *dweightDesc16;
|
||||||
|
void* convDesc;
|
||||||
|
UNUSED_ENUM_TYPE fw_algo, fw_algo16;
|
||||||
|
UNUSED_ENUM_TYPE bd_algo, bd_algo16;
|
||||||
|
UNUSED_ENUM_TYPE bf_algo, bf_algo16;
|
||||||
|
void* poolingDesc;
|
||||||
#endif // CUDNN
|
#endif // CUDNN
|
||||||
#endif // GPU
|
//#endif // GPU
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -701,7 +716,7 @@ typedef struct network {
|
||||||
float *cost;
|
float *cost;
|
||||||
float clip;
|
float clip;
|
||||||
|
|
||||||
#ifdef GPU
|
//#ifdef GPU
|
||||||
//float *input_gpu;
|
//float *input_gpu;
|
||||||
//float *truth_gpu;
|
//float *truth_gpu;
|
||||||
float *delta_gpu;
|
float *delta_gpu;
|
||||||
|
@ -722,7 +737,7 @@ typedef struct network {
|
||||||
float *global_delta_gpu;
|
float *global_delta_gpu;
|
||||||
float *state_delta_gpu;
|
float *state_delta_gpu;
|
||||||
size_t max_delta_gpu_size;
|
size_t max_delta_gpu_size;
|
||||||
#endif
|
//#endif // GPU
|
||||||
int optimized_memory;
|
int optimized_memory;
|
||||||
size_t workspace_size_limit;
|
size_t workspace_size_limit;
|
||||||
} network;
|
} network;
|
||||||
|
|
|
@ -258,15 +258,17 @@ void forward_batchnorm_layer_gpu(layer l, network_state state)
|
||||||
fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu);
|
fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu);
|
||||||
|
|
||||||
//fast_v_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.v_cbn_gpu);
|
//fast_v_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.v_cbn_gpu);
|
||||||
int minibatch_index = state.net.current_subdivision + 1;
|
const int minibatch_index = state.net.current_subdivision + 1;
|
||||||
float alpha = 0.01;
|
const int max_minibatch_index = state.net.subdivisions;
|
||||||
|
//printf("\n minibatch_index = %d, max_minibatch_index = %d \n", minibatch_index, max_minibatch_index);
|
||||||
|
const float alpha = 0.01;
|
||||||
|
|
||||||
int inverse_variance = 0;
|
int inverse_variance = 0;
|
||||||
#ifdef CUDNN
|
#ifdef CUDNN
|
||||||
inverse_variance = 1;
|
inverse_variance = 1;
|
||||||
#endif // CUDNN
|
#endif // CUDNN
|
||||||
|
|
||||||
fast_v_cbn_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, minibatch_index, l.m_cbn_avg_gpu, l.v_cbn_avg_gpu, l.variance_gpu,
|
fast_v_cbn_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, minibatch_index, max_minibatch_index, l.m_cbn_avg_gpu, l.v_cbn_avg_gpu, l.variance_gpu,
|
||||||
alpha, l.rolling_mean_gpu, l.rolling_variance_gpu, inverse_variance, .00001);
|
alpha, l.rolling_mean_gpu, l.rolling_variance_gpu, inverse_variance, .00001);
|
||||||
|
|
||||||
normalize_scale_bias_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.scales_gpu, l.biases_gpu, l.batch, l.out_c, l.out_h*l.out_w, inverse_variance, .00001f);
|
normalize_scale_bias_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.scales_gpu, l.biases_gpu, l.batch, l.out_c, l.out_h*l.out_w, inverse_variance, .00001f);
|
||||||
|
|
|
@ -87,7 +87,7 @@ void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *varianc
|
||||||
|
|
||||||
void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean);
|
void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean);
|
||||||
void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
|
void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
|
||||||
void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, float *m_avg, float *v_avg, float *variance,
|
void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance,
|
||||||
const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon);
|
const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon);
|
||||||
void normalize_scale_bias_gpu(float *x, float *mean, float *variance, float *scales, float *biases, int batch, int filters, int spatial, int inverse_variance, float epsilon);
|
void normalize_scale_bias_gpu(float *x, float *mean, float *variance, float *scales, float *biases, int batch, int filters, int spatial, int inverse_variance, float epsilon);
|
||||||
void compare_2_arrays_gpu(float *one, float *two, int size);
|
void compare_2_arrays_gpu(float *one, float *two, int size);
|
||||||
|
|
|
@ -572,7 +572,7 @@ extern "C" void fast_variance_gpu(float *x, float *mean, int batch, int filters,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
__global__ void fast_v_cbn_kernel(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, float *m_avg, float *v_avg, float *variance,
|
__global__ void fast_v_cbn_kernel(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance,
|
||||||
const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon)
|
const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon)
|
||||||
{
|
{
|
||||||
const int threads = BLOCK;
|
const int threads = BLOCK;
|
||||||
|
@ -615,16 +615,19 @@ __global__ void fast_v_cbn_kernel(const float *x, float *mean, int batch, int f
|
||||||
if (inverse_variance) variance[filter] = 1.0f / sqrtf(variance_tmp + epsilon);
|
if (inverse_variance) variance[filter] = 1.0f / sqrtf(variance_tmp + epsilon);
|
||||||
else variance[filter] = variance_tmp;
|
else variance[filter] = variance_tmp;
|
||||||
|
|
||||||
|
//if (max_minibatch_index == minibatch_index)
|
||||||
|
{
|
||||||
rolling_mean_gpu[filter] = alpha * mean[filter] + (1 - alpha) * rolling_mean_gpu[filter];
|
rolling_mean_gpu[filter] = alpha * mean[filter] + (1 - alpha) * rolling_mean_gpu[filter];
|
||||||
|
|
||||||
rolling_variance_gpu[filter] = alpha * variance_tmp + (1 - alpha) * rolling_variance_gpu[filter];
|
rolling_variance_gpu[filter] = alpha * variance_tmp + (1 - alpha) * rolling_variance_gpu[filter];
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, float *m_avg, float *v_avg, float *variance,
|
extern "C" void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance,
|
||||||
const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon)
|
const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon)
|
||||||
{
|
{
|
||||||
fast_v_cbn_kernel << <filters, BLOCK, 0, get_cuda_stream() >> >(x, mean, batch, filters, spatial, minibatch_index, m_avg, v_avg, variance, alpha, rolling_mean_gpu, rolling_variance_gpu, inverse_variance, epsilon);
|
fast_v_cbn_kernel << <filters, BLOCK, 0, get_cuda_stream() >> >(x, mean, batch, filters, spatial, minibatch_index, max_minibatch_index, m_avg, v_avg, variance, alpha, rolling_mean_gpu, rolling_variance_gpu, inverse_variance, epsilon);
|
||||||
CHECK_CUDA(cudaPeekAtLastError());
|
CHECK_CUDA(cudaPeekAtLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -455,6 +455,7 @@ int main(int argc, char **argv)
|
||||||
|
|
||||||
#ifndef GPU
|
#ifndef GPU
|
||||||
gpu_index = -1;
|
gpu_index = -1;
|
||||||
|
printf(" GPU isn't used \n");
|
||||||
init_cpu();
|
init_cpu();
|
||||||
#else
|
#else
|
||||||
if(gpu_index >= 0){
|
if(gpu_index >= 0){
|
||||||
|
|
Loading…
Reference in New Issue