mirror of https://github.com/AlexeyAB/darknet.git
Added [convolutional] coordconv=1 for GPU-only
This commit is contained in:
parent
0d8718a434
commit
05dee78fa3
|
@ -52,7 +52,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.0.props" />
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
|
@ -153,7 +153,7 @@
|
|||
</Link>
|
||||
<CudaCompile>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
<CodeGeneration>compute_30,sm_30;compute_75,sm_75</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_75,sm_75</CodeGeneration>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
|
@ -302,6 +302,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.0.targets" />
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -259,6 +259,7 @@ struct layer {
|
|||
int maxpool_zero_nonmax;
|
||||
int out_channels;
|
||||
float reverse;
|
||||
int coordconv;
|
||||
int flatten;
|
||||
int spatial;
|
||||
int pad;
|
||||
|
|
|
@ -172,6 +172,7 @@ void reduce_and_expand_array_gpu(const float *src_gpu, float *dst_gpu, int size,
|
|||
void expand_array_gpu(const float *src_gpu, float *dst_gpu, int size, int groups);
|
||||
void mult_inverse_array_gpu(const float *src_gpu, float *dst_gpu, int size, float eps);
|
||||
void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size);
|
||||
void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int b, int type);
|
||||
|
||||
#endif // GPU
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -2383,3 +2383,50 @@ extern "C" void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size,
|
|||
|
||||
CHECK_CUDA(cudaPeekAtLastError());
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// CoordConv helper kernel: one thread per element of a float tensor that holds
// w*h*chan*batch values, indexed with x varying fastest, then y, then channel,
// then batch (this is the decomposition performed on the flat thread index below).
//
//   type == 0 : channel 0 <- x coordinate scaled to [-1; 1)
//               channel 1 <- y coordinate scaled to [-1; 1)
//               channel 2 <- sqrt(x_val^2 + y_val^2), i.e. [0; 1.414)
//   type == 1 : channels 0..2 <- 0
//
// All other channels, and any other `type` value, leave dst untouched.
//
// Launch: 1D grid / 1D blocks with at least w*h*chan*batch threads in total.
// Surplus threads in the last block return without touching memory.
__global__ void coord_conv_kernel(float *dst, int w, int h, int chan, int batch, int type)
{
    // The flat index is kept immutable: it is both the source of the (x, y, c)
    // coordinates and the position in dst that this thread owns. The previous
    // version divided the index in place and then stored through the divided
    // value, so every write landed on element (b*chan + c) instead.
    const int index = blockIdx.x*blockDim.x + threadIdx.x;

    // Tail guard: the grid is rounded up to a whole number of blocks.
    // This also avoids the modulo-by-zero below when any dimension is 0.
    if (index >= w*h*chan*batch) return;

    const int x = index % w;
    const int y = (index / w) % h;
    const int c = (index / (w*h)) % chan;
    // batch position would be index / (w*h*chan); it is not needed here.

    if (c > 2) return;  // only the first three channels are ever written

    if (type == 0) {
        const float x_val = (2.0f * x) / w - 1.0f;   // [-1; 1)
        const float y_val = (2.0f * y) / h - 1.0f;   // [-1; 1)

        if (c == 0) {
            dst[index] = x_val;                              // x - coord
        }
        else if (c == 1) {
            dst[index] = y_val;                              // y - coord
        }
        else {
            dst[index] = sqrtf(x_val*x_val + y_val*y_val);   // rad - coord, [0; 1.414)
        }
    }
    else if (type == 1) {
        dst[index] = 0;
    }
}
|
||||
|
||||
// Host-side launcher for coord_conv_kernel.
//   dst        - device buffer the kernel writes into
//   size       - element count used to size the grid (callers in this commit pass
//                l.outputs*l.batch)
//   w, h, chan - forwarded to the kernel as the tensor dimensions
//   b          - forwarded to the kernel as its batch argument
//   type       - forwarded unchanged; selects the kernel mode
// The launch is enqueued on the stream returned by get_cuda_stream() and is only
// checked for launch errors; no synchronization is performed here.
extern "C" void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int b, int type)
{
    const int threads_per_block = BLOCK;
    const int grid_blocks = get_number_of_blocks(size, threads_per_block);

    coord_conv_kernel<<<grid_blocks, threads_per_block, 0, get_cuda_stream()>>>(dst, w, h, chan, b, type);
    CHECK_CUDA(cudaPeekAtLastError());
}
|
|
@ -242,7 +242,6 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i
|
|||
|
||||
layer make_history_layer(int batch, int h, int w, int c, int history_size, int steps, int train)
|
||||
{
|
||||
//steps = 1;
|
||||
layer l = { (LAYER_TYPE)0 };
|
||||
l.train = train;
|
||||
l.batch = batch;
|
||||
|
|
|
@ -629,10 +629,18 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
|
|||
simple_copy_ongpu(l.outputs*l.batch, l.output_gpu, l.input_antialiasing_gpu);
|
||||
simple_copy_ongpu(l.input_layer->outputs*l.input_layer->batch, l.input_layer->output_gpu, l.output_gpu);
|
||||
}
|
||||
|
||||
if (l.coordconv) {
|
||||
coord_conv_gpu(l.output_gpu, l.outputs*l.batch, l.out_w, l.out_h, l.out_c, l.batch, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
|
||||
{
|
||||
if (l.coordconv) {
|
||||
coord_conv_gpu(l.delta_gpu, l.outputs*l.batch, l.out_w, l.out_h, l.out_c, l.batch, 1);
|
||||
}
|
||||
|
||||
if (l.antialiasing) {
|
||||
network_state s = { 0 };
|
||||
s.train = state.train;
|
||||
|
|
|
@ -229,6 +229,7 @@ convolutional_layer parse_convolutional(list *options, size_params params)
|
|||
layer.angle = option_find_float_quiet(options, "angle", 15);
|
||||
layer.grad_centr = option_find_int_quiet(options, "grad_centr", 0);
|
||||
layer.reverse = option_find_float_quiet(options, "reverse", 0);
|
||||
layer.coordconv = option_find_int_quiet(options, "coordconv", 0);
|
||||
|
||||
if(params.net.adam){
|
||||
layer.B1 = params.net.B1;
|
||||
|
|
Loading…
Reference in New Issue