mirror of https://github.com/AlexeyAB/darknet.git
Added stretch_sway
This commit is contained in:
parent
b2749c9e97
commit
af54d95184
|
@ -254,6 +254,7 @@ struct layer {
|
|||
int sway;
|
||||
int rotate;
|
||||
int stretch;
|
||||
int stretch_sway;
|
||||
float angle;
|
||||
float jitter;
|
||||
float saturation;
|
||||
|
|
|
@ -133,6 +133,7 @@ void sam_gpu(float *in_w_h_c, int size, int channel_size, float *scales_c, float
|
|||
void smooth_rotate_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse);
|
||||
void stretch_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, float scale, int reverse);
|
||||
void sway_and_flip_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse);
|
||||
void stretch_sway_flip_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse);
|
||||
void rotate_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int reverse);
|
||||
void reduce_and_expand_array_gpu(const float *src_gpu, float *dst_gpu, int size, int groups);
|
||||
void expand_array_gpu(const float *src_gpu, float *dst_gpu, int size, int groups);
|
||||
|
|
|
@ -1402,6 +1402,7 @@ __global__ void stretch_weights_kernel(const float *src_weight_gpu, float *weig
|
|||
//const float coef = (kernel_size*kernel_size) / (kernel_size*kernel_size - dropout_sum);
|
||||
for (int y = 0; y < kernel_size; ++y) {
|
||||
for (int x = 0; x < kernel_size; ++x) {
|
||||
//if (scale < 1) weight_deform_gpu[x + y*kernel_size + i] /= scale;// *= coef;
|
||||
weight_deform_gpu[x + y*kernel_size + i] /= scale;// *= coef;
|
||||
}
|
||||
}
|
||||
|
@ -1422,11 +1423,6 @@ extern "C" void stretch_weights_gpu(const float *src_weight_gpu, float *weight_d
|
|||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
__global__ void sway_and_flip_weights_kernel(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int kernel_size, int angle, int reverse)
|
||||
{
|
||||
const int index = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
|
@ -1616,6 +1612,173 @@ extern "C" void rotate_weights_gpu(const float *src_weight_gpu, float *weight_de
|
|||
|
||||
|
||||
|
||||
// Deforms convolutional weights for weight-space augmentation ("stretch_sway").
// The filter slices (each kernel_size x kernel_size) are split into 8 equal
// stages along the flattened filter dimension:
//   stage 0      - identity copy
//   stages 1..4  - bilinear stretch about the center by a fixed per-stage
//                  scale (0.65 / 0.8 / 1.2 / 1.4)
//   stages 5..6  - bilinear rotation by +angle / -angle degrees
//   stage 7      - horizontal flip
// One thread processes one filter slice. reverse != 0 inverts the deformation
// (used when deforming the weight updates on the backward pass).
// Expected launch: 1D grid with >= nweights / (kernel_size*kernel_size) threads.
__global__ void stretch_sway_flip_weights_kernel(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int kernel_size, float angle, int reverse)
{
    const int index = blockIdx.x*blockDim.x + threadIdx.x;
    const int kernel_area = kernel_size * kernel_size;
    const int i = index * kernel_area;  // flat offset of this thread's filter slice

    // Fix: clamp to >= 1 to avoid integer division by zero when there are
    // fewer than 8 filter slices (the original computed index / 0, which is
    // undefined behavior on the GPU).
    const int stage_step = max(1, (nweights / kernel_area) / 8); // 8 stages
    const int stage_id = index / stage_step;
    // NOTE(review): when (nweights / kernel_area) is not a multiple of 8 the
    // trailing slices get stage_id > 7 and match no branch, so their output
    // region keeps stale values - confirm this is intended.

    // nweights = (c / groups) * n * size * size;
    // kernel_area = size*size

    if (i < nweights)
    {
        if (stage_id == 0) {
            // stage 0: identity - copy the slice unchanged
            for (int x = 0; x < kernel_size; ++x) {
                for (int y = 0; y < kernel_size; ++y) {
                    weight_deform_gpu[x + y*kernel_size + i] = src_weight_gpu[x + y*kernel_size + i];
                }
            }
        }
        else if (stage_id == 1 || stage_id == 2 || stage_id == 3 || stage_id == 4)
        {
            // stages 1-4: bilinear stretch about the kernel center
            float scale = 0.5f;  // fallback only; stages 1-4 all assign below
            if (stage_id == 1) scale = 0.65f;
            else if (stage_id == 2) scale = 0.8f;
            else if (stage_id == 3) scale = 1.2f;
            else if (stage_id == 4) scale = 1.4f;

            if (reverse) scale = 1.f / scale;  // invert the deformation for the backward pass

            const int x_c = kernel_size / 2;
            const int y_c = kernel_size / 2;

            // interpolation mass sampled outside the kernel (kept for parity
            // with the rotation branch; compensation here uses `scale` instead)
            float dropout_sum = 0;

            for (int y = 0; y < kernel_size; ++y) {
                for (int x = 0; x < kernel_size; ++x) {
                    // inverse mapping: source coords of destination pixel (x, y)
                    // Xsource = x_c + (x_d - x_c) / scale
                    // Ysource = y_c + (y_d - y_c) / scale
                    float x_s = x_c + (x - x_c) / scale;
                    float y_s = y_c + (y - y_c) / scale;

                    // bilinear corners; force x_0 != x_1 and y_0 != y_1 so the
                    // interpolation weights below are well defined
                    int x_0 = floorf(x_s); // round down
                    int x_1 = ceilf(x_s);  // round up
                    if (x_0 == x_1) x_1 = x_0 + 1;
                    int y_0 = floorf(y_s);
                    int y_1 = ceilf(y_s);
                    if (y_0 == y_1) y_1 = y_0 + 1;

                    float c_x_0 = x_1 - x_s;
                    float c_x_1 = x_s - x_0;
                    float c_y_0 = y_1 - y_s;
                    float c_y_1 = y_s - y_0;

                    // accumulate the 4 neighbors that fall inside the kernel;
                    // taps that fall outside contribute to dropout_sum instead
                    float val = 0;
                    if (x_0 >= 0 && x_0 < kernel_size && y_0 >= 0 && y_0 < kernel_size) val += src_weight_gpu[x_0 + y_0*kernel_size + i] * c_x_0 * c_y_0;
                    else dropout_sum += c_x_0 * c_y_0;

                    if (x_1 >= 0 && x_1 < kernel_size && y_0 >= 0 && y_0 < kernel_size) val += src_weight_gpu[x_1 + y_0*kernel_size + i] * c_x_1 * c_y_0;
                    else dropout_sum += c_x_1 * c_y_0;

                    if (x_0 >= 0 && x_0 < kernel_size && y_1 >= 0 && y_1 < kernel_size) val += src_weight_gpu[x_0 + y_1*kernel_size + i] * c_x_0 * c_y_1;
                    else dropout_sum += c_x_0 * c_y_1;

                    if (x_1 >= 0 && x_1 < kernel_size && y_1 >= 0 && y_1 < kernel_size) val += src_weight_gpu[x_1 + y_1*kernel_size + i] * c_x_1 * c_y_1;
                    else dropout_sum += c_x_1 * c_y_1;

                    weight_deform_gpu[x + y*kernel_size + i] = val;
                }
            }

            // compensate for dropped items: upscaling (scale > 1) spreads mass
            // outside the kernel, so boost what remains
            //const float coef = (kernel_size*kernel_size) / (kernel_size*kernel_size - dropout_sum);
            for (int y = 0; y < kernel_size; ++y) {
                for (int x = 0; x < kernel_size; ++x) {
                    if (scale > 1)
                        weight_deform_gpu[x + y*kernel_size + i] *= scale;// *= coef;
                }
            }
        }
        else if (stage_id == 5 || stage_id == 6)
        {
            // stages 5-6: rotate right (+angle) or left (-angle)
            if (stage_id == 6) angle = -angle;
            if (reverse) angle = -angle;  // backward pass rotates back

            // Fix: float-suffixed constants - the original double literal
            // forced a double multiply before each float trig call
            const float cos_a = cosf(angle * (3.14159265f / 180.f));
            const float sin_a = sinf(angle * (3.14159265f / 180.f));
            const int x_c = kernel_size / 2;
            const int y_c = kernel_size / 2;

            // interpolation mass lost outside the kernel, compensated below
            float dropout_sum = 0;

            for (int y = 0; y < kernel_size; ++y) {
                for (int x = 0; x < kernel_size; ++x) {
                    // inverse rotation about the kernel center:
                    // Xsource = x*cos(alpha) + y*sin(alpha)
                    // Ysource = -x*sin(alpha) + y*cos(alpha)
                    float x_s = x_c + (x - x_c)*cos_a + (y - y_c)*sin_a;
                    float y_s = y_c - (x - x_c)*sin_a + (y - y_c)*cos_a;

                    int x_0 = floorf(x_s); // round down
                    int x_1 = ceilf(x_s);  // round up
                    if (x_0 == x_1) x_1 = x_0 + 1;
                    int y_0 = floorf(y_s);
                    int y_1 = ceilf(y_s);
                    if (y_0 == y_1) y_1 = y_0 + 1;

                    float c_x_0 = x_1 - x_s;
                    float c_x_1 = x_s - x_0;
                    float c_y_0 = y_1 - y_s;
                    float c_y_1 = y_s - y_0;

                    float val = 0;
                    if (x_0 >= 0 && x_0 < kernel_size && y_0 >= 0 && y_0 < kernel_size) val += src_weight_gpu[x_0 + y_0*kernel_size + i] * c_x_0 * c_y_0;
                    else dropout_sum += c_x_0 * c_y_0;

                    if (x_1 >= 0 && x_1 < kernel_size && y_0 >= 0 && y_0 < kernel_size) val += src_weight_gpu[x_1 + y_0*kernel_size + i] * c_x_1 * c_y_0;
                    else dropout_sum += c_x_1 * c_y_0;

                    if (x_0 >= 0 && x_0 < kernel_size && y_1 >= 0 && y_1 < kernel_size) val += src_weight_gpu[x_0 + y_1*kernel_size + i] * c_x_0 * c_y_1;
                    else dropout_sum += c_x_0 * c_y_1;

                    if (x_1 >= 0 && x_1 < kernel_size && y_1 >= 0 && y_1 < kernel_size) val += src_weight_gpu[x_1 + y_1*kernel_size + i] * c_x_1 * c_y_1;
                    else dropout_sum += c_x_1 * c_y_1;

                    weight_deform_gpu[x + y*kernel_size + i] = val;
                }
            }

            // compensate for dropped items so the total weight magnitude is
            // approximately preserved after rotation
            const float coef = (kernel_size*kernel_size) / (kernel_size*kernel_size - dropout_sum);
            for (int y = 0; y < kernel_size; ++y) {
                for (int x = 0; x < kernel_size; ++x) {
                    weight_deform_gpu[x + y*kernel_size + i] *= coef;
                }
            }
        }
        else if (stage_id == 7)
        {
            // stage 7: horizontal flip of the slice
            for (int y = 0; y < kernel_size; ++y) {
                for (int x = 0; x < kernel_size; ++x) {
                    weight_deform_gpu[(kernel_size - x - 1) + y*kernel_size + i] = src_weight_gpu[x + y*kernel_size + i];
                }
            }
        }
    }
}
|
||||
|
||||
|
||||
// Host launcher for stretch_sway_flip_weights_kernel: deforms (or, with
// reverse != 0, un-deforms) all filter slices of a convolutional layer.
// One thread is launched per size x size filter slice, on the darknet
// CUDA stream; launch errors are surfaced via CHECK_CUDA.
extern "C" void stretch_sway_flip_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse)
{
    const int num_slices = nweights / (size * size);  // one thread per filter slice
    const int grid_size = get_number_of_blocks(num_slices, BLOCK);

    stretch_sway_flip_weights_kernel << <grid_size, BLOCK, 0, get_cuda_stream() >> > (
        src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse);

    CHECK_CUDA(cudaPeekAtLastError());
}
|
||||
|
||||
|
||||
|
||||
__global__ void reduce_and_expand_array_kernel(const float *src_gpu, float *dst_gpu, int current_size, int groups)
|
||||
{
|
||||
const int index = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
|
|
|
@ -1210,6 +1210,7 @@ void update_convolutional_layer_gpu(layer l, int batch, float learning_rate_init
|
|||
if (l.rotate) rotate_weights_gpu(l.weight_updates_gpu, l.weight_deform_gpu, l.nweights, l.n, l.size, 1);
|
||||
else if (l.sway) sway_and_flip_weights_gpu(l.weight_updates_gpu, l.weight_deform_gpu, l.nweights, l.n, l.size, l.angle, 1);
|
||||
else if (l.stretch) stretch_weights_gpu(l.weight_updates_gpu, l.weight_deform_gpu, l.nweights, l.n, l.size, 0, 1);
|
||||
else if (l.stretch_sway) stretch_sway_flip_weights_gpu(l.weight_updates_gpu, l.weight_deform_gpu, l.nweights, l.n, l.size, l.angle, 1);
|
||||
|
||||
//simple_copy_ongpu(l.nweights, l.weight_updates_gpu, l.weight_deform_gpu);
|
||||
|
||||
|
@ -1268,6 +1269,7 @@ void update_convolutional_layer_gpu(layer l, int batch, float learning_rate_init
|
|||
if (l.rotate) rotate_weights_gpu(l.weight_deform_gpu, l.weights_gpu, l.nweights, l.n, l.size, 0);
|
||||
else if (l.sway) sway_and_flip_weights_gpu(l.weight_deform_gpu, l.weights_gpu, l.nweights, l.n, l.size, l.angle, 0);
|
||||
else if (l.stretch) stretch_weights_gpu(l.weight_deform_gpu, l.weights_gpu, l.nweights, l.n, l.size, 0, 0);
|
||||
else if (l.stretch_sway) stretch_sway_flip_weights_gpu(l.weight_deform_gpu, l.weights_gpu, l.nweights, l.n, l.size, l.angle, 0);
|
||||
|
||||
//printf(" angle = %f, reverse = %d \n", l.angle, 0);
|
||||
//cuda_pull_array(l.weights_gpu, l.weights, l.nweights);
|
||||
|
|
|
@ -232,7 +232,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference, size_t workspace_
|
|||
// 3. FP32 Master Copy of Weights
|
||||
// More: http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#tensor_ops
|
||||
CHECK_CUDNN(cudnnSetConvolutionGroupCount(l->convDesc, l->groups));
|
||||
if (l->groups == 1) CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH));
|
||||
//if (l->groups == 1) CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH));
|
||||
#if((CUDNN_MAJOR*10 + CUDNN_MINOR) >= 72) // cuDNN >= 7.2
|
||||
//CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION)); // reduces the speed of regular and group convolution
|
||||
#endif
|
||||
|
|
|
@ -1628,7 +1628,9 @@ data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *h
|
|||
}
|
||||
|
||||
for (j = 0; j < d.y.cols; ++j) {
|
||||
d.y.vals[i][j] = d.y.vals[i][j] * s1 + d2.y.vals[i][j] * s2 + d3.y.vals[i][j] * s3 + d4.y.vals[i][j] * s4;
|
||||
const float max_s = 1;// max_val_cmp(s1, max_val_cmp(s2, max_val_cmp(s3, s4)));
|
||||
|
||||
d.y.vals[i][j] = d.y.vals[i][j] * s1 / max_s + d2.y.vals[i][j] * s2 / max_s + d3.y.vals[i][j] * s3 / max_s + d4.y.vals[i][j] * s4 / max_s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -204,11 +204,12 @@ convolutional_layer parse_convolutional(list *options, size_params params)
|
|||
int sway = option_find_int_quiet(options, "sway", 0);
|
||||
int rotate = option_find_int_quiet(options, "rotate", 0);
|
||||
int stretch = option_find_int_quiet(options, "stretch", 0);
|
||||
if ((sway + rotate + stretch) > 1) {
|
||||
int stretch_sway = option_find_int_quiet(options, "stretch_sway", 0);
|
||||
if ((sway + rotate + stretch + stretch_sway) > 1) {
|
||||
printf(" Error: should be used only 1 param: sway=1, rotate=1 or stretch=1 in the [convolutional] layer \n");
|
||||
exit(0);
|
||||
}
|
||||
int deform = sway || rotate || stretch;
|
||||
int deform = sway || rotate || stretch || stretch_sway;
|
||||
if (deform && size == 1) {
|
||||
printf(" Error: params (sway=1, rotate=1 or stretch=1) should be used only with size >=3 in the [convolutional] layer \n");
|
||||
exit(0);
|
||||
|
@ -220,6 +221,7 @@ convolutional_layer parse_convolutional(list *options, size_params params)
|
|||
layer.sway = sway;
|
||||
layer.rotate = rotate;
|
||||
layer.stretch = stretch;
|
||||
layer.stretch_sway = stretch_sway;
|
||||
layer.angle = option_find_float_quiet(options, "angle", 15);
|
||||
|
||||
if(params.net.adam){
|
||||
|
@ -778,7 +780,7 @@ dropout_layer parse_dropout(list *options, size_params params)
|
|||
printf(" [dropout] - dropblock_size_abs = %d that is bigger than layer size %d x %d \n", dropblock_size_abs, params.w, params.h);
|
||||
dropblock_size_abs = min_val_cmp(params.w, params.h);
|
||||
}
|
||||
if (!dropblock_size_rel && !dropblock_size_abs) {
|
||||
if (dropblock && !dropblock_size_rel && !dropblock_size_abs) {
|
||||
printf(" [dropout] - None of the parameters (dropblock_size_rel or dropblock_size_abs) are set, will be used: dropblock_size_abs = 7 \n");
|
||||
dropblock_size_abs = 7;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue