mirror of https://github.com/AlexeyAB/darknet.git
updates and things
This commit is contained in:
parent
aebe937710
commit
8f1b4e0962
|
@ -14,6 +14,7 @@ decaf/
|
|||
submission/
|
||||
cfg/
|
||||
darknet
|
||||
.fuse*
|
||||
|
||||
# OS Generated #
|
||||
.DS_Store*
|
||||
|
|
8
Makefile
8
Makefile
|
@ -1,6 +1,6 @@
|
|||
GPU=1
|
||||
CUDNN=1
|
||||
OPENCV=1
|
||||
GPU=0
|
||||
CUDNN=0
|
||||
OPENCV=0
|
||||
DEBUG=0
|
||||
|
||||
ARCH= --gpu-architecture=compute_52 --gpu-code=compute_52
|
||||
|
@ -41,7 +41,7 @@ CFLAGS+= -DCUDNN
|
|||
LDFLAGS+= -lcudnn
|
||||
endif
|
||||
|
||||
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
|
||||
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
|
||||
ifeq ($(GPU), 1)
|
||||
LDFLAGS+= -lstdc++
|
||||
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
|
||||
|
|
|
@ -0,0 +1,209 @@
|
|||
[net]
|
||||
batch=128
|
||||
subdivisions=1
|
||||
height=224
|
||||
width=224
|
||||
max_crop=320
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
|
||||
learning_rate=0.01
|
||||
max_batches = 0
|
||||
policy=steps
|
||||
steps=444000,590000,970000
|
||||
scales=.5,.2,.1
|
||||
|
||||
#policy=sigmoid
|
||||
#gamma=.00008
|
||||
#step=100000
|
||||
#max_batches=200000
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=7
|
||||
stride=2
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=2048
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=2048
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[avgpool]
|
||||
|
||||
[connected]
|
||||
output=21842
|
||||
activation=leaky
|
||||
|
||||
[softmax]
|
||||
groups=1
|
||||
|
||||
[cost]
|
||||
type=sse
|
||||
|
126
cfg/go.test.cfg
126
cfg/go.test.cfg
|
@ -3,102 +3,126 @@ batch=1
|
|||
subdivisions=1
|
||||
height=19
|
||||
width=19
|
||||
channels=8
|
||||
channels=1
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
|
||||
learning_rate=0.1
|
||||
max_batches = 0
|
||||
policy=steps
|
||||
steps=50000
|
||||
scales=.1
|
||||
policy=poly
|
||||
power=4
|
||||
max_batches=400000
|
||||
|
||||
[convolutional]
|
||||
filters=512
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=512
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=512
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=512
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=512
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=256
|
||||
size=1
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
[convolutional]
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=relu
|
||||
batch_normalize=1
|
||||
|
||||
|
||||
[convolutional]
|
||||
filters=1
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
activation=linear
|
||||
|
||||
[softmax]
|
||||
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
classes=1000
|
||||
labels = data/inet.labels.list
|
||||
names = data/shortnames.txt
|
||||
train = /data/imagenet/imagenet1k.train.list
|
||||
valid = /data/imagenet/imagenet1k.valid.list
|
||||
top=5
|
||||
test = /Users/pjreddie/Documents/sites/selfie/paths.list
|
||||
train = /data/imagenet/imagenet1k.train.list
|
||||
valid = /data/imagenet/imagenet1k.valid.list
|
||||
backup = /home/pjreddie/backup/
|
||||
labels = data/imagenet.labels.list
|
||||
names = data/imagenet.shortnames.list
|
||||
top=5
|
||||
|
||||
|
|
16
cfg/yolo.cfg
16
cfg/yolo.cfg
|
@ -1,11 +1,14 @@
|
|||
[net]
|
||||
batch=64
|
||||
subdivisions=2
|
||||
batch=1
|
||||
subdivisions=1
|
||||
height=448
|
||||
width=448
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
saturation=1.5
|
||||
exposure=1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.0005
|
||||
policy=steps
|
||||
|
@ -13,15 +16,6 @@ steps=200,400,600,20000,30000
|
|||
scales=2.5,2,2,.1,.1
|
||||
max_batches = 40000
|
||||
|
||||
[crop]
|
||||
crop_width=448
|
||||
crop_height=448
|
||||
flip=0
|
||||
angle=0
|
||||
saturation = 1.5
|
||||
exposure = 1.5
|
||||
noadjust=1
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
|
|
|
@ -0,0 +1,257 @@
|
|||
[net]
|
||||
batch=64
|
||||
subdivisions=4
|
||||
height=448
|
||||
width=448
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
saturation=1.5
|
||||
exposure=1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.0005
|
||||
policy=steps
|
||||
steps=200,400,600,20000,30000
|
||||
scales=2.5,2,2,.1,.1
|
||||
max_batches = 40000
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=7
|
||||
stride=2
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=192
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
#######
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=2
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[local]
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=256
|
||||
activation=leaky
|
||||
|
||||
[dropout]
|
||||
probability=.5
|
||||
|
||||
[connected]
|
||||
output= 1715
|
||||
activation=linear
|
||||
|
||||
[detection]
|
||||
classes=20
|
||||
coords=4
|
||||
rescore=1
|
||||
side=7
|
||||
num=3
|
||||
softmax=0
|
||||
sqrt=1
|
||||
jitter=.2
|
||||
|
||||
object_scale=1
|
||||
noobject_scale=.5
|
||||
class_scale=1
|
||||
coord_scale=5
|
||||
|
BIN
data/dog.jpg
BIN
data/dog.jpg
Binary file not shown.
Before Width: | Height: | Size: 160 KiB After Width: | Height: | Size: 160 KiB |
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
1000
data/shortnames.txt
1000
data/shortnames.txt
File diff suppressed because it is too large
Load Diff
|
@ -40,6 +40,7 @@ void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float
|
|||
void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY);
|
||||
void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
|
||||
void scal_ongpu(int N, float ALPHA, float * X, int INCX);
|
||||
void supp_ongpu(int N, float ALPHA, float * X, int INCX);
|
||||
void mask_ongpu(int N, float * X, float mask_num, float * mask);
|
||||
void const_ongpu(int N, float ALPHA, float *X, int INCX);
|
||||
void pow_ongpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
|
||||
|
|
|
@ -368,6 +368,14 @@ __global__ void constrain_kernel(int N, float ALPHA, float *X, int INCX)
|
|||
if(i < N) X[i*INCX] = min(ALPHA, max(-ALPHA, X[i*INCX]));
|
||||
}
|
||||
|
||||
/*
 * Suppress small values: each thread handles one index i (derived from its
 * block/grid/thread coordinates) and zeroes X[i*INCX] when its square is
 * strictly less than ALPHA squared. Threads with i >= N do nothing.
 */
__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX)
{
    int idx = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(idx >= N) return;

    float val = X[idx*INCX];
    if((val * val) < (ALPHA * ALPHA)) X[idx*INCX] = 0;
}
|
||||
|
||||
__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX)
|
||||
{
|
||||
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
|
@ -552,6 +560,12 @@ extern "C" void scal_ongpu(int N, float ALPHA, float * X, int INCX)
|
|||
check_error(cudaPeekAtLastError());
|
||||
}
|
||||
|
||||
/*
 * Host-side launcher for supp_kernel over N strided elements of X, followed
 * by a check of the most recent CUDA error.
 */
extern "C" void supp_ongpu(int N, float ALPHA, float * X, int INCX)
{
    supp_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);

    cudaError_t status = cudaPeekAtLastError();
    check_error(status);
}
|
||||
|
||||
extern "C" void fill_ongpu(int N, float ALPHA, float * X, int INCX)
|
||||
{
|
||||
fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
|
||||
|
@ -633,6 +647,7 @@ extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *er
|
|||
}
|
||||
|
||||
|
||||
|
||||
__global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *c)
|
||||
{
|
||||
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
|
|
222
src/classifier.c
222
src/classifier.c
|
@ -39,6 +39,18 @@ list *read_data_cfg(char *filename)
|
|||
return options;
|
||||
}
|
||||
|
||||
/*
 * Split each "name value" label at its first space and collect the values.
 *
 * labels: n mutable, NUL-terminated strings. Each one that contains a space
 *         is cut there (the space is overwritten with NUL), so afterwards
 *         labels[i] holds only the text before the space.
 * n:      number of entries in labels.
 *
 * Returns a calloc'd array of n floats owned by the caller, where v[i] is
 * atof() of the text following the first space. A label without a space is
 * left untouched and its value stays 0. Returns 0 when calloc yields no
 * memory.
 */
float *get_regression_values(char **labels, int n)
{
    float *v = calloc(n, sizeof(float));
    int i;
    if(!v) return 0;
    for(i = 0; i < n; ++i){
        char *p = strchr(labels[i], ' ');
        if(!p) continue;    /* no separator: used to dereference NULL here */
        *p = 0;
        v[i] = atof(p+1);
    }
    return v;
}
|
||||
|
||||
void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
||||
{
|
||||
int nthreads = 8;
|
||||
|
@ -85,6 +97,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
|||
args.angle = net.angle;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
args.size = net.w;
|
||||
|
||||
args.paths = paths;
|
||||
|
@ -116,6 +129,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
|||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
time=clock();
|
||||
|
||||
#ifdef OPENCV
|
||||
if(0){
|
||||
int u;
|
||||
for(u = 0; u < imgs; ++u){
|
||||
|
@ -124,6 +138,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
|
|||
cvWaitKey(0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
float loss = train_network(net, train);
|
||||
if(avg_loss == -1) avg_loss = loss;
|
||||
|
@ -440,7 +455,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
|
|||
|
||||
char **labels = get_labels(label_list);
|
||||
list *plist = get_paths(valid_list);
|
||||
int scales[] = {192, 224, 288, 320, 352};
|
||||
int scales[] = {224, 288, 320, 352, 384};
|
||||
int nscales = sizeof(scales)/sizeof(scales[0]);
|
||||
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
|
@ -484,6 +499,88 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
|
|||
}
|
||||
}
|
||||
|
||||
void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num)
|
||||
{
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
set_batch_network(&net, 1);
|
||||
srand(2222222);
|
||||
|
||||
list *options = read_data_cfg(datacfg);
|
||||
|
||||
char *name_list = option_find_str(options, "names", 0);
|
||||
if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
|
||||
int top = option_find_int(options, "top", 1);
|
||||
|
||||
int i = 0;
|
||||
char **names = get_labels(name_list);
|
||||
clock_t time;
|
||||
int *indexes = calloc(top, sizeof(int));
|
||||
char buff[256];
|
||||
char *input = buff;
|
||||
while(1){
|
||||
if(filename){
|
||||
strncpy(input, filename, 256);
|
||||
}else{
|
||||
printf("Enter Image Path: ");
|
||||
fflush(stdout);
|
||||
input = fgets(input, 256, stdin);
|
||||
if(!input) return;
|
||||
strtok(input, "\n");
|
||||
}
|
||||
image orig = load_image_color(input, 0, 0);
|
||||
image r = resize_min(orig, 256);
|
||||
image im = crop_image(r, (r.w - 224 - 1)/2 + 1, (r.h - 224 - 1)/2 + 1, 224, 224);
|
||||
float mean[] = {0.48263312050943, 0.45230225481413, 0.40099074308742};
|
||||
float std[] = {0.22590347483426, 0.22120921437787, 0.22103996251583};
|
||||
float var[3];
|
||||
var[0] = std[0]*std[0];
|
||||
var[1] = std[1]*std[1];
|
||||
var[2] = std[2]*std[2];
|
||||
|
||||
normalize_cpu(im.data, mean, var, 1, 3, im.w*im.h);
|
||||
|
||||
float *X = im.data;
|
||||
time=clock();
|
||||
float *predictions = network_predict(net, X);
|
||||
|
||||
layer l = net.layers[layer_num];
|
||||
for(i = 0; i < l.c; ++i){
|
||||
if(l.rolling_mean) printf("%f %f %f\n", l.rolling_mean[i], l.rolling_variance[i], l.scales[i]);
|
||||
}
|
||||
#ifdef GPU
|
||||
cuda_pull_array(l.output_gpu, l.output, l.outputs);
|
||||
#endif
|
||||
for(i = 0; i < l.outputs; ++i){
|
||||
printf("%f\n", l.output[i]);
|
||||
}
|
||||
/*
|
||||
|
||||
printf("\n\nWeights\n");
|
||||
for(i = 0; i < l.n*l.size*l.size*l.c; ++i){
|
||||
printf("%f\n", l.filters[i]);
|
||||
}
|
||||
|
||||
printf("\n\nBiases\n");
|
||||
for(i = 0; i < l.n; ++i){
|
||||
printf("%f\n", l.biases[i]);
|
||||
}
|
||||
*/
|
||||
|
||||
top_predictions(net, top, indexes);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
for(i = 0; i < top; ++i){
|
||||
int index = indexes[i];
|
||||
printf("%s: %f\n", names[index], predictions[index]);
|
||||
}
|
||||
free_image(im);
|
||||
if (filename) break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename)
|
||||
{
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
|
@ -649,6 +746,127 @@ void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_
|
|||
}
|
||||
|
||||
|
||||
/*
 * Video demo that turns classifier output into a rolling "threat" score and
 * draws it as a gauge on every frame. Compiled to a no-op without OPENCV.
 *
 * datacfg:    data config; supplies "top" (default 1) and "names" (falling
 *             back to "labels", then to "data/labels.list").
 * cfgfile:    network definition passed to parse_network_cfg().
 * weightfile: weights to load; skipped when NULL.
 * cam_index:  camera to open when filename is NULL.
 * filename:   video file to read instead of a camera.
 *
 * Per frame: the frame is resized to net.w x net.h and classified; the score
 * predictions[1]*.6 + predictions[2] is blended into `threat` with weight
 * `roll`; the gauge and two level markers (lit above .57 and .97) are drawn
 * onto the frame; the frame is saved under /home/pjreddie/tmp/ and the top
 * predictions are printed. The loop ends when a frame has no data.
 * NOTE(review): assumes the network has at least three outputs -- confirm.
 */
void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
{
#ifdef OPENCV
    float threat = 0;
    float roll = .2;

    printf("Classifier Demo\n");
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);
    list *options = read_data_cfg(datacfg);

    srand(2222222);
    CvCapture * cap;

    if(filename){
        cap = cvCaptureFromFile(filename);
    }else{
        cap = cvCaptureFromCAM(cam_index);
    }

    int top = option_find_int(options, "top", 1);

    /* Same fallback as try_classifier, so a config without "names" no longer
     * hands a NULL path to get_labels. */
    char *name_list = option_find_str(options, "names", 0);
    if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
    char **names = get_labels(name_list);

    int *indexes = calloc(top, sizeof(int));

    if(!cap) error("Couldn't connect to webcam.\n");
    //cvNamedWindow("Threat", CV_WINDOW_NORMAL);
    //cvResizeWindow("Threat", 512, 512);
    float fps = 0;
    int i;

    int count = 0;

    while(1){
        ++count;
        struct timeval tval_before, tval_after, tval_result;
        gettimeofday(&tval_before, NULL);

        image in = get_image_from_stream(cap);
        if(!in.data) break;
        image in_s = resize_image(in, net.w, net.h);

        /* out aliases in: drawing happens directly on the captured frame. */
        image out = in;
        int x1 = out.w / 20;
        int y1 = out.h / 20;
        int x2 = 2*x1;
        int y2 = out.h - out.h/20;

        int border = .01*out.h;
        int h = y2 - y1 - 2*border;
        int w = x2 - x1 - 2*border;

        float *predictions = network_predict(net, in_s.data);
        float curr_threat = predictions[0] * 0 + predictions[1] * .6 + predictions[2];
        threat = roll * curr_threat + (1-roll) * threat;

        /* Upper level marker: tick, coloured fill when exceeded, outline. */
        draw_box_width(out, x2 + border, y1 + .02*h, x2 + .5 * w, y1 + .02*h + border, border, 0,0,0);
        if(threat > .97) {
            draw_box_width(out,  x2 + .5 * w + border,
                    y1 + .02*h - 2*border,
                    x2 + .5 * w + 6*border,
                    y1 + .02*h + 3*border, 3*border, 1,0,0);
        }
        draw_box_width(out,  x2 + .5 * w + border,
                y1 + .02*h - 2*border,
                x2 + .5 * w + 6*border,
                y1 + .02*h + 3*border, .5*border, 0,0,0);
        /* Lower level marker, same layout. */
        draw_box_width(out, x2 + border, y1 + .42*h, x2 + .5 * w, y1 + .42*h + border, border, 0,0,0);
        if(threat > .57) {
            draw_box_width(out,  x2 + .5 * w + border,
                    y1 + .42*h - 2*border,
                    x2 + .5 * w + 6*border,
                    y1 + .42*h + 3*border, 3*border, 1,1,0);
        }
        draw_box_width(out,  x2 + .5 * w + border,
                y1 + .42*h - 2*border,
                x2 + .5 * w + 6*border,
                y1 + .42*h + 3*border, .5*border, 0,0,0);

        /* Gauge frame, then one line per filled row with a colour that
         * depends on how far up the gauge the row is. */
        draw_box_width(out, x1, y1, x2, y2, border, 0,0,0);
        for(i = 0; i < threat * h ; ++i){
            float ratio = (float) i / h;
            float r = (ratio < .5) ? (2*(ratio)) : 1;
            float g = (ratio < .5) ? 1 : 1 - 2*(ratio - .5);
            draw_box_width(out, x1 + border, y2 - border - i, x2 - border, y2 - border - i, 1, r, g, 0);
        }
        top_predictions(net, top, indexes);
        char buff[256];
        snprintf(buff, sizeof(buff), "/home/pjreddie/tmp/threat_%06d", count);
        save_image(out, buff);

        printf("\033[2J");
        printf("\033[1;1H");
        printf("\nFPS:%.0f\n",fps);

        for(i = 0; i < top; ++i){
            int index = indexes[i];
            printf("%.1f%%: %s\n", predictions[index]*100, names[index]);
        }

        if(0){
            show_image(out, "Threat");
            cvWaitKey(10);
        }
        free_image(in_s);
        free_image(in);

        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        /* Use the whole interval: tv_usec alone drops full seconds and can be
         * 0, which made fps infinite from then on. */
        float elapsed_us = 1000000.f*tval_result.tv_sec + tval_result.tv_usec;
        if(elapsed_us > 0){
            float curr = 1000000.f/elapsed_us;
            fps = .9*fps + .1*curr;
        }
    }
    free(indexes);
    cvReleaseCapture(&cap);
#endif
}
|
||||
|
||||
|
||||
void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
|
||||
{
|
||||
#ifdef OPENCV
|
||||
|
@ -732,8 +950,10 @@ void run_classifier(int argc, char **argv)
|
|||
char *layer_s = (argc > 7) ? argv[7]: 0;
|
||||
int layer = layer_s ? atoi(layer_s) : -1;
|
||||
if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename);
|
||||
else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s));
|
||||
else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, clear);
|
||||
else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename);
|
||||
else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename);
|
||||
else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer);
|
||||
else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights);
|
||||
else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights);
|
||||
|
|
11
src/coco.c
11
src/coco.c
|
@ -25,6 +25,7 @@ void train_coco(char *cfgfile, char *weightfile)
|
|||
//char *train_images = "/home/pjreddie/data/voc/test/train.txt";
|
||||
//char *train_images = "/home/pjreddie/data/coco/train.txt";
|
||||
char *train_images = "data/coco.trainval.txt";
|
||||
//char *train_images = "data/bags.train.list";
|
||||
char *backup_directory = "/home/pjreddie/backup/";
|
||||
srand(time(0));
|
||||
data_seed = time(0);
|
||||
|
@ -63,6 +64,11 @@ void train_coco(char *cfgfile, char *weightfile)
|
|||
args.d = &buffer;
|
||||
args.type = REGION_DATA;
|
||||
|
||||
args.angle = net.angle;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
|
||||
pthread_t load_thread = load_data_in_thread(args);
|
||||
clock_t time;
|
||||
//while(i*imgs < N*120){
|
||||
|
@ -94,6 +100,11 @@ void train_coco(char *cfgfile, char *weightfile)
|
|||
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
if(i%100==0){
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.backup", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
free_data(train);
|
||||
}
|
||||
char buff[256];
|
||||
|
|
10
src/col2im.c
10
src/col2im.c
|
@ -16,13 +16,9 @@ void col2im_cpu(float* data_col,
|
|||
int ksize, int stride, int pad, float* data_im)
|
||||
{
|
||||
int c,h,w;
|
||||
int height_col = (height - ksize) / stride + 1;
|
||||
int width_col = (width - ksize) / stride + 1;
|
||||
if (pad){
|
||||
height_col = 1 + (height-1) / stride;
|
||||
width_col = 1 + (width-1) / stride;
|
||||
pad = ksize/2;
|
||||
}
|
||||
int height_col = (height + 2*pad - ksize) / stride + 1;
|
||||
int width_col = (width + 2*pad - ksize) / stride + 1;
|
||||
|
||||
int channels_col = channels * ksize * ksize;
|
||||
for (c = 0; c < channels_col; ++c) {
|
||||
int w_offset = c % ksize;
|
||||
|
|
|
@ -46,7 +46,6 @@ void col2im_ongpu(float *data_col,
|
|||
int ksize, int stride, int pad, float *data_im){
|
||||
// We are going to launch channels * height_col * width_col kernels, each
|
||||
// kernel responsible for copying a single-channel grid.
|
||||
pad = pad ? ksize/2 : 0;
|
||||
int height_col = (height + 2 * pad - ksize) / stride + 1;
|
||||
int width_col = (width + 2 * pad - ksize) / stride + 1;
|
||||
int num_kernels = channels * height * width;
|
||||
|
|
|
@ -17,7 +17,7 @@ __global__ void binarize_kernel(float *x, int n, float *binary)
|
|||
{
|
||||
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if (i >= n) return;
|
||||
binary[i] = (x[i] > 0) ? 1 : -1;
|
||||
binary[i] = (x[i] >= 0) ? 1 : -1;
|
||||
}
|
||||
|
||||
void binarize_gpu(float *x, int n, float *binary)
|
||||
|
@ -60,6 +60,7 @@ __global__ void binarize_filters_kernel(float *filters, int n, int size, float *
|
|||
mean = mean / size;
|
||||
for(i = 0; i < size; ++i){
|
||||
binary[f*size + i] = (filters[f*size + i] > 0) ? mean : -mean;
|
||||
//binary[f*size + i] = filters[f*size + i];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -70,18 +70,12 @@ void binarize_input(float *input, int n, int size, float *binary)
|
|||
|
||||
/*
 * Output height of a convolutional layer:
 *     (l.h + 2*l.pad - l.size) / l.stride + 1      (integer division)
 * l.pad is counted once for each of the two edges, so it is a number of
 * padded rows rather than an on/off flag. The earlier flag-based computation
 * that preceded this return, and made it unreachable, has been removed.
 */
int convolutional_out_height(convolutional_layer l)
{
    return (l.h + 2*l.pad - l.size) / l.stride + 1;
}
|
||||
|
||||
/*
 * Output width of a convolutional layer:
 *     (l.w + 2*l.pad - l.size) / l.stride + 1      (integer division)
 * l.pad is counted once for each of the two edges, so it is a number of
 * padded columns rather than an on/off flag. The earlier flag-based
 * computation that preceded this return, and made it unreachable, has been
 * removed.
 */
int convolutional_out_width(convolutional_layer l)
{
    return (l.w + 2*l.pad - l.size) / l.stride + 1;
}
|
||||
|
||||
image get_convolutional_image(convolutional_layer l)
|
||||
|
@ -148,8 +142,7 @@ void cudnn_convolutional_setup(layer *l)
|
|||
cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
|
||||
cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
|
||||
cudnnSetFilter4dDescriptor(l->filterDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size);
|
||||
int padding = l->pad ? l->size/2 : 0;
|
||||
cudnnSetConvolution2dDescriptor(l->convDesc, padding, padding, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
|
||||
cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
|
||||
cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
|
||||
l->srcTensorDesc,
|
||||
l->filterDesc,
|
||||
|
@ -178,7 +171,7 @@ void cudnn_convolutional_setup(layer *l)
|
|||
#endif
|
||||
#endif
|
||||
|
||||
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalize, int binary, int xnor)
|
||||
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor)
|
||||
{
|
||||
int i;
|
||||
convolutional_layer l = {0};
|
||||
|
@ -193,7 +186,7 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
|
|||
l.batch = batch;
|
||||
l.stride = stride;
|
||||
l.size = size;
|
||||
l.pad = pad;
|
||||
l.pad = padding;
|
||||
l.batch_normalize = batch_normalize;
|
||||
|
||||
l.filters = calloc(c*n*size*size, sizeof(float));
|
||||
|
|
|
@ -98,6 +98,15 @@ void push_cost_layer(cost_layer l)
|
|||
cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs);
|
||||
}
|
||||
|
||||
/*
 * qsort-style comparator that orders floats by ascending absolute value.
 *
 * a, b: pointers to the two float elements being compared.
 * Returns a negative value when |*a| < |*b|, zero when the magnitudes are
 * equal, and a positive value when |*a| > |*b|.
 *
 * NaN elements compare equal to each other and greater than every number.
 * Without this, a NaN would compare "equal" to everything, which is not a
 * consistent ordering and makes the result of qsort unspecified.
 */
int float_abs_compare (const void * a, const void * b)
{
    float fa = *(const float*) a;
    float fb = *(const float*) b;

    /* x != x is true only for NaN. */
    int a_nan = (fa != fa);
    int b_nan = (fb != fb);
    if(a_nan || b_nan) return a_nan - b_nan;

    if(fa < 0) fa = -fa;
    if(fb < 0) fb = -fb;

    /* Three-way compare without subtraction, so the result is always -1, 0 or 1. */
    return (fa > fb) - (fa < fb);
}
|
||||
|
||||
void forward_cost_layer_gpu(cost_layer l, network_state state)
|
||||
{
|
||||
if (!state.truth) return;
|
||||
|
@ -111,6 +120,16 @@ void forward_cost_layer_gpu(cost_layer l, network_state state)
|
|||
l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
|
||||
}
|
||||
|
||||
if(l.ratio){
|
||||
cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
|
||||
qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare);
|
||||
int n = (1-l.ratio) * l.batch*l.inputs;
|
||||
float thresh = l.delta[n];
|
||||
thresh = 0;
|
||||
printf("%f\n", thresh);
|
||||
supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1);
|
||||
}
|
||||
|
||||
cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs);
|
||||
l.cost[0] = sum_array(l.output, l.batch*l.inputs);
|
||||
}
|
||||
|
|
|
@ -13,7 +13,6 @@
|
|||
#endif
|
||||
|
||||
extern void run_voxel(int argc, char **argv);
|
||||
extern void run_imagenet(int argc, char **argv);
|
||||
extern void run_yolo(int argc, char **argv);
|
||||
extern void run_detector(int argc, char **argv);
|
||||
extern void run_coco(int argc, char **argv);
|
||||
|
@ -327,9 +326,7 @@ int main(int argc, char **argv)
|
|||
}
|
||||
#endif
|
||||
|
||||
if(0==strcmp(argv[1], "imagenet")){
|
||||
run_imagenet(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "average")){
|
||||
if (0 == strcmp(argv[1], "average")){
|
||||
average(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "yolo")){
|
||||
run_yolo(argc, argv);
|
||||
|
|
37
src/data.c
37
src/data.c
|
@ -100,7 +100,7 @@ matrix load_image_paths(char **paths, int n, int w, int h)
|
|||
return X;
|
||||
}
|
||||
|
||||
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float exposure, float saturation)
|
||||
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure)
|
||||
{
|
||||
int i;
|
||||
matrix X;
|
||||
|
@ -113,10 +113,7 @@ matrix load_image_augment_paths(char **paths, int n, int min, int max, int size,
|
|||
image crop = random_augment_image(im, angle, min, max, size);
|
||||
int flip = rand_r(&data_seed)%2;
|
||||
if (flip) flip_image(crop);
|
||||
float exp = rand_uniform(1./exposure, exposure);
|
||||
float sat = rand_uniform(1./saturation, saturation);
|
||||
exposure_image(crop, exp);
|
||||
exposure_image(crop, sat);
|
||||
random_distort_image(crop, hue, saturation, exposure);
|
||||
|
||||
/*
|
||||
show_image(im, "orig");
|
||||
|
@ -241,6 +238,7 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int
|
|||
labelpath = find_replace(labelpath, "JPEGImages", "labels");
|
||||
|
||||
labelpath = find_replace(labelpath, ".jpg", ".txt");
|
||||
labelpath = find_replace(labelpath, ".png", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPG", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPEG", ".txt");
|
||||
int count = 0;
|
||||
|
@ -287,6 +285,7 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
|
|||
labelpath = find_replace(labelpath, "JPEGImages", "labels");
|
||||
|
||||
labelpath = find_replace(labelpath, ".jpg", ".txt");
|
||||
labelpath = find_replace(labelpath, ".png", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPG", ".txt");
|
||||
labelpath = find_replace(labelpath, ".JPEG", ".txt");
|
||||
int count = 0;
|
||||
|
@ -443,7 +442,7 @@ void free_data(data d)
|
|||
}
|
||||
}
|
||||
|
||||
data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter)
|
||||
data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter, float hue, float saturation, float exposure)
|
||||
{
|
||||
char **random_paths = get_random_paths(paths, n, m);
|
||||
int i;
|
||||
|
@ -485,6 +484,7 @@ data load_data_region(int n, char **paths, int m, int w, int h, int size, int cl
|
|||
|
||||
image sized = resize_image(cropped, w, h);
|
||||
if(flip) flip_image(sized);
|
||||
random_distort_image(sized, hue, saturation, exposure);
|
||||
d.X.vals[i] = sized.data;
|
||||
|
||||
fill_truth_region(random_paths[i], d.y.vals[i], classes, size, flip, dx, dy, 1./sx, 1./sy);
|
||||
|
@ -611,7 +611,7 @@ data load_data_swag(char **paths, int n, int classes, float jitter)
|
|||
return d;
|
||||
}
|
||||
|
||||
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter)
|
||||
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure)
|
||||
{
|
||||
char **random_paths = get_random_paths(paths, n, m);
|
||||
int i;
|
||||
|
@ -651,6 +651,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in
|
|||
|
||||
image sized = resize_image(cropped, w, h);
|
||||
if(flip) flip_image(sized);
|
||||
random_distort_image(sized, hue, saturation, exposure);
|
||||
d.X.vals[i] = sized.data;
|
||||
|
||||
fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy);
|
||||
|
@ -679,17 +680,17 @@ void *load_thread(void *ptr)
|
|||
if (a.type == OLD_CLASSIFICATION_DATA){
|
||||
*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
|
||||
} else if (a.type == CLASSIFICATION_DATA){
|
||||
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
|
||||
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
|
||||
} else if (a.type == SUPER_DATA){
|
||||
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
|
||||
} else if (a.type == STUDY_DATA){
|
||||
*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
|
||||
*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
|
||||
} else if (a.type == WRITING_DATA){
|
||||
*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
|
||||
} else if (a.type == REGION_DATA){
|
||||
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter);
|
||||
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
|
||||
} else if (a.type == DETECTION_DATA){
|
||||
*a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter);
|
||||
*a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
|
||||
} else if (a.type == SWAG_DATA){
|
||||
*a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
|
||||
} else if (a.type == COMPARE_DATA){
|
||||
|
@ -698,7 +699,7 @@ void *load_thread(void *ptr)
|
|||
*(a.im) = load_image_color(a.path, 0, 0);
|
||||
*(a.resized) = resize_image(*(a.im), a.w, a.h);
|
||||
} else if (a.type == TAG_DATA){
|
||||
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
|
||||
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
|
||||
//*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
|
||||
}
|
||||
free(ptr);
|
||||
|
@ -740,13 +741,13 @@ data load_data(char **paths, int n, int m, char **labels, int k, int w, int h)
|
|||
return d;
|
||||
}
|
||||
|
||||
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation)
|
||||
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
|
||||
{
|
||||
data d = {0};
|
||||
d.indexes = calloc(n, sizeof(int));
|
||||
if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes);
|
||||
d.shallow = 0;
|
||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
|
||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
|
||||
d.y = load_labels_paths(paths, n, labels, k);
|
||||
if(m) free(paths);
|
||||
return d;
|
||||
|
@ -782,25 +783,25 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
|
|||
return d;
|
||||
}
|
||||
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation)
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
|
||||
{
|
||||
if(m) paths = get_random_paths(paths, n, m);
|
||||
data d = {0};
|
||||
d.shallow = 0;
|
||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
|
||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
|
||||
d.y = load_labels_paths(paths, n, labels, k);
|
||||
if(m) free(paths);
|
||||
return d;
|
||||
}
|
||||
|
||||
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation)
|
||||
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
|
||||
{
|
||||
if(m) paths = get_random_paths(paths, n, m);
|
||||
data d = {0};
|
||||
d.w = size;
|
||||
d.h = size;
|
||||
d.shallow = 0;
|
||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
|
||||
d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
|
||||
d.y = load_tags_paths(paths, n, k);
|
||||
if(m) free(paths);
|
||||
return d;
|
||||
|
|
10
src/data.h
10
src/data.h
|
@ -54,6 +54,7 @@ typedef struct load_args{
|
|||
float angle;
|
||||
float saturation;
|
||||
float exposure;
|
||||
float hue;
|
||||
data *d;
|
||||
image *im;
|
||||
image *resized;
|
||||
|
@ -74,11 +75,12 @@ void print_letters(float *pred, int n);
|
|||
data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
|
||||
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
|
||||
data load_data(char **paths, int n, int m, char **labels, int k, int w, int h);
|
||||
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter);
|
||||
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation);
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
|
||||
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure);
|
||||
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
|
||||
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure);
|
||||
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
|
||||
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
|
||||
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
|
||||
data load_go(char *filename);
|
||||
|
||||
box_label *read_boxes(char *filename, int *n);
|
||||
|
|
24
src/demo.c
24
src/demo.c
|
@ -8,7 +8,7 @@
|
|||
#include "demo.h"
|
||||
#include <sys/time.h>
|
||||
|
||||
#define FRAMES 1
|
||||
#define FRAMES 3
|
||||
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
|
@ -48,7 +48,7 @@ void *fetch_in_thread(void *ptr)
|
|||
|
||||
void *detect_in_thread(void *ptr)
|
||||
{
|
||||
float nms = .4;
|
||||
float nms = .1;
|
||||
|
||||
detection_layer l = net.layers[net.n-1];
|
||||
float *X = det_s.data;
|
||||
|
@ -153,13 +153,19 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
|
|||
if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
|
||||
if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");
|
||||
|
||||
show_image(disp, "Demo");
|
||||
int c = cvWaitKey(1);
|
||||
if (c == 10){
|
||||
if(frame_skip == 0) frame_skip = 60;
|
||||
else if(frame_skip == 4) frame_skip = 0;
|
||||
else if(frame_skip == 60) frame_skip = 4;
|
||||
else frame_skip = 0;
|
||||
if(1){
|
||||
show_image(disp, "Demo");
|
||||
int c = cvWaitKey(1);
|
||||
if (c == 10){
|
||||
if(frame_skip == 0) frame_skip = 60;
|
||||
else if(frame_skip == 4) frame_skip = 0;
|
||||
else if(frame_skip == 60) frame_skip = 4;
|
||||
else frame_skip = 0;
|
||||
}
|
||||
}else{
|
||||
char buff[256];
|
||||
sprintf(buff, "/home/pjreddie/tmp/bag_%07d", count);
|
||||
save_image(disp, buff);
|
||||
}
|
||||
|
||||
pthread_join(fetch_thread, 0);
|
||||
|
|
|
@ -22,6 +22,8 @@ detection_layer make_detection_layer(int batch, int inputs, int n, int side, int
|
|||
l.coords = coords;
|
||||
l.rescore = rescore;
|
||||
l.side = side;
|
||||
l.w = side;
|
||||
l.h = side;
|
||||
assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs);
|
||||
l.cost = calloc(1, sizeof(float));
|
||||
l.outputs = l.inputs;
|
||||
|
@ -44,6 +46,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
|
|||
int locations = l.side*l.side;
|
||||
int i,j;
|
||||
memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
|
||||
//if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1);
|
||||
int b;
|
||||
if (l.softmax){
|
||||
for(b = 0; b < l.batch; ++b){
|
||||
|
@ -204,6 +207,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
|
|||
|
||||
|
||||
printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
|
||||
//if(l.reorg) reorg(l.delta, l.w*l.h, size*l.n, l.batch, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -51,6 +51,11 @@ void train_detector(char *cfgfile, char *weightfile)
|
|||
args.d = &buffer;
|
||||
args.type = DETECTION_DATA;
|
||||
|
||||
args.angle = net.angle;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
|
||||
pthread_t load_thread = load_data_in_thread(args);
|
||||
clock_t time;
|
||||
//while(i*imgs < N*120){
|
||||
|
|
10
src/im2col.c
10
src/im2col.c
|
@ -18,13 +18,9 @@ void im2col_cpu(float* data_im,
|
|||
int ksize, int stride, int pad, float* data_col)
|
||||
{
|
||||
int c,h,w;
|
||||
int height_col = (height - ksize) / stride + 1;
|
||||
int width_col = (width - ksize) / stride + 1;
|
||||
if (pad){
|
||||
height_col = 1 + (height-1) / stride;
|
||||
width_col = 1 + (width-1) / stride;
|
||||
pad = ksize/2;
|
||||
}
|
||||
int height_col = (height + 2*pad - ksize) / stride + 1;
|
||||
int width_col = (width + 2*pad - ksize) / stride + 1;
|
||||
|
||||
int channels_col = channels * ksize * ksize;
|
||||
for (c = 0; c < channels_col; ++c) {
|
||||
int w_offset = c % ksize;
|
||||
|
|
|
@ -33,8 +33,12 @@ __global__ void im2col_gpu_kernel(const int n, const float* data_im,
|
|||
for (int j = 0; j < ksize; ++j) {
|
||||
int h = h_in + i;
|
||||
int w = w_in + j;
|
||||
|
||||
*data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ?
|
||||
data_im_ptr[i * width + j] : 0;
|
||||
|
||||
//*data_col_ptr = data_im_ptr[ii * width + jj];
|
||||
|
||||
data_col_ptr += height_col * width_col;
|
||||
}
|
||||
}
|
||||
|
@ -46,7 +50,6 @@ void im2col_ongpu(float *im,
|
|||
int ksize, int stride, int pad, float *data_col){
|
||||
// We are going to launch channels * height_col * width_col kernels, each
|
||||
// kernel responsible for copying a single-channel grid.
|
||||
pad = pad ? ksize/2 : 0;
|
||||
int height_col = (height + 2 * pad - ksize) / stride + 1;
|
||||
int width_col = (width + 2 * pad - ksize) / stride + 1;
|
||||
int num_kernels = channels * height_col * width_col;
|
||||
|
@ -56,93 +59,3 @@ void im2col_ongpu(float *im,
|
|||
stride, height_col,
|
||||
width_col, data_col);
|
||||
}
|
||||
/*
|
||||
__global__ void im2col_pad_kernel(float *im,
|
||||
int channels, int height, int width,
|
||||
int ksize, int stride, float *data_col)
|
||||
{
|
||||
int c,h,w;
|
||||
int height_col = 1 + (height-1) / stride;
|
||||
int width_col = 1 + (width-1) / stride;
|
||||
int channels_col = channels * ksize * ksize;
|
||||
|
||||
int pad = ksize/2;
|
||||
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
int col_size = height_col*width_col*channels_col;
|
||||
if (id >= col_size) return;
|
||||
|
||||
int col_index = id;
|
||||
w = id % width_col;
|
||||
id /= width_col;
|
||||
h = id % height_col;
|
||||
id /= height_col;
|
||||
c = id % channels_col;
|
||||
id /= channels_col;
|
||||
|
||||
int w_offset = c % ksize;
|
||||
int h_offset = (c / ksize) % ksize;
|
||||
int im_channel = c / ksize / ksize;
|
||||
int im_row = h_offset + h * stride - pad;
|
||||
int im_col = w_offset + w * stride - pad;
|
||||
|
||||
int im_index = im_col + width*(im_row + height*im_channel);
|
||||
float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 0 : im[im_index];
|
||||
|
||||
data_col[col_index] = val;
|
||||
}
|
||||
|
||||
__global__ void im2col_nopad_kernel(float *im,
|
||||
int channels, int height, int width,
|
||||
int ksize, int stride, float *data_col)
|
||||
{
|
||||
int c,h,w;
|
||||
int height_col = (height - ksize) / stride + 1;
|
||||
int width_col = (width - ksize) / stride + 1;
|
||||
int channels_col = channels * ksize * ksize;
|
||||
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
int col_size = height_col*width_col*channels_col;
|
||||
if (id >= col_size) return;
|
||||
|
||||
int col_index = id;
|
||||
w = id % width_col;
|
||||
id /= width_col;
|
||||
h = id % height_col;
|
||||
id /= height_col;
|
||||
c = id % channels_col;
|
||||
id /= channels_col;
|
||||
|
||||
int w_offset = c % ksize;
|
||||
int h_offset = (c / ksize) % ksize;
|
||||
int im_channel = c / ksize / ksize;
|
||||
int im_row = h_offset + h * stride;
|
||||
int im_col = w_offset + w * stride;
|
||||
|
||||
int im_index = im_col + width*(im_row + height*im_channel);
|
||||
float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 0 : im[im_index];
|
||||
|
||||
data_col[col_index] = val;
|
||||
}
|
||||
|
||||
extern "C" void im2col_ongpu(float *im,
|
||||
int channels, int height, int width,
|
||||
int ksize, int stride, int pad, float *data_col)
|
||||
{
|
||||
|
||||
int height_col = (height - ksize) / stride + 1;
|
||||
int width_col = (width - ksize) / stride + 1;
|
||||
int channels_col = channels * ksize * ksize;
|
||||
|
||||
if (pad){
|
||||
height_col = 1 + (height-1) / stride;
|
||||
width_col = 1 + (width-1) / stride;
|
||||
}
|
||||
|
||||
size_t n = channels_col*height_col*width_col;
|
||||
|
||||
if(pad)im2col_pad_kernel<<<cuda_gridsize(n),BLOCK>>>(im, channels, height, width, ksize, stride, data_col);
|
||||
else im2col_nopad_kernel<<<cuda_gridsize(n),BLOCK>>>(im, channels, height, width, ksize, stride, data_col);
|
||||
check_error(cudaPeekAtLastError());
|
||||
}
|
||||
*/
|
||||
|
|
491
src/image.c
491
src/image.c
|
@ -1,6 +1,7 @@
|
|||
#include "image.h"
|
||||
#include "utils.h"
|
||||
#include "blas.h"
|
||||
#include "cuda.h"
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
|
@ -247,6 +248,26 @@ void constrain_image(image im)
|
|||
}
|
||||
|
||||
/*
 * Rescale every value of the image in place so that the smallest value
 * becomes 0 and the largest becomes 1. A single min/max pair is computed
 * over all p.h*p.w*p.c values, so channels are not normalized independently.
 *
 * When the value range is smaller than 1e-9 (or no value was seen at all),
 * min/max fall back to 0/1, which leaves the data unchanged and avoids a
 * division by zero.
 */
void normalize_image(image p)
{
    int i;
    int n = p.h*p.w*p.c;

    /* Infinite sentinels: any real value replaces them, whatever its magnitude. */
    float min = INFINITY;
    float max = -INFINITY;

    for(i = 0; i < n; ++i){
        float v = p.data[i];
        if(v < min) min = v;
        if(v > max) max = v;
    }
    if(max - min < .000000001){
        min = 0;
        max = 1;
    }
    for(i = 0; i < n; ++i){
        p.data[i] = (p.data[i] - min)/(max-min);
    }
}
|
||||
|
||||
void normalize_image2(image p)
|
||||
{
|
||||
float *min = calloc(p.c, sizeof(float));
|
||||
float *max = calloc(p.c, sizeof(float));
|
||||
|
@ -320,7 +341,6 @@ void show_image_cv(image p, const char *name)
|
|||
}
|
||||
free_image(copy);
|
||||
if(0){
|
||||
//if(disp->height < 448 || disp->width < 448 || disp->height > 1000){
|
||||
int w = 448;
|
||||
int h = w*p.h/p.w;
|
||||
if(h > 1000){
|
||||
|
@ -334,202 +354,202 @@ void show_image_cv(image p, const char *name)
|
|||
}
|
||||
cvShowImage(buff, disp);
|
||||
cvReleaseImage(&disp);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void show_image(image p, const char *name)
|
||||
{
|
||||
void show_image(image p, const char *name)
|
||||
{
|
||||
#ifdef OPENCV
|
||||
show_image_cv(p, name);
|
||||
show_image_cv(p, name);
|
||||
#else
|
||||
fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
|
||||
save_image(p, name);
|
||||
fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
|
||||
save_image(p, name);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef OPENCV
|
||||
image get_image_from_stream(CvCapture *cap)
|
||||
{
|
||||
IplImage* src = cvQueryFrame(cap);
|
||||
if (!src) return make_empty_image(0,0,0);
|
||||
image im = ipl_to_image(src);
|
||||
rgbgr_image(im);
|
||||
return im;
|
||||
}
|
||||
image get_image_from_stream(CvCapture *cap)
|
||||
{
|
||||
IplImage* src = cvQueryFrame(cap);
|
||||
if (!src) return make_empty_image(0,0,0);
|
||||
image im = ipl_to_image(src);
|
||||
rgbgr_image(im);
|
||||
return im;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef OPENCV
|
||||
void save_image_jpg(image p, const char *name)
|
||||
{
|
||||
image copy = copy_image(p);
|
||||
rgbgr_image(copy);
|
||||
int x,y,k;
|
||||
void save_image_jpg(image p, const char *name)
|
||||
{
|
||||
image copy = copy_image(p);
|
||||
if(p.c == 3) rgbgr_image(copy);
|
||||
int x,y,k;
|
||||
|
||||
char buff[256];
|
||||
sprintf(buff, "%s.jpg", name);
|
||||
char buff[256];
|
||||
sprintf(buff, "%s.jpg", name);
|
||||
|
||||
IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c);
|
||||
int step = disp->widthStep;
|
||||
for(y = 0; y < p.h; ++y){
|
||||
for(x = 0; x < p.w; ++x){
|
||||
for(k= 0; k < p.c; ++k){
|
||||
disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(copy,x,y,k)*255);
|
||||
}
|
||||
IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c);
|
||||
int step = disp->widthStep;
|
||||
for(y = 0; y < p.h; ++y){
|
||||
for(x = 0; x < p.w; ++x){
|
||||
for(k= 0; k < p.c; ++k){
|
||||
disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(copy,x,y,k)*255);
|
||||
}
|
||||
}
|
||||
cvSaveImage(buff, disp,0);
|
||||
cvReleaseImage(&disp);
|
||||
free_image(copy);
|
||||
}
|
||||
cvSaveImage(buff, disp,0);
|
||||
cvReleaseImage(&disp);
|
||||
free_image(copy);
|
||||
}
|
||||
#endif
|
||||
|
||||
void save_image(image im, const char *name)
|
||||
{
|
||||
#ifdef OPENCV
|
||||
save_image_jpg(im, name);
|
||||
#else
|
||||
char buff[256];
|
||||
//sprintf(buff, "%s (%d)", name, windows);
|
||||
sprintf(buff, "%s.png", name);
|
||||
unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
|
||||
int i,k;
|
||||
for(k = 0; k < im.c; ++k){
|
||||
for(i = 0; i < im.w*im.h; ++i){
|
||||
data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]);
|
||||
void save_image(image im, const char *name)
|
||||
{
|
||||
#ifdef OPENCV
|
||||
save_image_jpg(im, name);
|
||||
#else
|
||||
char buff[256];
|
||||
//sprintf(buff, "%s (%d)", name, windows);
|
||||
sprintf(buff, "%s.png", name);
|
||||
unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
|
||||
int i,k;
|
||||
for(k = 0; k < im.c; ++k){
|
||||
for(i = 0; i < im.w*im.h; ++i){
|
||||
data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]);
|
||||
}
|
||||
}
|
||||
int success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c);
|
||||
free(data);
|
||||
if(!success) fprintf(stderr, "Failed to write image %s\n", buff);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void show_image_layers(image p, char *name)
|
||||
{
|
||||
int i;
|
||||
char buff[256];
|
||||
for(i = 0; i < p.c; ++i){
|
||||
sprintf(buff, "%s - Layer %d", name, i);
|
||||
image layer = get_image_layer(p, i);
|
||||
show_image(layer, buff);
|
||||
free_image(layer);
|
||||
}
|
||||
}
|
||||
|
||||
void show_image_collapsed(image p, char *name)
|
||||
{
|
||||
image c = collapse_image_layers(p, 1);
|
||||
show_image(c, name);
|
||||
free_image(c);
|
||||
}
|
||||
|
||||
image make_empty_image(int w, int h, int c)
|
||||
{
|
||||
image out;
|
||||
out.data = 0;
|
||||
out.h = h;
|
||||
out.w = w;
|
||||
out.c = c;
|
||||
return out;
|
||||
}
|
||||
|
||||
image make_image(int w, int h, int c)
|
||||
{
|
||||
image out = make_empty_image(w,h,c);
|
||||
out.data = calloc(h*w*c, sizeof(float));
|
||||
return out;
|
||||
}
|
||||
|
||||
image make_random_image(int w, int h, int c)
|
||||
{
|
||||
image out = make_empty_image(w,h,c);
|
||||
out.data = calloc(h*w*c, sizeof(float));
|
||||
int i;
|
||||
for(i = 0; i < w*h*c; ++i){
|
||||
out.data[i] = (rand_normal() * .25) + .5;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
image float_to_image(int w, int h, int c, float *data)
|
||||
{
|
||||
image out = make_empty_image(w,h,c);
|
||||
out.data = data;
|
||||
return out;
|
||||
}
|
||||
|
||||
image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy)
|
||||
{
|
||||
int x, y, c;
|
||||
float cx = im.w/2.;
|
||||
float cy = im.h/2.;
|
||||
image rot = make_image(w, h, im.c);
|
||||
for(c = 0; c < im.c; ++c){
|
||||
for(y = 0; y < h; ++y){
|
||||
for(x = 0; x < w; ++x){
|
||||
float rx = cos(rad)*(x/s + dx/s -cx) - sin(rad)*(y/s + dy/s -cy) + cx;
|
||||
float ry = sin(rad)*(x/s + dx/s -cx) + cos(rad)*(y/s + dy/s -cy) + cy;
|
||||
float val = bilinear_interpolate(im, rx, ry, c);
|
||||
set_pixel(rot, x, y, c, val);
|
||||
}
|
||||
}
|
||||
int success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c);
|
||||
free(data);
|
||||
if(!success) fprintf(stderr, "Failed to write image %s\n", buff);
|
||||
#endif
|
||||
}
|
||||
return rot;
|
||||
}
|
||||
|
||||
|
||||
void show_image_layers(image p, char *name)
|
||||
{
|
||||
int i;
|
||||
char buff[256];
|
||||
for(i = 0; i < p.c; ++i){
|
||||
sprintf(buff, "%s - Layer %d", name, i);
|
||||
image layer = get_image_layer(p, i);
|
||||
show_image(layer, buff);
|
||||
free_image(layer);
|
||||
image rotate_image(image im, float rad)
|
||||
{
|
||||
int x, y, c;
|
||||
float cx = im.w/2.;
|
||||
float cy = im.h/2.;
|
||||
image rot = make_image(im.w, im.h, im.c);
|
||||
for(c = 0; c < im.c; ++c){
|
||||
for(y = 0; y < im.h; ++y){
|
||||
for(x = 0; x < im.w; ++x){
|
||||
float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx;
|
||||
float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy;
|
||||
float val = bilinear_interpolate(im, rx, ry, c);
|
||||
set_pixel(rot, x, y, c, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
return rot;
|
||||
}
|
||||
|
||||
void show_image_collapsed(image p, char *name)
|
||||
{
|
||||
image c = collapse_image_layers(p, 1);
|
||||
show_image(c, name);
|
||||
free_image(c);
|
||||
}
|
||||
void translate_image(image m, float s)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s;
|
||||
}
|
||||
|
||||
image make_empty_image(int w, int h, int c)
|
||||
{
|
||||
image out;
|
||||
out.data = 0;
|
||||
out.h = h;
|
||||
out.w = w;
|
||||
out.c = c;
|
||||
return out;
|
||||
}
|
||||
void scale_image(image m, float s)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s;
|
||||
}
|
||||
|
||||
image make_image(int w, int h, int c)
|
||||
{
|
||||
image out = make_empty_image(w,h,c);
|
||||
out.data = calloc(h*w*c, sizeof(float));
|
||||
return out;
|
||||
}
|
||||
|
||||
image make_random_image(int w, int h, int c)
|
||||
{
|
||||
image out = make_empty_image(w,h,c);
|
||||
out.data = calloc(h*w*c, sizeof(float));
|
||||
int i;
|
||||
for(i = 0; i < w*h*c; ++i){
|
||||
out.data[i] = (rand_normal() * .25) + .5;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
image float_to_image(int w, int h, int c, float *data)
|
||||
{
|
||||
image out = make_empty_image(w,h,c);
|
||||
out.data = data;
|
||||
return out;
|
||||
}
|
||||
|
||||
image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy)
|
||||
{
|
||||
int x, y, c;
|
||||
float cx = im.w/2.;
|
||||
float cy = im.h/2.;
|
||||
image rot = make_image(w, h, im.c);
|
||||
for(c = 0; c < im.c; ++c){
|
||||
for(y = 0; y < h; ++y){
|
||||
for(x = 0; x < w; ++x){
|
||||
float rx = cos(rad)*(x/s + dx/s -cx) - sin(rad)*(y/s + dy/s -cy) + cx;
|
||||
float ry = sin(rad)*(x/s + dx/s -cx) + cos(rad)*(y/s + dy/s -cy) + cy;
|
||||
float val = bilinear_interpolate(im, rx, ry, c);
|
||||
set_pixel(rot, x, y, c, val);
|
||||
image crop_image(image im, int dx, int dy, int w, int h)
|
||||
{
|
||||
image cropped = make_image(w, h, im.c);
|
||||
int i, j, k;
|
||||
for(k = 0; k < im.c; ++k){
|
||||
for(j = 0; j < h; ++j){
|
||||
for(i = 0; i < w; ++i){
|
||||
int r = j + dy;
|
||||
int c = i + dx;
|
||||
float val = 0;
|
||||
r = constrain_int(r, 0, im.h-1);
|
||||
c = constrain_int(c, 0, im.w-1);
|
||||
if (r >= 0 && r < im.h && c >= 0 && c < im.w) {
|
||||
val = get_pixel(im, c, r, k);
|
||||
}
|
||||
set_pixel(cropped, i, j, k, val);
|
||||
}
|
||||
}
|
||||
return rot;
|
||||
}
|
||||
|
||||
image rotate_image(image im, float rad)
|
||||
{
|
||||
int x, y, c;
|
||||
float cx = im.w/2.;
|
||||
float cy = im.h/2.;
|
||||
image rot = make_image(im.w, im.h, im.c);
|
||||
for(c = 0; c < im.c; ++c){
|
||||
for(y = 0; y < im.h; ++y){
|
||||
for(x = 0; x < im.w; ++x){
|
||||
float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx;
|
||||
float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy;
|
||||
float val = bilinear_interpolate(im, rx, ry, c);
|
||||
set_pixel(rot, x, y, c, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
return rot;
|
||||
}
|
||||
|
||||
void translate_image(image m, float s)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s;
|
||||
}
|
||||
|
||||
void scale_image(image m, float s)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s;
|
||||
}
|
||||
|
||||
/*
 * Build a new w x h image from the window of `im` whose top-left corner is
 * at (dx, dy).
 *
 * Each source coordinate is passed through constrain_int() with the bounds
 * [0, im.h-1] / [0, im.w-1] (presumably a clamp, so windows that overhang
 * the image reuse edge pixels -- confirm against constrain_int). A source
 * coordinate that is still out of range afterwards yields a 0 pixel.
 */
image crop_image(image im, int dx, int dy, int w, int h)
{
    image cropped = make_image(w, h, im.c);
    int col, row, ch;
    for(ch = 0; ch < im.c; ++ch){
        for(row = 0; row < h; ++row){
            for(col = 0; col < w; ++col){
                int src_row = constrain_int(row + dy, 0, im.h-1);
                int src_col = constrain_int(col + dx, 0, im.w-1);
                int inside = src_row >= 0 && src_row < im.h
                          && src_col >= 0 && src_col < im.w;
                float val = 0;
                if(inside){
                    val = get_pixel(im, src_col, src_row, ch);
                }
                set_pixel(cropped, col, row, ch, val);
            }
        }
    }
    return cropped;
}
|
||||
return cropped;
|
||||
}
|
||||
|
||||
int best_3d_shift_r(image a, image b, int min, int max)
|
||||
{
|
||||
|
@ -666,7 +686,7 @@ void rgb_to_hsv(image im)
|
|||
v = max;
|
||||
if(max == 0){
|
||||
s = 0;
|
||||
h = -1;
|
||||
h = 0;
|
||||
}else{
|
||||
s = delta/max;
|
||||
if(r == max){
|
||||
|
@ -677,6 +697,7 @@ void rgb_to_hsv(image im)
|
|||
h = 4 + (r - g) / delta;
|
||||
}
|
||||
if (h < 0) h += 6;
|
||||
h = h/6.;
|
||||
}
|
||||
set_pixel(im, i, j, 0, h);
|
||||
set_pixel(im, i, j, 1, s);
|
||||
|
@ -694,7 +715,7 @@ void hsv_to_rgb(image im)
|
|||
float f, p, q, t;
|
||||
for(j = 0; j < im.h; ++j){
|
||||
for(i = 0; i < im.w; ++i){
|
||||
h = get_pixel(im, i , j, 0);
|
||||
h = 6 * get_pixel(im, i , j, 0);
|
||||
s = get_pixel(im, i , j, 1);
|
||||
v = get_pixel(im, i , j, 2);
|
||||
if (s == 0) {
|
||||
|
@ -781,6 +802,18 @@ void scale_image_channel(image im, int c, float v)
|
|||
}
|
||||
}
|
||||
|
||||
/* Add v to every pixel of channel c of im, using get_pixel()/set_pixel(). */
void translate_image_channel(image im, int c, float v)
{
    int row;
    for(row = 0; row < im.h; ++row){
        int col = 0;
        while(col < im.w){
            float shifted = get_pixel(im, col, row, c);
            shifted = shifted + v;
            set_pixel(im, col, row, c, shifted);
            ++col;
        }
    }
}
|
||||
|
||||
image binarize_image(image im)
|
||||
{
|
||||
image c = copy_image(im);
|
||||
|
@ -800,6 +833,19 @@ void saturate_image(image im, float sat)
|
|||
constrain_image(im);
|
||||
}
|
||||
|
||||
/*
 * Shift the hue of an image in place.
 *
 * The image is converted with rgb_to_hsv(), `hue` is added to the first
 * im.w*im.h entries of im.data (presumably the hue plane -- confirm the
 * channel layout), values that leave [0, 1] are wrapped by one unit, and
 * the image is converted back with hsv_to_rgb() and passed to
 * constrain_image().
 */
void hue_image(image im, float hue)
{
    int plane = im.w*im.h;
    int p;
    rgb_to_hsv(im);
    for(p = 0; p < plane; ++p){
        float hv = im.data[p] + hue;
        if (hv > 1) hv -= 1;
        if (hv < 0) hv += 1;
        im.data[p] = hv;
    }
    hsv_to_rgb(im);
    constrain_image(im);
}
|
||||
|
||||
void exposure_image(image im, float sat)
|
||||
{
|
||||
rgb_to_hsv(im);
|
||||
|
@ -808,6 +854,29 @@ void exposure_image(image im, float sat)
|
|||
constrain_image(im);
|
||||
}
|
||||
|
||||
/*
 * Apply a combined hue / saturation / value distortion in place.
 *
 * After rgb_to_hsv(), channel 1 is scaled by `sat` and channel 2 by `val`
 * through scale_image_channel(). `hue` is then added to the first
 * im.w*im.h entries of im.data (presumably the hue plane -- confirm the
 * channel layout) with a one-unit wrap for values outside [0, 1]. The
 * image is finally converted back with hsv_to_rgb() and passed to
 * constrain_image().
 */
void distort_image(image im, float hue, float sat, float val)
{
    int plane = im.w*im.h;
    int p = 0;
    rgb_to_hsv(im);
    scale_image_channel(im, 1, sat);
    scale_image_channel(im, 2, val);
    while(p < plane){
        float hv = im.data[p] + hue;
        if (hv > 1) hv -= 1;
        if (hv < 0) hv += 1;
        im.data[p] = hv;
        ++p;
    }
    hsv_to_rgb(im);
    constrain_image(im);
}
|
||||
|
||||
/*
 * Distort im in place with randomly drawn parameters: a hue shift from
 * rand_uniform(-hue, hue) and saturation / exposure factors from
 * rand_scale(). The draws happen in the order hue, saturation, exposure.
 */
void random_distort_image(image im, float hue, float saturation, float exposure)
{
    float hue_shift, sat_factor, exp_factor;
    hue_shift = rand_uniform(-hue, hue);
    sat_factor = rand_scale(saturation);
    exp_factor = rand_scale(exposure);
    distort_image(im, hue_shift, sat_factor, exp_factor);
}
|
||||
|
||||
void saturate_exposure_image(image im, float sat, float exposure)
|
||||
{
|
||||
rgb_to_hsv(im);
|
||||
|
@ -876,7 +945,6 @@ image resize_image(image im, int w, int h)
|
|||
return resized;
|
||||
}
|
||||
|
||||
#include "cuda.h"
|
||||
|
||||
void test_resize(char *filename)
|
||||
{
|
||||
|
@ -885,59 +953,40 @@ void test_resize(char *filename)
|
|||
printf("L2 Norm: %f\n", mag);
|
||||
image gray = grayscale_image(im);
|
||||
|
||||
image sat2 = copy_image(im);
|
||||
saturate_image(sat2, 2);
|
||||
image c1 = copy_image(im);
|
||||
image c2 = copy_image(im);
|
||||
image c3 = copy_image(im);
|
||||
image c4 = copy_image(im);
|
||||
distort_image(c1, .1, 1.5, 1.5);
|
||||
distort_image(c2, -.1, .66666, .66666);
|
||||
distort_image(c3, .1, 1.5, .66666);
|
||||
distort_image(c4, .1, .66666, 1.5);
|
||||
|
||||
image sat5 = copy_image(im);
|
||||
saturate_image(sat5, .5);
|
||||
|
||||
image exp2 = copy_image(im);
|
||||
exposure_image(exp2, 2);
|
||||
|
||||
image exp5 = copy_image(im);
|
||||
exposure_image(exp5, .5);
|
||||
|
||||
image bin = binarize_image(im);
|
||||
|
||||
/*
|
||||
#ifdef GPU
|
||||
image r = resize_image(im, im.w, im.h);
|
||||
image black = make_image(im.w*2 + 3, im.h*2 + 3, 9);
|
||||
image black2 = make_image(im.w, im.h, 3);
|
||||
|
||||
float *r_gpu = cuda_make_array(r.data, r.w*r.h*r.c);
|
||||
float *black_gpu = cuda_make_array(black.data, black.w*black.h*black.c);
|
||||
float *black2_gpu = cuda_make_array(black2.data, black2.w*black2.h*black2.c);
|
||||
shortcut_gpu(3, r.w, r.h, 1, r_gpu, black.w, black.h, 3, black_gpu);
|
||||
//flip_image(r);
|
||||
//shortcut_gpu(3, r.w, r.h, 1, r.data, black.w, black.h, 3, black.data);
|
||||
|
||||
shortcut_gpu(3, black.w, black.h, 3, black_gpu, black2.w, black2.h, 1, black2_gpu);
|
||||
cuda_pull_array(black_gpu, black.data, black.w*black.h*black.c);
|
||||
cuda_pull_array(black2_gpu, black2.data, black2.w*black2.h*black2.c);
|
||||
show_image_layers(black, "Black");
|
||||
show_image(black2, "Recreate");
|
||||
#endif
|
||||
*/
|
||||
image rot = rotate_crop_image(im, -.2618, 1, im.w/2, im.h/2, 0, 0);
|
||||
image rot3 = rotate_crop_image(im, -.2618, 2, im.w, im.h, im.w/2, 0);
|
||||
image rot2 = rotate_crop_image(im, -.2618, 1, im.w, im.h, 0, 0);
|
||||
show_image(rot, "Rotated");
|
||||
show_image(rot2, "base");
|
||||
|
||||
show_image(rot3, "Rotated2");
|
||||
|
||||
/*
|
||||
show_image(im, "Original");
|
||||
show_image(bin, "Binary");
|
||||
show_image(gray, "Gray");
|
||||
show_image(sat2, "Saturation-2");
|
||||
show_image(sat5, "Saturation-.5");
|
||||
show_image(exp2, "Exposure-2");
|
||||
show_image(exp5, "Exposure-.5");
|
||||
*/
|
||||
show_image(c1, "C1");
|
||||
show_image(c2, "C2");
|
||||
show_image(c3, "C3");
|
||||
show_image(c4, "C4");
|
||||
#ifdef OPENCV
|
||||
cvWaitKey(0);
|
||||
while(1){
|
||||
float exposure = 1.15;
|
||||
float saturation = 1.15;
|
||||
float hue = .05;
|
||||
|
||||
image c = copy_image(im);
|
||||
|
||||
float dexp = rand_scale(exposure);
|
||||
float dsat = rand_scale(saturation);
|
||||
float dhue = rand_uniform(-hue, hue);
|
||||
|
||||
distort_image(c, dhue, dsat, dexp);
|
||||
show_image(c, "rand");
|
||||
printf("%f %f %f\n", dhue, dsat, dexp);
|
||||
free_image(c);
|
||||
cvWaitKey(0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -1180,10 +1229,8 @@ void show_images(image *ims, int n, char *window)
|
|||
image sized = resize_image(m, w, h);
|
||||
*/
|
||||
normalize_image(m);
|
||||
image sized = resize_image(m, m.w, m.h);
|
||||
save_image(sized, window);
|
||||
show_image(sized, window);
|
||||
free_image(sized);
|
||||
save_image(m, window);
|
||||
show_image(m, window);
|
||||
free_image(m);
|
||||
}
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@ void scale_image(image m, float s);
|
|||
image crop_image(image im, int dx, int dy, int w, int h);
|
||||
image random_crop_image(image im, int w, int h);
|
||||
image random_augment_image(image im, float angle, int low, int high, int size);
|
||||
void random_distort_image(image im, float hue, float saturation, float exposure);
|
||||
image resize_image(image im, int w, int h);
|
||||
image resize_min(image im, int min);
|
||||
void translate_image(image m, float s);
|
||||
|
@ -41,6 +42,7 @@ void rotate_image_cw(image im, int times);
|
|||
void embed_image(image source, image dest, int dx, int dy);
|
||||
void saturate_image(image im, float sat);
|
||||
void exposure_image(image im, float sat);
|
||||
void distort_image(image im, float hue, float sat, float val);
|
||||
void saturate_exposure_image(image im, float sat, float exposure);
|
||||
void hsv_to_rgb(image im);
|
||||
void rgbgr_image(image im);
|
||||
|
|
237
src/imagenet.c
237
src/imagenet.c
|
@ -1,237 +0,0 @@
|
|||
#include "network.h"
|
||||
#include "utils.h"
|
||||
#include "parser.h"
|
||||
|
||||
#ifdef OPENCV
|
||||
#include "opencv2/highgui/highgui_c.h"
|
||||
#endif
|
||||
|
||||
void train_imagenet(char *cfgfile, char *weightfile)
|
||||
{
|
||||
data_seed = time(0);
|
||||
srand(time(0));
|
||||
float avg_loss = -1;
|
||||
char *base = basecfg(cfgfile);
|
||||
char *backup_directory = "/home/pjreddie/backup/";
|
||||
printf("%s\n", base);
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||
int imgs = 1024;
|
||||
char **labels = get_labels("data/inet.labels.list");
|
||||
list *plist = get_paths("data/inet.train.list");
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
printf("%d\n", plist->size);
|
||||
int N = plist->size;
|
||||
clock_t time;
|
||||
pthread_t load_thread;
|
||||
data train;
|
||||
data buffer;
|
||||
|
||||
load_args args = {0};
|
||||
args.w = net.w;
|
||||
args.h = net.h;
|
||||
args.paths = paths;
|
||||
args.classes = 1000;
|
||||
args.n = imgs;
|
||||
args.m = N;
|
||||
args.labels = labels;
|
||||
args.d = &buffer;
|
||||
args.type = OLD_CLASSIFICATION_DATA;
|
||||
|
||||
load_thread = load_data_in_thread(args);
|
||||
int epoch = (*net.seen)/N;
|
||||
while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
|
||||
time=clock();
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
|
||||
load_thread = load_data_in_thread(args);
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
time=clock();
|
||||
float loss = train_network(net, train);
|
||||
if(avg_loss == -1) avg_loss = loss;
|
||||
avg_loss = avg_loss*.9 + loss*.1;
|
||||
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
|
||||
free_data(train);
|
||||
if(*net.seen/N > epoch){
|
||||
epoch = *net.seen/N;
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
if(*net.seen%1000 == 0){
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.backup",backup_directory,base);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
}
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.weights", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
|
||||
pthread_join(load_thread, 0);
|
||||
free_data(buffer);
|
||||
free_network(net);
|
||||
free_ptrs((void**)labels, 1000);
|
||||
free_ptrs((void**)paths, plist->size);
|
||||
free_list(plist);
|
||||
free(base);
|
||||
}
|
||||
|
||||
void validate_imagenet(char *filename, char *weightfile)
|
||||
{
|
||||
int i = 0;
|
||||
network net = parse_network_cfg(filename);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
srand(time(0));
|
||||
|
||||
char **labels = get_labels("data/inet.labels.list");
|
||||
//list *plist = get_paths("data/inet.suppress.list");
|
||||
list *plist = get_paths("data/inet.val.list");
|
||||
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
int m = plist->size;
|
||||
free_list(plist);
|
||||
|
||||
clock_t time;
|
||||
float avg_acc = 0;
|
||||
float avg_top5 = 0;
|
||||
int splits = 50;
|
||||
int num = (i+1)*m/splits - i*m/splits;
|
||||
|
||||
data val, buffer;
|
||||
|
||||
load_args args = {0};
|
||||
args.w = net.w;
|
||||
args.h = net.h;
|
||||
args.paths = paths;
|
||||
args.classes = 1000;
|
||||
args.n = num;
|
||||
args.m = 0;
|
||||
args.labels = labels;
|
||||
args.d = &buffer;
|
||||
args.type = OLD_CLASSIFICATION_DATA;
|
||||
|
||||
pthread_t load_thread = load_data_in_thread(args);
|
||||
for(i = 1; i <= splits; ++i){
|
||||
time=clock();
|
||||
|
||||
pthread_join(load_thread, 0);
|
||||
val = buffer;
|
||||
|
||||
num = (i+1)*m/splits - i*m/splits;
|
||||
char **part = paths+(i*m/splits);
|
||||
if(i != splits){
|
||||
args.paths = part;
|
||||
load_thread = load_data_in_thread(args);
|
||||
}
|
||||
printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time));
|
||||
|
||||
time=clock();
|
||||
float *acc = network_accuracies(net, val, 5);
|
||||
avg_acc += acc[0];
|
||||
avg_top5 += acc[1];
|
||||
printf("%d: top1: %f, top5: %f, %lf seconds, %d images\n", i, avg_acc/i, avg_top5/i, sec(clock()-time), val.X.rows);
|
||||
free_data(val);
|
||||
}
|
||||
}
|
||||
|
||||
void test_imagenet(char *cfgfile, char *weightfile, char *filename)
|
||||
{
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
set_batch_network(&net, 1);
|
||||
srand(2222222);
|
||||
int i = 0;
|
||||
char **names = get_labels("data/shortnames.txt");
|
||||
clock_t time;
|
||||
int indexes[10];
|
||||
char buff[256];
|
||||
char *input = buff;
|
||||
while(1){
|
||||
if(filename){
|
||||
strncpy(input, filename, 256);
|
||||
}else{
|
||||
printf("Enter Image Path: ");
|
||||
fflush(stdout);
|
||||
input = fgets(input, 256, stdin);
|
||||
if(!input) return;
|
||||
strtok(input, "\n");
|
||||
}
|
||||
image im = load_image_color(input, 256, 256);
|
||||
float *X = im.data;
|
||||
time=clock();
|
||||
float *predictions = network_predict(net, X);
|
||||
top_predictions(net, 10, indexes);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
for(i = 0; i < 10; ++i){
|
||||
int index = indexes[i];
|
||||
printf("%s: %f\n", names[index], predictions[index]);
|
||||
}
|
||||
free_image(im);
|
||||
if (filename) break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Command-line dispatcher for the imagenet sub-commands.
 *
 * argv[2] selects "test", "train" or "valid"; argv[3] is the cfg file,
 * argv[4] (optional) the weights file and argv[5] (optional) an image
 * path used only by "test". Prints a usage line to stderr when fewer than
 * four arguments are given; an unrecognised mode does nothing.
 */
void run_imagenet(int argc, char **argv)
{
    char *mode;
    char *cfg;
    char *weights = 0;
    char *filename = 0;

    if(argc < 4){
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }

    mode = argv[2];
    cfg = argv[3];
    if(argc > 4) weights = argv[4];
    if(argc > 5) filename = argv[5];

    if(strcmp(mode, "test") == 0){
        test_imagenet(cfg, weights, filename);
    }else if(strcmp(mode, "train") == 0){
        train_imagenet(cfg, weights);
    }else if(strcmp(mode, "valid") == 0){
        validate_imagenet(cfg, weights);
    }
}
|
||||
|
||||
/*
|
||||
void train_imagenet_distributed(char *address)
|
||||
{
|
||||
float avg_loss = 1;
|
||||
srand(time(0));
|
||||
network net = parse_network_cfg("cfg/net.cfg");
|
||||
set_learning_network(&net, 0, 1, 0);
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||
int imgs = net.batch;
|
||||
int i = 0;
|
||||
char **labels = get_labels("/home/pjreddie/data/imagenet/cls.labels.list");
|
||||
list *plist = get_paths("/data/imagenet/cls.train.list");
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
printf("%d\n", plist->size);
|
||||
clock_t time;
|
||||
data train, buffer;
|
||||
pthread_t load_thread = load_data_thread(paths, imgs, plist->size, labels, 1000, 224, 224, &buffer);
|
||||
while(1){
|
||||
i += 1;
|
||||
|
||||
time=clock();
|
||||
client_update(net, address);
|
||||
printf("Updated: %lf seconds\n", sec(clock()-time));
|
||||
|
||||
time=clock();
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
normalize_data_rows(train);
|
||||
load_thread = load_data_thread(paths, imgs, plist->size, labels, 1000, 224, 224, &buffer);
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
time=clock();
|
||||
|
||||
float loss = train_network(net, train);
|
||||
avg_loss = avg_loss*.9 + loss*.1;
|
||||
printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), i*imgs);
|
||||
free_data(train);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
|
@ -72,6 +72,7 @@ struct layer{
|
|||
float saturation;
|
||||
float exposure;
|
||||
float shift;
|
||||
float ratio;
|
||||
int softmax;
|
||||
int classes;
|
||||
int coords;
|
||||
|
@ -82,6 +83,7 @@ struct layer{
|
|||
int joint;
|
||||
int noadjust;
|
||||
int reorg;
|
||||
int log;
|
||||
|
||||
float alpha;
|
||||
float beta;
|
||||
|
|
|
@ -18,7 +18,7 @@ image get_maxpool_delta(maxpool_layer l)
|
|||
return float_to_image(w,h,c,l.delta);
|
||||
}
|
||||
|
||||
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride)
|
||||
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding)
|
||||
{
|
||||
fprintf(stderr, "Maxpool Layer: %d x %d x %d image, %d size, %d stride\n", h,w,c,size,stride);
|
||||
maxpool_layer l = {0};
|
||||
|
@ -27,8 +27,9 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
|
|||
l.h = h;
|
||||
l.w = w;
|
||||
l.c = c;
|
||||
l.out_w = (w-1)/stride + 1;
|
||||
l.out_h = (h-1)/stride + 1;
|
||||
l.pad = padding;
|
||||
l.out_w = (w + 2*padding - size + 1)/stride + 1;
|
||||
l.out_h = (h + 2*padding - size + 1)/stride + 1;
|
||||
l.out_c = c;
|
||||
l.outputs = l.out_h * l.out_w * l.out_c;
|
||||
l.inputs = h*w*c;
|
||||
|
@ -48,13 +49,12 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
|
|||
|
||||
void resize_maxpool_layer(maxpool_layer *l, int w, int h)
|
||||
{
|
||||
int stride = l->stride;
|
||||
l->h = h;
|
||||
l->w = w;
|
||||
l->inputs = h*w*l->c;
|
||||
|
||||
l->out_w = (w-1)/stride + 1;
|
||||
l->out_h = (h-1)/stride + 1;
|
||||
l->out_w = (w + 2*l->pad - l->size + 1)/l->stride + 1;
|
||||
l->out_h = (h + 2*l->pad - l->size + 1)/l->stride + 1;
|
||||
l->outputs = l->out_w * l->out_h * l->c;
|
||||
int output_size = l->outputs * l->batch;
|
||||
|
||||
|
@ -75,11 +75,11 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
|
|||
void forward_maxpool_layer(const maxpool_layer l, network_state state)
|
||||
{
|
||||
int b,i,j,k,m,n;
|
||||
int w_offset = (-l.size-1)/2 + 1;
|
||||
int h_offset = (-l.size-1)/2 + 1;
|
||||
int w_offset = -l.pad;
|
||||
int h_offset = -l.pad;
|
||||
|
||||
int h = (l.h-1)/l.stride + 1;
|
||||
int w = (l.w-1)/l.stride + 1;
|
||||
int h = l.out_h;
|
||||
int w = l.out_w;
|
||||
int c = l.c;
|
||||
|
||||
for(b = 0; b < l.batch; ++b){
|
||||
|
@ -112,8 +112,8 @@ void forward_maxpool_layer(const maxpool_layer l, network_state state)
|
|||
void backward_maxpool_layer(const maxpool_layer l, network_state state)
|
||||
{
|
||||
int i;
|
||||
int h = (l.h-1)/l.stride + 1;
|
||||
int w = (l.w-1)/l.stride + 1;
|
||||
int h = l.out_h;
|
||||
int w = l.out_w;
|
||||
int c = l.c;
|
||||
for(i = 0; i < h*w*c*l.batch; ++i){
|
||||
int index = l.indexes[i];
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
typedef layer maxpool_layer;
|
||||
|
||||
image get_maxpool_image(maxpool_layer l);
|
||||
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride);
|
||||
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding);
|
||||
void resize_maxpool_layer(maxpool_layer *l, int w, int h);
|
||||
void forward_maxpool_layer(const maxpool_layer l, network_state state);
|
||||
void backward_maxpool_layer(const maxpool_layer l, network_state state);
|
||||
|
|
|
@ -7,10 +7,10 @@ extern "C" {
|
|||
#include "cuda.h"
|
||||
}
|
||||
|
||||
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, float *input, float *output, int *indexes)
|
||||
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
|
||||
{
|
||||
int h = (in_h-1)/stride + 1;
|
||||
int w = (in_w-1)/stride + 1;
|
||||
int h = (in_h + 2*pad - size + 1)/stride + 1;
|
||||
int w = (in_w + 2*pad - size + 1)/stride + 1;
|
||||
int c = in_c;
|
||||
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
|
@ -24,8 +24,8 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
|
|||
id /= c;
|
||||
int b = id;
|
||||
|
||||
int w_offset = (-size-1)/2 + 1;
|
||||
int h_offset = (-size-1)/2 + 1;
|
||||
int w_offset = -pad;
|
||||
int h_offset = -pad;
|
||||
|
||||
int out_index = j + w*(i + h*(k + c*b));
|
||||
float max = -INFINITY;
|
||||
|
@ -47,10 +47,10 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
|
|||
indexes[out_index] = max_i;
|
||||
}
|
||||
|
||||
__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, float *delta, float *prev_delta, int *indexes)
|
||||
__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes)
|
||||
{
|
||||
int h = (in_h-1)/stride + 1;
|
||||
int w = (in_w-1)/stride + 1;
|
||||
int h = (in_h + 2*pad - size + 1)/stride + 1;
|
||||
int w = (in_w + 2*pad - size + 1)/stride + 1;
|
||||
int c = in_c;
|
||||
int area = (size-1)/stride;
|
||||
|
||||
|
@ -66,8 +66,8 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_
|
|||
id /= in_c;
|
||||
int b = id;
|
||||
|
||||
int w_offset = (-size-1)/2 + 1;
|
||||
int h_offset = (-size-1)/2 + 1;
|
||||
int w_offset = -pad;
|
||||
int h_offset = -pad;
|
||||
|
||||
float d = 0;
|
||||
int l, m;
|
||||
|
@ -86,13 +86,13 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_
|
|||
|
||||
extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
|
||||
{
|
||||
int h = (layer.h-1)/layer.stride + 1;
|
||||
int w = (layer.w-1)/layer.stride + 1;
|
||||
int h = layer.out_h;
|
||||
int w = layer.out_w;
|
||||
int c = layer.c;
|
||||
|
||||
size_t n = h*w*c*layer.batch;
|
||||
|
||||
forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, state.input, layer.output_gpu, layer.indexes_gpu);
|
||||
forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu);
|
||||
check_error(cudaPeekAtLastError());
|
||||
}
|
||||
|
||||
|
@ -100,7 +100,7 @@ extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state st
|
|||
{
|
||||
size_t n = layer.h*layer.w*layer.c*layer.batch;
|
||||
|
||||
backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.delta_gpu, state.delta, layer.indexes_gpu);
|
||||
backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu);
|
||||
check_error(cudaPeekAtLastError());
|
||||
}
|
||||
|
||||
|
|
|
@ -420,7 +420,7 @@ int resize_network(network *net, int w, int h)
|
|||
net->h = h;
|
||||
int inputs = 0;
|
||||
size_t workspace_size = 0;
|
||||
//fprintf(stderr, "Resizing to %d x %d...", w, h);
|
||||
//fprintf(stderr, "Resizing to %d x %d...\n", w, h);
|
||||
//fflush(stderr);
|
||||
for (i = 0; i < net->n; ++i){
|
||||
layer l = net->layers[i];
|
||||
|
|
|
@ -43,6 +43,7 @@ typedef struct network{
|
|||
float angle;
|
||||
float exposure;
|
||||
float saturation;
|
||||
float hue;
|
||||
|
||||
int gpu_index;
|
||||
|
||||
|
|
21
src/parser.c
21
src/parser.c
|
@ -2,6 +2,7 @@
|
|||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "blas.h"
|
||||
#include "parser.h"
|
||||
#include "assert.h"
|
||||
#include "activations.h"
|
||||
|
@ -147,7 +148,10 @@ convolutional_layer parse_convolutional(list *options, size_params params)
|
|||
int n = option_find_int(options, "filters",1);
|
||||
int size = option_find_int(options, "size",1);
|
||||
int stride = option_find_int(options, "stride",1);
|
||||
int pad = option_find_int(options, "pad",0);
|
||||
int pad = option_find_int_quiet(options, "pad",0);
|
||||
int padding = option_find_int_quiet(options, "padding",0);
|
||||
if(pad) padding = size/2;
|
||||
|
||||
char *activation_s = option_find_str(options, "activation", "logistic");
|
||||
ACTIVATION activation = get_activation(activation_s);
|
||||
|
||||
|
@ -161,7 +165,7 @@ convolutional_layer parse_convolutional(list *options, size_params params)
|
|||
int binary = option_find_int_quiet(options, "binary", 0);
|
||||
int xnor = option_find_int_quiet(options, "xnor", 0);
|
||||
|
||||
convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,pad,activation, batch_normalize, binary, xnor);
|
||||
convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,padding,activation, batch_normalize, binary, xnor);
|
||||
layer.flipped = option_find_int_quiet(options, "flipped", 0);
|
||||
layer.dot = option_find_float_quiet(options, "dot", 0);
|
||||
|
||||
|
@ -234,9 +238,16 @@ layer parse_region(list *options, size_params params)
|
|||
int coords = option_find_int(options, "coords", 4);
|
||||
int classes = option_find_int(options, "classes", 20);
|
||||
int num = option_find_int(options, "num", 1);
|
||||
|
||||
params.w = option_find_int(options, "side", params.w);
|
||||
params.h = option_find_int(options, "side", params.h);
|
||||
|
||||
layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords);
|
||||
assert(l.outputs == params.inputs);
|
||||
|
||||
l.log = option_find_int_quiet(options, "log", 0);
|
||||
l.sqrt = option_find_int_quiet(options, "sqrt", 0);
|
||||
|
||||
l.softmax = option_find_int(options, "softmax", 0);
|
||||
l.max_boxes = option_find_int_quiet(options, "max",30);
|
||||
l.jitter = option_find_float(options, "jitter", .2);
|
||||
|
@ -278,6 +289,7 @@ cost_layer parse_cost(list *options, size_params params)
|
|||
COST_TYPE type = get_cost_type(type_s);
|
||||
float scale = option_find_float_quiet(options, "scale",1);
|
||||
cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale);
|
||||
layer.ratio = option_find_float_quiet(options, "ratio",0);
|
||||
return layer;
|
||||
}
|
||||
|
||||
|
@ -324,6 +336,7 @@ maxpool_layer parse_maxpool(list *options, size_params params)
|
|||
{
|
||||
int stride = option_find_int(options, "stride",1);
|
||||
int size = option_find_int(options, "size",stride);
|
||||
int padding = option_find_int_quiet(options, "padding", (size-1)/2);
|
||||
|
||||
int batch,h,w,c;
|
||||
h = params.h;
|
||||
|
@ -332,7 +345,7 @@ maxpool_layer parse_maxpool(list *options, size_params params)
|
|||
batch=params.batch;
|
||||
if(!(h && w && c)) error("Layer before maxpool layer must output image.");
|
||||
|
||||
maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride);
|
||||
maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding);
|
||||
return layer;
|
||||
}
|
||||
|
||||
|
@ -486,6 +499,7 @@ void parse_net_options(list *options, network *net)
|
|||
net->angle = option_find_float_quiet(options, "angle", 0);
|
||||
net->saturation = option_find_float_quiet(options, "saturation", 1);
|
||||
net->exposure = option_find_float_quiet(options, "exposure", 1);
|
||||
net->hue = option_find_float_quiet(options, "hue", 0);
|
||||
|
||||
if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");
|
||||
|
||||
|
@ -1085,6 +1099,7 @@ void load_convolutional_weights(layer l, FILE *fp)
|
|||
fread(l.rolling_variance, sizeof(float), l.n, fp);
|
||||
}
|
||||
fread(l.filters, sizeof(float), num, fp);
|
||||
//if(l.c == 3) scal_cpu(num, 1./256, l.filters, 1);
|
||||
if (l.flipped) {
|
||||
transpose_matrix(l.filters, l.c*l.size*l.size, l.n);
|
||||
}
|
||||
|
|
|
@ -80,8 +80,8 @@ box get_region_box(float *x, int index, int i, int j, int w, int h, int adjust,
|
|||
b.w = logistic_activate(x[index + 2]);
|
||||
b.h = logistic_activate(x[index + 3]);
|
||||
}
|
||||
if(adjust && b.w < .01) b.w = .01;
|
||||
if(adjust && b.h < .01) b.h = .01;
|
||||
//if(adjust && b.w < .01) b.w = .01;
|
||||
//if(adjust && b.h < .01) b.h = .01;
|
||||
return b;
|
||||
}
|
||||
|
||||
|
@ -149,6 +149,7 @@ void forward_region_layer(const region_layer l, network_state state)
|
|||
l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
|
||||
if(best_iou > .5) l.delta[index + 4] = 0;
|
||||
|
||||
/*
|
||||
if(*(state.net.seen) < 6400){
|
||||
box truth = {0};
|
||||
truth.x = (i + .5)/l.w;
|
||||
|
@ -157,6 +158,7 @@ void forward_region_layer(const region_layer l, network_state state)
|
|||
truth.h = .5;
|
||||
delta_region_box(truth, l.output, index, i, j, l.w, l.h, l.delta, LOG, 1);
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,6 +45,11 @@ void train_tag(char *cfgfile, char *weightfile, int clear)
|
|||
args.d = &buffer;
|
||||
args.type = TAG_DATA;
|
||||
|
||||
args.angle = net.angle;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
|
||||
fprintf(stderr, "%d classes\n", net.outputs);
|
||||
|
||||
load_thread = load_data_in_thread(args);
|
||||
|
|
|
@ -585,6 +585,13 @@ float rand_uniform(float min, float max)
|
|||
return ((float)rand()/RAND_MAX * (max - min)) + min;
|
||||
}
|
||||
|
||||
/*
 * Draw a random scale factor: a value from rand_uniform(1, s) is returned
 * either as-is or as its reciprocal, chosen by the parity of rand().
 */
float rand_scale(float s)
{
    float factor = rand_uniform(1, s);
    if(rand()%2 == 0){
        return 1./factor;
    }
    return factor;
}
|
||||
|
||||
float **one_hot_encode(float *a, int n, int k)
|
||||
{
|
||||
int i;
|
||||
|
|
|
@ -42,6 +42,7 @@ float mse_array(float *a, int n);
|
|||
float rand_normal();
|
||||
size_t rand_size_t();
|
||||
float rand_uniform(float min, float max);
|
||||
float rand_scale(float s);
|
||||
int rand_int(int min, int max);
|
||||
float sum_array(float *a, int n);
|
||||
float mean_array(float *a, int n);
|
||||
|
|
56
src/voxel.c
56
src/voxel.c
|
@ -9,37 +9,37 @@
|
|||
|
||||
void extract_voxel(char *lfile, char *rfile, char *prefix)
|
||||
{
|
||||
#ifdef OPENCV
|
||||
int w = 1920;
|
||||
int h = 1080;
|
||||
#ifdef OPENCV
|
||||
int shift = 0;
|
||||
int count = 0;
|
||||
CvCapture *lcap = cvCaptureFromFile(lfile);
|
||||
CvCapture *rcap = cvCaptureFromFile(rfile);
|
||||
while(1){
|
||||
image l = get_image_from_stream(lcap);
|
||||
image r = get_image_from_stream(rcap);
|
||||
if(!l.w || !r.w) break;
|
||||
if(count%100 == 0) {
|
||||
shift = best_3d_shift_r(l, r, -l.h/100, l.h/100);
|
||||
printf("%d\n", shift);
|
||||
}
|
||||
image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h);
|
||||
image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + shift, w, h);
|
||||
char buff[256];
|
||||
sprintf(buff, "%s_%05d_l", prefix, count);
|
||||
save_image(ls, buff);
|
||||
sprintf(buff, "%s_%05d_r", prefix, count);
|
||||
save_image(rs, buff);
|
||||
free_image(l);
|
||||
free_image(r);
|
||||
free_image(ls);
|
||||
free_image(rs);
|
||||
++count;
|
||||
int shift = 0;
|
||||
int count = 0;
|
||||
CvCapture *lcap = cvCaptureFromFile(lfile);
|
||||
CvCapture *rcap = cvCaptureFromFile(rfile);
|
||||
while(1){
|
||||
image l = get_image_from_stream(lcap);
|
||||
image r = get_image_from_stream(rcap);
|
||||
if(!l.w || !r.w) break;
|
||||
if(count%100 == 0) {
|
||||
shift = best_3d_shift_r(l, r, -l.h/100, l.h/100);
|
||||
printf("%d\n", shift);
|
||||
}
|
||||
image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h);
|
||||
image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + shift, w, h);
|
||||
char buff[256];
|
||||
sprintf(buff, "%s_%05d_l", prefix, count);
|
||||
save_image(ls, buff);
|
||||
sprintf(buff, "%s_%05d_r", prefix, count);
|
||||
save_image(rs, buff);
|
||||
free_image(l);
|
||||
free_image(r);
|
||||
free_image(ls);
|
||||
free_image(rs);
|
||||
++count;
|
||||
}
|
||||
|
||||
#else
|
||||
printf("need OpenCV for extraction\n");
|
||||
printf("need OpenCV for extraction\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -164,6 +164,6 @@ void run_voxel(int argc, char **argv)
|
|||
else if(0==strcmp(argv[2], "test")) test_voxel(cfg, weights, filename);
|
||||
else if(0==strcmp(argv[2], "extract")) extract_voxel(argv[3], argv[4], argv[5]);
|
||||
/*
|
||||
else if(0==strcmp(argv[2], "valid")) validate_voxel(cfg, weights);
|
||||
*/
|
||||
else if(0==strcmp(argv[2], "valid")) validate_voxel(cfg, weights);
|
||||
*/
|
||||
}
|
||||
|
|
|
@ -54,6 +54,11 @@ void train_yolo(char *cfgfile, char *weightfile)
|
|||
args.d = &buffer;
|
||||
args.type = REGION_DATA;
|
||||
|
||||
args.angle = net.angle;
|
||||
args.exposure = net.exposure;
|
||||
args.saturation = net.saturation;
|
||||
args.hue = net.hue;
|
||||
|
||||
pthread_t load_thread = load_data_in_thread(args);
|
||||
clock_t time;
|
||||
//while(i*imgs < N*120){
|
||||
|
|
Loading…
Reference in New Issue