diff --git a/Makefile b/Makefile index 5fb70543..431933ca 100644 --- a/Makefile +++ b/Makefile @@ -151,7 +151,7 @@ LDFLAGS+= -L/usr/local/zed/lib -lsl_zed endif endif -OBJ=image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o +OBJ=image_opencv.o http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o representation_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o gaussian_yolo_layer.o upsample_layer.o lstm_layer.o conv_lstm_layer.o scale_channels_layer.o sam_layer.o ifeq ($(GPU), 1) LDFLAGS+= -lstdc++ OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o diff --git a/build/darknet/darknet.vcxproj b/build/darknet/darknet.vcxproj index 5282de26..5c8a7c3e 100644 --- a/build/darknet/darknet.vcxproj +++ b/build/darknet/darknet.vcxproj @@ -223,6 +223,7 @@ + @@ -286,6 +287,7 @@ + diff --git a/build/darknet/x64/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg b/build/darknet/x64/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg new file mode 100644 index 00000000..1461d888 --- /dev/null +++ b/build/darknet/x64/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg @@ -0,0 +1,1429 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +#:104x104 54:52x52 85:26x26 104:13x13 for 416 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-7 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-10 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 
+pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-16 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish +stopbackward=800 + + +########################## + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -7 + +[upsample] +stride=2 + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -6 + +[shortcut] +from= -10 +activation=linear + +[upsample] +stride=2 + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = 17 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -6 + +[shortcut] +from= -19 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +#### BFM-1 + +[route] +layers = 17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 
+size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=256 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.2 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.2 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 137 +#layers = -1, -20 + +[route] +layers = -17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=512 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.2 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.1 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 126 +# layers = -1, -49 + +[route] +layers = -17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 
+filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=1024 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 diff --git a/build/darknet/x64/darknet.py b/build/darknet/x64/darknet.py index bb0ee183..23539e0b 100644 --- a/build/darknet/x64/darknet.py +++ b/build/darknet/x64/darknet.py @@ -25,6 +25,8 @@ import math import random import os +print("Run: darknet_images.py or:\n") +print("python.exe darknet_video.py --data_file cfg/coco.data --config_file cfg/yolov4.cfg --weights yolov4.weights --input test.mp4 \n") class BOX(Structure): _fields_ = [("x", c_float), diff --git a/build/darknet/x64/darknet_python.cmd b/build/darknet/x64/darknet_python.cmd index b2df11d8..74556582 100644 --- a/build/darknet/x64/darknet_python.cmd +++ b/build/darknet/x64/darknet_python.cmd @@ -14,6 +14,6 @@ rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install sci rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install scipy rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install opencv-python -C:\Users\Alex\AppData\Local\Programs\Python\Python36\python.exe darknet.py +C:\Users\Alex\AppData\Local\Programs\Python\Python36\python.exe darknet_images.py pause \ No newline at end of file diff --git a/build/darknet/x64/partial.cmd b/build/darknet/x64/partial.cmd index 26a701a9..a9b06ca8 100644 --- a/build/darknet/x64/partial.cmd +++ b/build/darknet/x64/partial.cmd @@ -8,7 +8,10 @@ rem darknet.exe partial cfg/tiny-yolo-voc.cfg tiny-yolo-voc.weights tiny-yolo-vo darknet.exe partial cfg/yolov4-tiny.cfg yolov4-tiny.weights yolov4-tiny.conv.29 29 -darknet.exe partial cfg/yolov4-sam-mish.cfg cfg/yolov4-sam-mish.weights cfg/yolov4-sam-mish.conv.137 137 +rem darknet.exe partial cfg/yolov4-sam-mish.cfg cfg/yolov4-sam-mish.weights cfg/yolov4-sam-mish.conv.137 137 + +rem darknet.exe partial cfg/yolov4-sam-mish.cfg cfg/yolov4-sam-mish.weights cfg/yolov4-sam-mish.conv.105 105 + pause diff --git a/build/darknet/yolo_cpp_dll.vcxproj b/build/darknet/yolo_cpp_dll.vcxproj index 4bd09206..35742aa2 100644 --- a/build/darknet/yolo_cpp_dll.vcxproj +++ b/build/darknet/yolo_cpp_dll.vcxproj @@ -52,7 +52,7 @@ - + @@ -155,7 +155,7 @@ 64 - compute_30,sm_30;compute_75,sm_75 + compute_35,sm_35;compute_75,sm_75 @@ -225,6 +225,7 @@ + @@ -290,6 +291,7 @@ + @@ -306,6 +308,6 @@ - + \ No newline at end of file diff --git a/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg b/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg new file mode 100644 index 00000000..1461d888 --- /dev/null +++ b/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg @@ -0,0 +1,1429 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=512 +height=512 
+channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +#:104x104 54:52x52 85:26x26 104:13x13 for 416 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-7 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-10 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-28 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 
+filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=mish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -1,-16 + +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=mish +stopbackward=800 + + +########################## + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, -13 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -7 + +[upsample] +stride=2 + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -6 + +[shortcut] +from= -10 +activation=linear + +[upsample] +stride=2 + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + 
+[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = 17 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=mish + +[reorg3d] +stride=2 + +[route] +layers = -1, -4, -6 + +[shortcut] +from= -19 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +#### BFM-1 + +[route] +layers = 17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=mish + +[route] +layers = -1, -6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=256 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=mish + + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.2 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.2 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1, 137 +#layers = -1, -20 + +[route] +layers = -17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=512 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 
110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.2 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.1 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1, 126 +# layers = -1, -49 + +[route] +layers = -17 + +[reorg3d] +stride=2 + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=mish + +[route] +layers = -1,-6 + +[convolutional] +batch_normalize=1 +size=1 +stride=1 +pad=1 +filters=1024 +activation=logistic + +[sam] +from=-2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=mish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=mish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +scale_x_y = 1.05 +iou_thresh=0.213 +cls_normalizer=1.0 +iou_normalizer=0.07 +uc_normalizer=0.07 +iou_loss=ciou +nms_kind=greedynms +beta_nms=0.6 +beta1=0.6 +max_delta=5 diff --git a/include/darknet.h b/include/darknet.h index 1a5fdf75..f5170f37 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -192,7 +192,8 @@ typedef enum { L2NORM, EMPTY, BLANK, - CONTRASTIVE + CONTRASTIVE, + IMPLICIT } LAYER_TYPE; // layer.h diff --git a/src/blas.h b/src/blas.h index ab888903..b69a702f 100644 --- a/src/blas.h +++ b/src/blas.h @@ -174,6 +174,9 @@ void mult_inverse_array_gpu(const float *src_gpu, float *dst_gpu, int size, floa void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size); void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int b, int type); +void forward_implicit_gpu(int batch, int nweights, float *weight_gpu, float *output_gpu); +void backward_implicit_gpu(int batch, int nweights, float *weight_updates_gpu, float *delta_gpu); + #endif // GPU #ifdef __cplusplus } diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu index bc327995..21dbfbba 100644 --- a/src/blas_kernels.cu +++ b/src/blas_kernels.cu @@ -2435,4 +2435,37 @@ extern "C" void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int coord_conv_kernel << > > (dst, w, h, chan, b, type); CHECK_CUDA(cudaPeekAtLastError()); -} \ No newline at end of file +} + + +__global__ void forward_implicit_kernel(int size, int batch, int nweights, float *weight_gpu, float *output_gpu) +{ + const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= size) return; + + output_gpu[id] = weight_gpu[id % nweights]; +} + +extern "C" void forward_implicit_gpu(int batch, int nweights, float *weight_gpu, float *output_gpu) +{ 
+ int size = batch * nweights; + forward_implicit_kernel << > > (size, batch, nweights, weight_gpu, output_gpu); + CHECK_CUDA(cudaPeekAtLastError()); +} + + + +__global__ void backward_implicit_kernel(int size, int batch, int nweights, float *weight_updates_gpu, float *delta_gpu) +{ + const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if (id >= size) return; + + weight_updates_gpu[id % nweights] += delta_gpu[id]; +} + +extern "C" void backward_implicit_gpu(int batch, int nweights, float *weight_updates_gpu, float *delta_gpu) +{ + int size = batch * nweights; + backward_implicit_kernel << > > (size, batch, nweights, weight_updates_gpu, delta_gpu); + CHECK_CUDA(cudaPeekAtLastError()); +} diff --git a/src/parser.c b/src/parser.c index e7498d9d..8f8f5842 100644 --- a/src/parser.c +++ b/src/parser.c @@ -39,6 +39,11 @@ #include "version.h" #include "yolo_layer.h" #include "gaussian_yolo_layer.h" +#include "representation_layer.h" + +void empty_func(dropout_layer l, network_state state) { + //l.output_gpu = state.input; +} typedef struct{ char *type; @@ -90,7 +95,9 @@ LAYER_TYPE string_to_layer_type(char * type) if (strcmp(type, "[contrastive]") == 0) return CONTRASTIVE; if (strcmp(type, "[route]")==0) return ROUTE; if (strcmp(type, "[upsample]") == 0) return UPSAMPLE; - if (strcmp(type, "[empty]") == 0) return EMPTY; + if (strcmp(type, "[empty]") == 0 + || strcmp(type, "[silence]") == 0) return EMPTY; + if (strcmp(type, "[implicit]") == 0) return IMPLICIT; return BLANK; } @@ -1036,6 +1043,17 @@ layer parse_sam(list *options, size_params params, network net) return s; } +layer parse_implicit(list *options, size_params params, network net) +{ + float mean_init = option_find_float(options, "mean", 0.0); + float std_init = option_find_float(options, "std", 0.2); + int filters = option_find_int(options, "filters", 128); + int atoms = option_find_int_quiet(options, "atoms", 1); + + layer s = make_implicit_layer(params.batch, params.index, mean_init, std_init, filters, atoms); + + return s; +} layer parse_activation(list *options, size_params params) { @@ -1480,6 +1498,8 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) net.layers[count - 1].use_bin_output = 0; net.layers[l.index].use_bin_output = 0; net.layers[l.index].keep_delta_gpu = 1; + } else if (lt == IMPLICIT) { + l = parse_implicit(options, params, net); }else if(lt == DROPOUT){ l = parse_dropout(options, params); l.output = net.layers[count-1].output; @@ -1492,16 +1512,25 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) } else if (lt == EMPTY) { layer empty_layer = {(LAYER_TYPE)0}; - empty_layer.out_w = params.w; - empty_layer.out_h = params.h; - empty_layer.out_c = params.c; l = empty_layer; + l.type = EMPTY; + l.w = l.out_w = params.w; + l.h = l.out_h = params.h; + l.c = l.out_c = params.c; + l.batch = params.batch; + l.inputs = l.outputs = params.inputs; l.output = net.layers[count - 1].output; l.delta = net.layers[count - 1].delta; + l.forward = empty_func; + l.backward = empty_func; #ifdef GPU l.output_gpu = net.layers[count - 1].output_gpu; l.delta_gpu = net.layers[count - 1].delta_gpu; + l.keep_delta_gpu = 1; + l.forward_gpu = empty_func; + l.backward_gpu = empty_func; #endif + fprintf(stderr, "empty \n"); }else{ fprintf(stderr, "Type not recognized: %s\n", s->type); } @@ -1604,6 +1633,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0); 
l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1); option_unused(options); + net.layers[count] = l; if (l.workspace_size > workspace_size) workspace_size = l.workspace_size; if (l.inputs > max_inputs) max_inputs = l.inputs; @@ -1810,6 +1840,24 @@ void save_shortcut_weights(layer l, FILE *fp) fwrite(l.weights, sizeof(float), num, fp); } +void save_implicit_weights(layer l, FILE *fp) +{ +#ifdef GPU + if (gpu_index >= 0) { + pull_implicit_layer(l); + //printf("\n pull_implicit_layer \n"); + } +#endif + int i; + //if(l.weight_updates) for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weight_updates[i]); + //printf(" l.nweights = %d - update \n", l.nweights); + //for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]); + //printf(" l.nweights = %d \n\n", l.nweights); + + int num = l.nweights; + fwrite(l.weights, sizeof(float), num, fp); +} + void save_convolutional_weights(layer l, FILE *fp) { if(l.binary){ @@ -1921,6 +1969,8 @@ void save_weights_upto(network net, char *filename, int cutoff, int save_ema) } } if (l.type == SHORTCUT && l.nweights > 0) { save_shortcut_weights(l, fp); + } if (l.type == IMPLICIT) { + save_implicit_weights(l, fp); } if(l.type == CONNECTED){ save_connected_weights(l, fp); } if(l.type == BATCHNORM){ @@ -2131,6 +2181,21 @@ void load_shortcut_weights(layer l, FILE *fp) #endif } +void load_implicit_weights(layer l, FILE *fp) +{ + int num = l.nweights; + int read_bytes; + read_bytes = fread(l.weights, sizeof(float), num, fp); + if (read_bytes > 0 && read_bytes < num) printf("\n Warning: Unexpected end of wights-file! l.weights - l.index = %d \n", l.index); + //for (int i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]); + //printf(" read_bytes = %d \n\n", read_bytes); +#ifdef GPU + if (gpu_index >= 0) { + push_implicit_layer(l); + } +#endif +} + void load_weights_upto(network *net, char *filename, int cutoff) { #ifdef GPU @@ -2175,6 +2240,9 @@ void load_weights_upto(network *net, char *filename, int cutoff) if (l.type == SHORTCUT && l.nweights > 0) { load_shortcut_weights(l, fp); } + if (l.type == IMPLICIT) { + load_implicit_weights(l, fp); + } if(l.type == CONNECTED){ load_connected_weights(l, fp, transpose); } diff --git a/src/representation_layer.c b/src/representation_layer.c new file mode 100644 index 00000000..a6cc5c5f --- /dev/null +++ b/src/representation_layer.c @@ -0,0 +1,160 @@ +#include "representation_layer.h" +#include "utils.h" +#include "dark_cuda.h" +#include "blas.h" +#include +#include + +layer make_implicit_layer(int batch, int index, float mean_init, float std_init, int filters, int atoms) +{ + fprintf(stderr,"implicit Layer: %d x %d \t mean=%.2f, std=%.2f \n", filters, atoms, mean_init, std_init); + layer l = { (LAYER_TYPE)0 }; + l.type = IMPLICIT; + l.batch = batch; + l.w = 1; + l.h = 1; + l.c = 1; + + l.out_w = 1; + l.out_h = atoms; + l.out_c = filters; + + l.outputs = l.out_w*l.out_h*l.out_c; + l.inputs = 1; + l.index = index; + + l.nweights = l.out_w * l.out_h * l.out_c; + + l.weight_updates = (float*)xcalloc(l.nweights, sizeof(float)); + l.weights = (float*)xcalloc(l.nweights, sizeof(float)); + int i; + for (i = 0; i < l.nweights; ++i) l.weights[i] = mean_init + rand_uniform(-std_init, std_init); + + + l.delta = (float*)xcalloc(l.outputs * batch, sizeof(float)); + l.output = (float*)xcalloc(l.outputs * batch, sizeof(float)); + + l.forward = forward_implicit_layer; + l.backward = backward_implicit_layer; + l.update = update_implicit_layer; +#ifdef GPU + l.forward_gpu = forward_implicit_layer_gpu; + 
l.backward_gpu = backward_implicit_layer_gpu; + l.update_gpu = update_implicit_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); + l.output_gpu = cuda_make_array(l.output, l.outputs*batch); + + l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights); + l.weights_gpu = cuda_make_array(l.weights, l.nweights); +#endif + return l; +} + +void resize_implicit_layer(layer *l, int w, int h) +{ +} + +void forward_implicit_layer(const layer l, network_state state) +{ + int i; + #pragma omp parallel for + for (i = 0; i < l.nweights * l.batch; ++i) { + l.output[i] = l.weights[i % l.nweights]; + } +} + +void backward_implicit_layer(const layer l, network_state state) +{ + int i; + #pragma omp parallel for + for (i = 0; i < l.nweights * l.batch; ++i) { + l.weight_updates[i % l.nweights] += l.delta[i]; + } +} + +void update_implicit_layer(layer l, int batch, float learning_rate_init, float momentum, float decay) +{ + float learning_rate = learning_rate_init*l.learning_rate_scale; + //float momentum = a.momentum; + //float decay = a.decay; + //int batch = a.batch; + + axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1); + axpy_cpu(l.nweights, learning_rate / batch, l.weight_updates, 1, l.weights, 1); + scal_cpu(l.nweights, momentum, l.weight_updates, 1); + +} + + +#ifdef GPU +void forward_implicit_layer_gpu(const layer l, network_state state) +{ + forward_implicit_gpu(l.batch, l.nweights, l.weights_gpu, l.output_gpu); +} + +void backward_implicit_layer_gpu(const layer l, network_state state) +{ + backward_implicit_gpu(l.batch, l.nweights, l.weight_updates_gpu, l.delta_gpu); +} + +void update_implicit_layer_gpu(layer l, int batch, float learning_rate_init, float momentum, float decay, float loss_scale) +{ + // Loss scale for Mixed-Precision on Tensor-Cores + float learning_rate = learning_rate_init*l.learning_rate_scale / loss_scale; + //float momentum = a.momentum; + //float decay = a.decay; + //int batch = a.batch; + + reset_nan_and_inf(l.weight_updates_gpu, l.nweights); + fix_nan_and_inf(l.weights_gpu, l.nweights); + + if (l.adam) { + //adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t); + adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.nweights, batch, l.t); + } + else { + //axpy_ongpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); + //axpy_ongpu(l.nweights, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + //scal_ongpu(l.nweights, momentum, l.weight_updates_gpu, 1); + + axpy_ongpu(l.nweights, -decay*batch*loss_scale, l.weights_gpu, 1, l.weight_updates_gpu, 1); + axpy_ongpu(l.nweights, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); + + scal_ongpu(l.nweights, momentum, l.weight_updates_gpu, 1); + } + + if (l.clip) { + constrain_ongpu(l.nweights, l.clip, l.weights_gpu, 1); + } +} + +void pull_implicit_layer(layer l) +{ + cuda_pull_array_async(l.weights_gpu, l.weights, l.nweights); + cuda_pull_array_async(l.weight_updates_gpu, l.weight_updates, l.nweights); + + if (l.adam) { + cuda_pull_array_async(l.m_gpu, l.m, l.nweights); + cuda_pull_array_async(l.v_gpu, l.v, l.nweights); + } + CHECK_CUDA(cudaPeekAtLastError()); + cudaStreamSynchronize(get_cuda_stream()); +} + +void push_implicit_layer(layer l) +{ + cuda_push_array(l.weights_gpu, l.weights, l.nweights); + + if (l.train) { + cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); 
+ } + if (l.adam) { + cuda_push_array(l.m_gpu, l.m, l.nweights); + cuda_push_array(l.v_gpu, l.v, l.nweights); + } + CHECK_CUDA(cudaPeekAtLastError()); +} +#endif + + diff --git a/src/representation_layer.h b/src/representation_layer.h new file mode 100644 index 00000000..8b2a9da3 --- /dev/null +++ b/src/representation_layer.h @@ -0,0 +1,29 @@ +#ifndef REPRESENTATION_LAYER_H +#define REPRESENTATION_LAYER_H + +#include "layer.h" +#include "network.h" + +#ifdef __cplusplus +extern "C" { +#endif +layer make_implicit_layer(int batch, int index, float mean_init, float std_init, int filters, int atoms); +void forward_implicit_layer(const layer l, network_state state); +void backward_implicit_layer(const layer l, network_state state); +void update_implicit_layer(layer l, int batch, float learning_rate_init, float momentum, float decay); + +void resize_implicit_layer(layer *l, int w, int h); + +#ifdef GPU +void forward_implicit_layer_gpu(const layer l, network_state state); +void backward_implicit_layer_gpu(const layer l, network_state state); + +void update_implicit_layer_gpu(layer l, int batch, float learning_rate_init, float momentum, float decay, float loss_scale); +void pull_implicit_layer(layer l); +void push_implicit_layer(layer l); +#endif + +#ifdef __cplusplus +} +#endif +#endif // REPRESENTATION_LAYER_H
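Note on the new [implicit] (representation) layer introduced by this patch: forward_implicit_layer() simply broadcasts the layer's learned weights across the batch (output[i] = weights[i % nweights]) and backward_implicit_layer() accumulates the incoming delta into weight_updates, so the layer emits a learned constant tensor of shape 1 x atoms x filters and ignores its input. The stanza below is a minimal, hypothetical usage sketch — it is not part of this patch and none of the new cfg files here use it — showing the keys parse_implicit() in src/parser.c reads, with its defaults noted in the comments; the concrete values chosen are illustrative only.

# hypothetical example block, not from this PR
[implicit]
filters=256   # out_c of the layer (default 128 if omitted)
atoms=1       # out_h of the layer (default 1 if omitted)
mean=0.0      # weight init mean (default 0.0)
std=0.2       # weights start at mean + uniform(-std, std) (default 0.2)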