From 2418fa7b08eb7d42635420780e78187eb0b42e35 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 12 Jul 2021 05:30:51 +0300 Subject: [PATCH] Added: yolov4-csp-swish.cfg, yolov4-csp-x-swish.cfg, yolov4-csp-x-swish-frozen.cfg --- build/darknet/x64/cfg/yolov4-csp-swish.cfg | 1355 ++++++++++++++ .../x64/cfg/yolov4-csp-x-swish-frozen.cfg | 1556 +++++++++++++++++ build/darknet/x64/cfg/yolov4-csp-x-swish.cfg | 1556 +++++++++++++++++ build/darknet/x64/partial.cmd | 9 + cfg/yolov4-csp-swish.cfg | 1355 ++++++++++++++ cfg/yolov4-csp-x-swish-frozen.cfg | 1556 +++++++++++++++++ cfg/yolov4-csp-x-swish.cfg | 1556 +++++++++++++++++ 7 files changed, 8943 insertions(+) create mode 100644 build/darknet/x64/cfg/yolov4-csp-swish.cfg create mode 100644 build/darknet/x64/cfg/yolov4-csp-x-swish-frozen.cfg create mode 100644 build/darknet/x64/cfg/yolov4-csp-x-swish.cfg create mode 100644 cfg/yolov4-csp-swish.cfg create mode 100644 cfg/yolov4-csp-x-swish-frozen.cfg create mode 100644 cfg/yolov4-csp-x-swish.cfg diff --git a/build/darknet/x64/cfg/yolov4-csp-swish.cfg b/build/darknet/x64/cfg/yolov4-csp-swish.cfg new file mode 100644 index 00000000..2aab444e --- /dev/null +++ b/build/darknet/x64/cfg/yolov4-csp-swish.cfg @@ -0,0 +1,1355 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=640 +height=640 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#optimized_memory=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=swish + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=swish + 
+[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=swish + +# 4 (previous+1+3k) +[shortcut] +from=-3 +activation=linear + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-10 + +# Transition last + +# 17 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 
+filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(4+3k)] + +[route] +layers = -1,-28 + +# Transition last + +# 48 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 
+activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + 
+# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-28 + +# Transition last + +# 79 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-16 + +# Transition last + +# 98 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=swish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[route] +layers = -1, -13 + +# 113 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -6 + +# Transition last + +# 127 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 
+size=1 +stride=1 +pad=1 +activation=swish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -6 + +# Transition last + +# 141 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=swish + +[route] +layers = -1, 127 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[route] +layers = -1,-6 + +# Transition last + +# 152 (previous+3+4+2k) +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=swish + +[route] +layers = -1, 113 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[route] +layers = -1,-6 + +# Transition last + +# 163 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish +stopbackward=900 + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 141 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 152 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 
401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 163 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 diff --git a/build/darknet/x64/cfg/yolov4-csp-x-swish-frozen.cfg b/build/darknet/x64/cfg/yolov4-csp-x-swish-frozen.cfg new file mode 100644 index 00000000..838e0e1a --- /dev/null +++ b/build/darknet/x64/cfg/yolov4-csp-x-swish-frozen.cfg @@ -0,0 +1,1556 @@ + +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=640 +height=640 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#optimized_memory=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=swish + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=2 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=40 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 
+activation=swish + +# 4 (previous+1+3k) +[shortcut] +from=-3 +activation=linear + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 20 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] 
+from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 
+filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(4+3k)] + +[route] +layers = -1,-34 + +# Transition last + +# 57 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 
+activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-34 + +# Transition last + +# 94 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1280 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] 
+batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-19 + +# Transition last + +# 116 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1280 +size=1 +stride=1 +pad=1 +activation=swish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish 
+ +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[route] +layers = -1, -15 + +# 133 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 94 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 149 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 57 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 
+ +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 165 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1, 149 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 178 
(previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=640 +activation=swish + +[route] +layers = -1, 133 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 191 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish +stopbackward=1 + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 165 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 178 + +[convolutional] +batch_normalize=1 
+size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 191 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1280 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 diff --git a/build/darknet/x64/cfg/yolov4-csp-x-swish.cfg b/build/darknet/x64/cfg/yolov4-csp-x-swish.cfg new file mode 100644 index 00000000..015db22c --- /dev/null +++ b/build/darknet/x64/cfg/yolov4-csp-x-swish.cfg @@ -0,0 +1,1556 @@ + +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=640 +height=640 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#optimized_memory=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=swish + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=2 +pad=1 
+activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=40 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +# 4 (previous+1+3k) +[shortcut] +from=-3 +activation=linear + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 20 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + 
+[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 
+stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(4+3k)] + +[route] +layers = -1,-34 + +# Transition last + +# 57 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + 
+[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-34 + +# Transition last + +# 94 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1280 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 
+filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-19 + +# Transition last + +# 116 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1280 +size=1 +stride=1 +pad=1 +activation=swish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] 
+stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[route] +layers = -1, -15 + +# 133 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 94 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 149 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + + +# FPN-3 + +[convolutional] +batch_normalize=1 
+filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 57 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 165 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1, 149 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] 
+batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 178 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=640 +activation=swish + +[route] +layers = -1, 133 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 191 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish +stopbackward=900 + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 165 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 
+#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 178 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 191 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1280 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 diff --git a/build/darknet/x64/partial.cmd b/build/darknet/x64/partial.cmd index 6080cf68..ee0cb0d2 100644 --- a/build/darknet/x64/partial.cmd +++ b/build/darknet/x64/partial.cmd @@ -2,6 +2,15 @@ rem Download weights for - DenseNet201, ResNet50 and ResNet152 by this link: htt rem Download Yolo/Tiny-yolo: https://pjreddie.com/darknet/yolo/ rem Download Yolo9000: http://pjreddie.com/media/files/yolo9000.weights + +darknet.exe partial cfg/yolov4-csp-swish.cfg yolov4-csp-swish.weights yolov4-csp-swish.conv.164 164 + + +darknet.exe partial cfg/yolov4-csp-x-swish.cfg yolov4-csp-x-swish.weights yolov4-csp-x-swish.conv.192 192 + + 
+pause + darknet.exe partial cfg/yolov4-csp.cfg yolov4-csp.weights yolov4-csp.conv.142 142 darknet.exe partial cfg/yolov4x-mish.cfg yolov4x-mish.weights yolov4x-mish.conv.166 166 diff --git a/cfg/yolov4-csp-swish.cfg b/cfg/yolov4-csp-swish.cfg new file mode 100644 index 00000000..2aab444e --- /dev/null +++ b/cfg/yolov4-csp-swish.cfg @@ -0,0 +1,1355 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=640 +height=640 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#optimized_memory=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=swish + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=swish + +# 4 (previous+1+3k) +[shortcut] +from=-3 +activation=linear + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=64 
+size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-10 + +# Transition last + +# 17 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + 
+[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(4+3k)] + +[route] +layers = -1,-28 + +# Transition last + +# 48 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-28 + +# Transition last + +# 79 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 
+activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1 -(3k+4)] + +[route] +layers = -1,-16 + +# Transition last + +# 98 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1024 +size=1 +stride=1 +pad=1 +activation=swish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[route] +layers = -1, -13 + +# 113 (previous+6+5+2k) +[convolutional] +batch_normalize=1 
+filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 79 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -6 + +# Transition last + +# 127 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 48 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=swish + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 
+size=3 +stride=1 +pad=1 +filters=128 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -6 + +# Transition last + +# 141 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=swish + +[route] +layers = -1, 127 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[route] +layers = -1,-6 + +# Transition last + +# 152 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=512 +activation=swish + +[route] +layers = -1, 113 + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[route] +layers = -1,-6 + +# Transition last + +# 163 (previous+3+4+2k) 
+[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=swish +stopbackward=900 + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 141 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 152 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 163 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 
+max_delta=2 diff --git a/cfg/yolov4-csp-x-swish-frozen.cfg b/cfg/yolov4-csp-x-swish-frozen.cfg new file mode 100644 index 00000000..838e0e1a --- /dev/null +++ b/cfg/yolov4-csp-x-swish-frozen.cfg @@ -0,0 +1,1556 @@ + +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=640 +height=640 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#optimized_memory=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=swish + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=2 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=40 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +# 4 (previous+1+3k) +[shortcut] +from=-3 +activation=linear + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 
+size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 20 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + 
+[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-34 + +# Transition last + +# 57 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 
+activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + 
+[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-34 + +# Transition last + +# 94 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1280 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] 
+batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-19 + +# Transition last + +# 116 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1280 +size=1 +stride=1 +pad=1 +activation=swish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[route] +layers = -1, -15 + +# 133 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 94 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] 
+layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 149 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 57 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 165 (previous+6+4+2k) +[convolutional] 
+batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1, 149 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 178 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=640 +activation=swish + +[route] +layers = -1, 133 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 
+stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 191 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish +stopbackward=1 + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 165 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 178 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 191 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1280 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 
+ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 diff --git a/cfg/yolov4-csp-x-swish.cfg b/cfg/yolov4-csp-x-swish.cfg new file mode 100644 index 00000000..015db22c --- /dev/null +++ b/cfg/yolov4-csp-x-swish.cfg @@ -0,0 +1,1556 @@ + +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=8 +width=640 +height=640 +channels=3 +momentum=0.949 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500500 +policy=steps +steps=400000,450000 +scales=.1,.1 + +mosaic=1 + +letter_box=1 + +ema_alpha=0.9998 + +#optimized_memory=1 + + +# ============ Backbone ============ # + +# Stem + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=swish + +# P1 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=2 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=40 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +# 4 (previous+1+3k) +[shortcut] +from=-3 +activation=linear + +# P2 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + 
+[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=80 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=80 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-13 + +# Transition last + +# 20 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# P3 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 
+size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-34 + +# Transition last + +# 57 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# P4 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + 
+[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 
+stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-34 + +# Transition last + +# 94 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# P5 + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1280 +size=3 +stride=2 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# Residual Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + 
+[convolutional] +batch_normalize=1 +filters=640 +size=3 +stride=1 +pad=1 +activation=swish + +[shortcut] +from=-3 +activation=linear + +# Transition first + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# Merge [-1, -(3k+4)] + +[route] +layers = -1,-19 + +# Transition last + +# 116 (previous+7+3k) +[convolutional] +batch_normalize=1 +filters=1280 +size=1 +stride=1 +pad=1 +activation=swish + +# ============ End of Backbone ============ # + +# ============ Neck ============ # + +# CSPSPP + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[route] +layers = -1, -15 + +# 133 (previous+6+5+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# End of CSPSPP + + +# FPN-4 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 94 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 
+filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 149 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + + +# FPN-3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[upsample] +stride=2 + +[route] +layers = 57 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -1, -3 + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=160 +activation=swish + +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 
+stride=1 +pad=1 +filters=160 +activation=swish + +# Merge [-1, -(2k+2)] + +[route] +layers = -1, -8 + +# Transition last + +# 165 (previous+6+4+2k) +[convolutional] +batch_normalize=1 +filters=160 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-4 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1, 149 + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 178 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=320 +size=1 +stride=1 +pad=1 +activation=swish + + +# PAN-5 + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=640 +activation=swish + +[route] +layers = -1, 133 + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +# Split + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[route] +layers = -2 + +# Plain Block + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 
+activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[route] +layers = -1,-8 + +# Transition last + +# 191 (previous+3+4+2k) +[convolutional] +batch_normalize=1 +filters=640 +size=1 +stride=1 +pad=1 +activation=swish +stopbackward=900 + +# ============ End of Neck ============ # + +# ============ Head ============ # + +# YOLO-3 + +[route] +layers = 165 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=320 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 0,1,2 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-4 + +[route] +layers = 178 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=640 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] +mask = 3,4,5 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# YOLO-5 + +[route] +layers = 191 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1280 +activation=swish + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=logistic + +[yolo] 
+mask = 6,7,8 +anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +#iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=0.4 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2