From a234a5022333c930de08f2470184ef4e0c68356e Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 23 Mar 2020 21:56:32 +0300 Subject: [PATCH] Added efficientnet-lite3.cfg and activation=relu6 --- build/darknet/x64/cfg/efficientnet-lite3.cfg | 1009 ++++++++++++++++++ cfg/efficientnet-lite3.cfg | 1009 ++++++++++++++++++ include/darknet.h | 2 +- src/activation_kernels.cu | 25 +- src/activations.c | 1 + src/activations.h | 3 + 6 files changed, 2047 insertions(+), 2 deletions(-) create mode 100644 build/darknet/x64/cfg/efficientnet-lite3.cfg create mode 100644 cfg/efficientnet-lite3.cfg diff --git a/build/darknet/x64/cfg/efficientnet-lite3.cfg b/build/darknet/x64/cfg/efficientnet-lite3.cfg new file mode 100644 index 00000000..e76bbe30 --- /dev/null +++ b/build/darknet/x64/cfg/efficientnet-lite3.cfg @@ -0,0 +1,1009 @@ +# https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/lite/efficientnet_lite_builder.py +# (width_coefficient, depth_coefficient, resolution, dropout_rate) +# 'efficientnet-lite3': (1.2, 1.4, 280, 0.3), +# +#_DEFAULT_BLOCKS_ARGS = [ +# 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', +# 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', +# 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25', +# 'r1_k3_s11_e6_i192_o320_se0.25', +#] + +[net] +# Training +batch=120 +subdivisions=6 +height=288 +width=288 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=320 + +cutmix=1 +mosaic=1 +label_smooth_eps=0.1 + +burn_in=1000 +learning_rate=0.256 +policy=step +step=10000 +scale=0.96 +max_batches=1600000 +momentum=0.9 +decay=0.00005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +### CONV1 - 1 (1) +# conv1 +[convolutional] +filters=40 #32 +size=3 +pad=1 +stride=2 +batch_normalize=1 +activation=relu6 + + +### CONV2 - MBConv1 - 1 (2) +# conv2_1_expand +[convolutional] +filters=40 #32 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv2_1_dwise +[convolutional] +groups=40 #32 +filters=40 #32 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv2_1_linear +[convolutional] +filters=16 #16 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV2 - MBConv1 - 2 (2) +# conv2_1_expand +[convolutional] +filters=40 #32 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv2_1_dwise +[convolutional] +groups=40 #32 +filters=40 #32 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv2_1_linear +[convolutional] +filters=16 #16 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV3 - MBConv6 - 1 (3) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_3_1 +[shortcut] +from=-5 +activation=linear + +# conv2_2_expand +[convolutional] +filters=112 #96 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv2_2_dwise +[convolutional] +groups=112 #96 +filters=112 #96 +size=3 +pad=1 +stride=2 +batch_normalize=1 +activation=relu6 + +# conv2_2_linear +[convolutional] +filters=32 #24 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV3 - MBConv6 - 2 (3) +# conv3_1_expand +[convolutional] +filters=176 #144 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv3_1_dwise +[convolutional] +groups=176 #144 +filters=176 #144 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv3_1_linear +[convolutional] +filters=32 #24 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV3 - MBConv6 - 3 (3) +# dropout only before 
residual connection +[dropout] +probability=.3 + +# block_3_1 +[shortcut] +from=-5 +activation=linear + +# conv3_1_expand +[convolutional] +filters=176 #144 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv3_1_dwise +[convolutional] +groups=176 #144 +filters=176 #144 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv3_1_linear +[convolutional] +filters=32 #24 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV4 - MBConv6 - 1 (3) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_3_1 +[shortcut] +from=-5 +activation=linear + +# conv_3_2_expand +[convolutional] +filters=176 #144 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_3_2_dwise +[convolutional] +groups=176 #144 +filters=176 #144 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=relu6 + +# conv_3_2_linear +[convolutional] +filters=48 #40 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV4 - MBConv6 - 2 (3) +# conv_4_1_expand +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_1_dwise +[convolutional] +groups=232 #192 +filters=232 #192 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_4_1_linear +[convolutional] +filters=48 #40 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV4 - MBConv6 - 3 (3) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_4_2 +[shortcut] +from=-5 +activation=linear + +# conv_4_1_expand +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_1_dwise +[convolutional] +groups=232 #192 +filters=232 #192 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_4_1_linear +[convolutional] +filters=48 #40 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + + +### CONV5 - MBConv6 - 1 (5) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_4_2 +[shortcut] +from=-5 +activation=linear + +# conv_4_3_expand +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_3_dwise +[convolutional] +groups=232 #192 +filters=232 #192 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_4_3_linear +[convolutional] +filters=96 #80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV5 - MBConv6 - 2 (5) +# conv_4_4_expand +[convolutional] +filters=464 #384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_4_dwise +[convolutional] +groups=464 #384 +filters=464 #384 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_4_4_linear +[convolutional] +filters=96 #80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV5 - MBConv6 - 3 (5) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_4_4 +[shortcut] +from=-5 +activation=linear + +# conv_4_5_expand +[convolutional] +filters=464 #384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_5_dwise +[convolutional] +groups=464 #384 +filters=464 #384 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_4_5_linear +[convolutional] +filters=96 #80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV5 - MBConv6 - 4 (5) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_4_4 +[shortcut] +from=-5 +activation=linear + +# 
conv_4_5_expand +[convolutional] +filters=464 #384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_5_dwise +[convolutional] +groups=464 #384 +filters=464 #384 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_4_5_linear +[convolutional] +filters=96 #80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV5 - MBConv6 - 5 (5) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_4_4 +[shortcut] +from=-5 +activation=linear + +# conv_4_5_expand +[convolutional] +filters=464 #384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_5_dwise +[convolutional] +groups=464 #384 +filters=464 #384 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_4_5_linear +[convolutional] +filters=96 #80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV6 - MBConv6 - 1 (5) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_4_6 +[shortcut] +from=-5 +activation=linear + +# conv_4_7_expand +[convolutional] +filters=464 #384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_7_dwise +[convolutional] +groups=464 #384 +filters=464 #384 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=relu6 + +# conv_4_7_linear +[convolutional] +filters=136 #112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV6 - MBConv6 - 2 (5) +# conv_5_1_expand +[convolutional] +filters=688 #576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_5_1_dwise +[convolutional] +groups=688 #576 +filters=688 #576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_5_1_linear +[convolutional] +filters=136 #112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV6 - MBConv6 - 3 (5) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_5_1 +[shortcut] +from=-5 +activation=linear + +# conv_5_2_expand +[convolutional] +filters=688 #576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_5_2_dwise +[convolutional] +groups=688 #576 +filters=688 #576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_5_2_linear +[convolutional] +filters=136 #112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV6 - MBConv6 - 4 (5) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_5_1 +[shortcut] +from=-5 +activation=linear + +# conv_5_2_expand +[convolutional] +filters=688 #576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_5_2_dwise +[convolutional] +groups=688 #576 +filters=688 #576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_5_2_linear +[convolutional] +filters=136 #112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV6 - MBConv6 - 5 (5) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_5_1 +[shortcut] +from=-5 +activation=linear + +# conv_5_2_expand +[convolutional] +filters=688 #576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_5_2_dwise +[convolutional] +groups=688 #576 +filters=688 #576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_5_2_linear +[convolutional] +filters=136 #112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV7 - MBConv6 - 1 (6) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_5_2 +[shortcut] 
+from=-5 +activation=linear + +# conv_5_3_expand +[convolutional] +filters=688 #576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_5_3_dwise +[convolutional] +groups=688 #576 +filters=688 #576 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=relu6 + + +# conv_5_3_linear +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 2 (6) +# conv_6_1_expand +[convolutional] +filters=1152 #960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_6_1_dwise +[convolutional] +groups=1152 #960 +filters=1152 #960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_6_1_linear +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 3 (6) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_6_1 +[shortcut] +from=-5 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=1152 #960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_6_2_dwise +[convolutional] +groups=1152 #960 +filters=1152 #960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_6_2_linear +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 4 (6) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_6_1 +[shortcut] +from=-5 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=1152 #960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_6_2_dwise +[convolutional] +groups=1152 #960 +filters=1152 #960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_6_2_linear +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 5 (6) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_6_1 +[shortcut] +from=-5 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=1152 #960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_6_2_dwise +[convolutional] +groups=1152 #960 +filters=1152 #960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_6_2_linear +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 6 (6) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_6_1 +[shortcut] +from=-5 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=1152 #960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_6_2_dwise +[convolutional] +groups=1152 #960 +filters=1152 #960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + + + +# conv_6_2_linear +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV8 - MBConv6 - 1 (1) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_6_2 +[shortcut] +from=-5 +activation=linear + +# conv_6_3_expand +[convolutional] +filters=1152 #960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_6_3_dwise +[convolutional] +groups=1152 #960 +filters=1152 #960 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + + + +# conv_6_3_linear +[convolutional] +filters=384 #320 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + + +### CONV9 - Conv2d 1x1 +# conv_6_4 +[convolutional] +filters=1536 
#1280 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + + +[avgpool] + +[dropout] +probability=.3 + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=0 +activation=linear + +[softmax] +groups=1 + +#[cost] +#type=sse + diff --git a/cfg/efficientnet-lite3.cfg b/cfg/efficientnet-lite3.cfg new file mode 100644 index 00000000..e76bbe30 --- /dev/null +++ b/cfg/efficientnet-lite3.cfg @@ -0,0 +1,1009 @@ +# https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/lite/efficientnet_lite_builder.py +# (width_coefficient, depth_coefficient, resolution, dropout_rate) +# 'efficientnet-lite3': (1.2, 1.4, 280, 0.3), +# +#_DEFAULT_BLOCKS_ARGS = [ +# 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', +# 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', +# 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25', +# 'r1_k3_s11_e6_i192_o320_se0.25', +#] + +[net] +# Training +batch=120 +subdivisions=6 +height=288 +width=288 +channels=3 +momentum=0.9 +decay=0.0005 +max_crop=320 + +cutmix=1 +mosaic=1 +label_smooth_eps=0.1 + +burn_in=1000 +learning_rate=0.256 +policy=step +step=10000 +scale=0.96 +max_batches=1600000 +momentum=0.9 +decay=0.00005 + +angle=7 +hue=.1 +saturation=.75 +exposure=.75 +aspect=.75 + + +### CONV1 - 1 (1) +# conv1 +[convolutional] +filters=40 #32 +size=3 +pad=1 +stride=2 +batch_normalize=1 +activation=relu6 + + +### CONV2 - MBConv1 - 1 (2) +# conv2_1_expand +[convolutional] +filters=40 #32 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv2_1_dwise +[convolutional] +groups=40 #32 +filters=40 #32 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv2_1_linear +[convolutional] +filters=16 #16 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV2 - MBConv1 - 2 (2) +# conv2_1_expand +[convolutional] +filters=40 #32 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv2_1_dwise +[convolutional] +groups=40 #32 +filters=40 #32 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv2_1_linear +[convolutional] +filters=16 #16 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV3 - MBConv6 - 1 (3) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_3_1 +[shortcut] +from=-5 +activation=linear + +# conv2_2_expand +[convolutional] +filters=112 #96 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv2_2_dwise +[convolutional] +groups=112 #96 +filters=112 #96 +size=3 +pad=1 +stride=2 +batch_normalize=1 +activation=relu6 + +# conv2_2_linear +[convolutional] +filters=32 #24 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV3 - MBConv6 - 2 (3) +# conv3_1_expand +[convolutional] +filters=176 #144 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv3_1_dwise +[convolutional] +groups=176 #144 +filters=176 #144 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv3_1_linear +[convolutional] +filters=32 #24 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV3 - MBConv6 - 3 (3) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_3_1 +[shortcut] +from=-5 +activation=linear + +# conv3_1_expand +[convolutional] +filters=176 #144 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv3_1_dwise +[convolutional] +groups=176 #144 +filters=176 #144 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv3_1_linear +[convolutional] +filters=32 #24 
+size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV4 - MBConv6 - 1 (3) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_3_1 +[shortcut] +from=-5 +activation=linear + +# conv_3_2_expand +[convolutional] +filters=176 #144 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_3_2_dwise +[convolutional] +groups=176 #144 +filters=176 #144 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=relu6 + +# conv_3_2_linear +[convolutional] +filters=48 #40 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV4 - MBConv6 - 2 (3) +# conv_4_1_expand +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_1_dwise +[convolutional] +groups=232 #192 +filters=232 #192 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_4_1_linear +[convolutional] +filters=48 #40 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV4 - MBConv6 - 3 (3) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_4_2 +[shortcut] +from=-5 +activation=linear + +# conv_4_1_expand +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_1_dwise +[convolutional] +groups=232 #192 +filters=232 #192 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_4_1_linear +[convolutional] +filters=48 #40 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + + +### CONV5 - MBConv6 - 1 (5) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_4_2 +[shortcut] +from=-5 +activation=linear + +# conv_4_3_expand +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_3_dwise +[convolutional] +groups=232 #192 +filters=232 #192 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_4_3_linear +[convolutional] +filters=96 #80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV5 - MBConv6 - 2 (5) +# conv_4_4_expand +[convolutional] +filters=464 #384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_4_dwise +[convolutional] +groups=464 #384 +filters=464 #384 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_4_4_linear +[convolutional] +filters=96 #80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV5 - MBConv6 - 3 (5) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_4_4 +[shortcut] +from=-5 +activation=linear + +# conv_4_5_expand +[convolutional] +filters=464 #384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_5_dwise +[convolutional] +groups=464 #384 +filters=464 #384 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_4_5_linear +[convolutional] +filters=96 #80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV5 - MBConv6 - 4 (5) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_4_4 +[shortcut] +from=-5 +activation=linear + +# conv_4_5_expand +[convolutional] +filters=464 #384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_5_dwise +[convolutional] +groups=464 #384 +filters=464 #384 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_4_5_linear +[convolutional] +filters=96 #80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV5 - MBConv6 - 5 (5) +# dropout only 
before residual connection +[dropout] +probability=.3 + +# block_4_4 +[shortcut] +from=-5 +activation=linear + +# conv_4_5_expand +[convolutional] +filters=464 #384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_5_dwise +[convolutional] +groups=464 #384 +filters=464 #384 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_4_5_linear +[convolutional] +filters=96 #80 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV6 - MBConv6 - 1 (5) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_4_6 +[shortcut] +from=-5 +activation=linear + +# conv_4_7_expand +[convolutional] +filters=464 #384 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_4_7_dwise +[convolutional] +groups=464 #384 +filters=464 #384 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=relu6 + +# conv_4_7_linear +[convolutional] +filters=136 #112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV6 - MBConv6 - 2 (5) +# conv_5_1_expand +[convolutional] +filters=688 #576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_5_1_dwise +[convolutional] +groups=688 #576 +filters=688 #576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_5_1_linear +[convolutional] +filters=136 #112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV6 - MBConv6 - 3 (5) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_5_1 +[shortcut] +from=-5 +activation=linear + +# conv_5_2_expand +[convolutional] +filters=688 #576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_5_2_dwise +[convolutional] +groups=688 #576 +filters=688 #576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_5_2_linear +[convolutional] +filters=136 #112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV6 - MBConv6 - 4 (5) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_5_1 +[shortcut] +from=-5 +activation=linear + +# conv_5_2_expand +[convolutional] +filters=688 #576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_5_2_dwise +[convolutional] +groups=688 #576 +filters=688 #576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_5_2_linear +[convolutional] +filters=136 #112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV6 - MBConv6 - 5 (5) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_5_1 +[shortcut] +from=-5 +activation=linear + +# conv_5_2_expand +[convolutional] +filters=688 #576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_5_2_dwise +[convolutional] +groups=688 #576 +filters=688 #576 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_5_2_linear +[convolutional] +filters=136 #112 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV7 - MBConv6 - 1 (6) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_5_2 +[shortcut] +from=-5 +activation=linear + +# conv_5_3_expand +[convolutional] +filters=688 #576 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_5_3_dwise +[convolutional] +groups=688 #576 +filters=688 #576 +size=5 +pad=1 +stride=2 +batch_normalize=1 +activation=relu6 + + +# conv_5_3_linear +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - 
MBConv6 - 2 (6) +# conv_6_1_expand +[convolutional] +filters=1152 #960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_6_1_dwise +[convolutional] +groups=1152 #960 +filters=1152 #960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_6_1_linear +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 3 (6) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_6_1 +[shortcut] +from=-5 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=1152 #960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_6_2_dwise +[convolutional] +groups=1152 #960 +filters=1152 #960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_6_2_linear +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 4 (6) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_6_1 +[shortcut] +from=-5 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=1152 #960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_6_2_dwise +[convolutional] +groups=1152 #960 +filters=1152 #960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_6_2_linear +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 5 (6) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_6_1 +[shortcut] +from=-5 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=1152 #960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_6_2_dwise +[convolutional] +groups=1152 #960 +filters=1152 #960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + +# conv_6_2_linear +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + +### CONV7 - MBConv6 - 6 (6) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_6_1 +[shortcut] +from=-5 +activation=linear + +# conv_6_2_expand +[convolutional] +filters=1152 #960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_6_2_dwise +[convolutional] +groups=1152 #960 +filters=1152 #960 +size=5 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + + + +# conv_6_2_linear +[convolutional] +filters=232 #192 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + +### CONV8 - MBConv6 - 1 (1) +# dropout only before residual connection +[dropout] +probability=.3 + +# block_6_2 +[shortcut] +from=-5 +activation=linear + +# conv_6_3_expand +[convolutional] +filters=1152 #960 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + +# conv_6_3_dwise +[convolutional] +groups=1152 #960 +filters=1152 #960 +size=3 +stride=1 +pad=1 +batch_normalize=1 +activation=relu6 + + + +# conv_6_3_linear +[convolutional] +filters=384 #320 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=linear + + + + +### CONV9 - Conv2d 1x1 +# conv_6_4 +[convolutional] +filters=1536 #1280 +size=1 +stride=1 +pad=0 +batch_normalize=1 +activation=relu6 + + +[avgpool] + +[dropout] +probability=.3 + +[convolutional] +filters=1000 +size=1 +stride=1 +pad=0 +activation=linear + +[softmax] +groups=1 + +#[cost] +#type=sse + diff --git a/include/darknet.h b/include/darknet.h index 9637d7b7..d6bc9ee1 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -104,7 +104,7 @@ typedef struct tree { // 
activations.h
 typedef enum {
-    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU, SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX, NORM_CHAN_SOFTMAX_MAXVAL
+    LOGISTIC, RELU, RELU6, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU, SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX, NORM_CHAN_SOFTMAX_MAXVAL
 }ACTIVATION;
 
 // parser.h
diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu
index ef2ddbe4..ba1113fd 100644
--- a/src/activation_kernels.cu
+++ b/src/activation_kernels.cu
@@ -7,7 +7,6 @@
 #include "activations.h"
 #include "dark_cuda.h"
 
-
 __device__ float lhtan_activate_kernel(float x)
 {
     if(x < 0) return .001*x;
@@ -30,6 +29,7 @@ __device__ float linear_activate_kernel(float x){return x;}
 __device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));}
 __device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;}
 __device__ float relu_activate_kernel(float x){return x*(x>0);}
+__device__ float relu6_activate_kernel(float x) { return min_val_cmp(max_val_cmp(x, 0), 6); }
 __device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);}
 __device__ float selu_activate_kernel(float x) { return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x) - 1); }
 __device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;}
@@ -68,6 +68,7 @@ __device__ float loggy_gradient_kernel(float x)
     return 2*(1-y)*y;
 }
 __device__ float relu_gradient_kernel(float x){return (x>0);}
+__device__ float relu6_gradient_kernel(float x) { return (x > 0 && x < 6); }
 __device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);}
 __device__ float selu_gradient_kernel(float x) { return (x >= 0)*1.0507f + (x < 0)*(x + 1.0507f*1.6732f); }
 __device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;}
@@ -92,6 +93,8 @@ __device__ float activate_kernel(float x, ACTIVATION a)
         return loggy_activate_kernel(x);
     case RELU:
         return relu_activate_kernel(x);
+    case RELU6:
+        return relu6_activate_kernel(x);
     case ELU:
         return elu_activate_kernel(x);
     case SELU:
@@ -127,6 +130,8 @@ __device__ float gradient_kernel(float x, ACTIVATION a)
         return loggy_gradient_kernel(x);
     case RELU:
         return relu_gradient_kernel(x);
+    case RELU6:
+        return relu6_gradient_kernel(x);
     case NORM_CHAN:
         return relu_gradient_kernel(x);
     case ELU:
@@ -272,6 +277,14 @@ __global__ void activate_array_relu_kernel(float *x, int n)
     }
 }
 
+__global__ void activate_array_relu6_kernel(float *x, int n)
+{
+    int index = blockIdx.x*blockDim.x + threadIdx.x;
+    if (index < n) {
+        x[index] = relu6_activate_kernel(x[index]);
+    }
+}
+
 __global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta)
 {
     int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
@@ -362,6 +375,14 @@ __global__ void gradient_array_relu_kernel(float *x, int n, float *delta)
     }
 }
 
+__global__ void gradient_array_relu6_kernel(float *x, int n, float *delta)
+{
+    int index = blockIdx.x*blockDim.x + threadIdx.x;
+    if (index < n) {
+        delta[index] *= relu6_gradient_kernel(x[index]);
+    }
+}
+
 extern "C" void activate_array_ongpu(float *x, int n, ACTIVATION a)
 {
     const int num_blocks = get_number_of_blocks(n, BLOCK);
@@ -371,6 +392,7 @@ extern "C" void activate_array_ongpu(float *x, int n, ACTIVATION a)
     else if (a == TANH) activate_array_tanh_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
     else if (a == HARDTAN) activate_array_hardtan_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
     else if (a == RELU) activate_array_relu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
+    else if (a == RELU6) activate_array_relu6_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
     else if (a == SELU) activate_array_selu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
     else
         activate_array_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream()>>>(x, n, a);
@@ -400,6 +422,7 @@ extern "C" void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta
     else if (a == TANH) gradient_array_tanh_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
     else if (a == HARDTAN) gradient_array_hardtan_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
     else if (a == RELU) gradient_array_relu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
+    else if (a == RELU6) gradient_array_relu6_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
     //else if (a == NORM_CHAN) gradient_array_relu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
     else if (a == NORM_CHAN_SOFTMAX || a == NORM_CHAN) {
         printf(" Error: should be used custom NORM_CHAN_SOFTMAX-function for gradient \n");
diff --git a/src/activations.c b/src/activations.c
index fd586e2b..0b68fda0 100644
--- a/src/activations.c
+++ b/src/activations.c
@@ -53,6 +53,7 @@ ACTIVATION get_activation(char *s)
     if (strcmp(s, "normalize_channels_softmax_maxval") == 0) return NORM_CHAN_SOFTMAX_MAXVAL;
     if (strcmp(s, "loggy")==0) return LOGGY;
     if (strcmp(s, "relu")==0) return RELU;
+    if (strcmp(s, "relu6") == 0) return RELU6;
     if (strcmp(s, "elu")==0) return ELU;
     if (strcmp(s, "selu") == 0) return SELU;
     if (strcmp(s, "relie")==0) return RELIE;
diff --git a/src/activations.h b/src/activations.h
index 16285c04..9e9b0053 100644
--- a/src/activations.h
+++ b/src/activations.h
@@ -3,6 +3,7 @@
 #include "darknet.h"
 #include "dark_cuda.h"
 #include "math.h"
+#include "utils.h"
 
 //typedef enum{
 //    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU, SWISH, MISH
@@ -56,6 +57,7 @@ static inline float linear_activate(float x){return x;}
 static inline float logistic_activate(float x){return 1.f/(1.f + expf(-x));}
 static inline float loggy_activate(float x){return 2.f/(1.f + expf(-x)) - 1;}
 static inline float relu_activate(float x){return x*(x>0);}
+static inline float relu6_activate(float x) { return min_val_cmp(max_val_cmp(x, 0), 6); }
 static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);}
 static inline float selu_activate(float x) { return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x) - 1); }
 static inline float relie_activate(float x){return (x>0) ? x : .01f*x;}
@@ -105,6 +107,7 @@ static inline float stair_gradient(float x)
     return 1.0f;
 }
 static inline float relu_gradient(float x){return (x>0);}
+static inline float relu6_gradient(float x) { return (x > 0 && x < 6); }
 static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);}
 static inline float selu_gradient(float x) { return (x >= 0)*1.0507f + (x < 0)*(x + 1.0507f*1.6732f); }
 static inline float relie_gradient(float x){return (x>0) ? 1 : .01f;}
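
Note (not part of the patch): relu6 clamps an activation to the range [0, 6] and back-propagates a unit gradient only strictly inside that range. The standalone C sketch below re-checks that forward/backward math; min_val_cmp() and max_val_cmp() come from darknet's utils.h, which this patch only #includes, so they are assumed here to be plain min/max helpers.

/* Standalone sanity check of the relu6 forward/backward expressions added above.
 * min_val_cmp()/max_val_cmp() are assumed to behave like ordinary min/max;
 * the real definitions live in darknet's utils.h and are not shown in the diff. */
#include <assert.h>
#include <stdio.h>

static inline float max_val_cmp(float a, float b) { return (a > b) ? a : b; }  /* assumed helper */
static inline float min_val_cmp(float a, float b) { return (a < b) ? a : b; }  /* assumed helper */

/* Same expressions as relu6_activate()/relu6_gradient() in activations.h */
static inline float relu6_activate(float x) { return min_val_cmp(max_val_cmp(x, 0), 6); }
static inline float relu6_gradient(float x) { return (x > 0 && x < 6); }

int main(void)
{
    assert(relu6_activate(-2.f) == 0.f && relu6_gradient(-2.f) == 0.f); /* clipped at 0 */
    assert(relu6_activate( 3.f) == 3.f && relu6_gradient( 3.f) == 1.f); /* linear region */
    assert(relu6_activate( 9.f) == 6.f && relu6_gradient( 9.f) == 0.f); /* clipped at 6 */
    printf("relu6 forward/backward behave as expected\n");
    return 0;
}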
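
Note (not part of the patch): every "### CONV..." section in efficientnet-lite3.cfg repeats the same MBConv pattern: a 1x1 expansion, a depthwise convolution (groups set equal to filters), and a 1x1 linear projection. Only blocks whose input and output shapes match get the [dropout] + [shortcut] pair that closes the residual; strided blocks have none. In the file that pair sits under the next "###" header, but the layer order is exactly as in the annotated fragment below, which regroups one CONV3 block for illustration (the comments are added here).

# 1x1 expansion to 176 channels
[convolutional]
filters=176
size=1
stride=1
pad=0
batch_normalize=1
activation=relu6

# 3x3 depthwise convolution: groups equal to filters/channels
[convolutional]
groups=176
filters=176
size=3
stride=1
pad=1
batch_normalize=1
activation=relu6

# 1x1 linear projection back to the block width (no activation)
[convolutional]
filters=32
size=1
stride=1
pad=0
batch_normalize=1
activation=linear

# dropout applied only in front of the residual add (see the cfg comments)
[dropout]
probability=.3

# residual: from=-5 reaches back past the dropout and the three convolutions
# to the tensor that entered the 1x1 expansion
[shortcut]
from=-5
activation=linear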