mirror of https://github.com/AlexeyAB/darknet.git
Added the command-line parameter -dontuse_opencv for training the Classifier. Also use GaussianBlur instead of bilateralFilter for blur augmentation (blur=1 as well as larger blur values) when training the Classifier and Detector.
This commit is contained in:
parent
b8605bda1e
commit
114a7f942b
|
@ -822,6 +822,7 @@ typedef struct load_args {
|
|||
int augment_speed;
|
||||
int letter_box;
|
||||
int show_imgs;
|
||||
int dontuse_opencv;
|
||||
float jitter;
|
||||
int flip;
|
||||
int blur;
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
### Datasets:
|
||||
|
||||
25 thousand datasets on Kaggle: https://www.kaggle.com/datasets
|
||||
|
||||
BDD100K - Diverse Driving Video: https://bair.berkeley.edu/blog/2018/05/30/bdd/
|
||||
|
||||
Pascal VOC: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html
|
||||
|
|
|
@ -27,7 +27,7 @@ float *get_regression_values(char **labels, int n)
|
|||
return v;
|
||||
}
|
||||
|
||||
void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int mjpeg_port, int calc_topk, int show_imgs)
|
||||
void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dontuse_opencv, int dont_show, int mjpeg_port, int calc_topk, int show_imgs)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -81,6 +81,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
args.threads = 32;
|
||||
args.hierarchy = net.hierarchy;
|
||||
|
||||
args.dontuse_opencv = dontuse_opencv;
|
||||
args.min = net.min_crop;
|
||||
args.max = net.max_crop;
|
||||
args.flip = net.flip;
|
||||
|
@ -112,7 +113,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
int img_size = 1000;
|
||||
char windows_name[100];
|
||||
sprintf(windows_name, "average loss (id:%d)", random_gen());
|
||||
img = draw_train_chart(windows_name, max_img_loss, net.max_batches, number_of_lines, img_size, dont_show);
|
||||
if (!dontuse_opencv) img = draw_train_chart(windows_name, max_img_loss, net.max_batches, number_of_lines, img_size, dont_show);
|
||||
#endif //OPENCV
|
||||
|
||||
data train;
|
||||
|
@ -176,7 +177,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
|
||||
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
|
||||
#ifdef OPENCV
|
||||
draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, i, net.max_batches, topk, draw_precision, topk_buff, dont_show, mjpeg_port);
|
||||
if (!dontuse_opencv) draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, i, net.max_batches, topk, draw_precision, topk_buff, dont_show, mjpeg_port);
|
||||
#endif // OPENCV
|
||||
|
||||
if (i >= (iter_save + 1000)) {
|
||||
|
@ -1298,6 +1299,7 @@ void run_classifier(int argc, char **argv)
|
|||
}
|
||||
|
||||
int dont_show = find_arg(argc, argv, "-dont_show");
|
||||
int dontuse_opencv = find_arg(argc, argv, "-dontuse_opencv");
|
||||
int show_imgs = find_arg(argc, argv, "-show_imgs");
|
||||
int calc_topk = find_arg(argc, argv, "-topk");
|
||||
int cam_index = find_int_arg(argc, argv, "-c", 0);
|
||||
|
@ -1311,7 +1313,7 @@ void run_classifier(int argc, char **argv)
|
|||
int layer = layer_s ? atoi(layer_s) : -1;
|
||||
if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top);
|
||||
else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s));
|
||||
else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear, dont_show, mjpeg_port, calc_topk, show_imgs);
|
||||
else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear, dontuse_opencv, dont_show, mjpeg_port, calc_topk, show_imgs);
|
||||
else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename);
|
||||
else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename);
|
||||
else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename);
|
||||
|
|
21
src/data.c
21
src/data.c
|
@ -142,7 +142,7 @@ matrix load_image_paths(char **paths, int n, int w, int h)
|
|||
return X;
|
||||
}
|
||||
|
||||
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure)
|
||||
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv)
|
||||
{
|
||||
int i;
|
||||
matrix X;
|
||||
|
@ -152,7 +152,10 @@ matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int
|
|||
|
||||
for(i = 0; i < n; ++i){
|
||||
int size = w > h ? w : h;
|
||||
image im = load_image_color(paths[i], 0, 0);
|
||||
image im;
|
||||
if(dontuse_opencv) im = load_image_stb_resize(paths[i], 0, 0, 3);
|
||||
else im = load_image_color(paths[i], 0, 0);
|
||||
|
||||
image crop = random_augment_image(im, angle, aspect, min, max, size);
|
||||
int flip = use_flip ? random_gen() % 2 : 0;
|
||||
if (flip)
|
||||
|
@ -1362,7 +1365,7 @@ void *load_thread(void *ptr)
|
|||
if (a.type == OLD_CLASSIFICATION_DATA){
|
||||
*a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
|
||||
} else if (a.type == CLASSIFICATION_DATA){
|
||||
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.flip, a.min, a.max, a.w, a.h, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.mixup, a.blur, a.show_imgs, a.label_smooth_eps);
|
||||
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.flip, a.min, a.max, a.w, a.h, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.mixup, a.blur, a.show_imgs, a.label_smooth_eps, a.dontuse_opencv);
|
||||
} else if (a.type == SUPER_DATA){
|
||||
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
|
||||
} else if (a.type == WRITING_DATA){
|
||||
|
@ -1507,20 +1510,20 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
|
|||
return d;
|
||||
}
|
||||
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps)
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps, int dontuse_opencv)
|
||||
{
|
||||
char **paths_stored = paths;
|
||||
if(m) paths = get_random_paths(paths, n, m);
|
||||
data d = {0};
|
||||
d.shallow = 0;
|
||||
d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure);
|
||||
d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv);
|
||||
d.y = load_labels_paths(paths, n, labels, k, hierarchy, label_smooth_eps);
|
||||
|
||||
if (use_mixup && rand_int(0, 1)) {
|
||||
char **paths_mix = get_random_paths(paths_stored, n, m);
|
||||
data d2 = { 0 };
|
||||
d2.shallow = 0;
|
||||
d2.X = load_image_augment_paths(paths_mix, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure);
|
||||
d2.X = load_image_augment_paths(paths_mix, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv);
|
||||
d2.y = load_labels_paths(paths_mix, n, labels, k, hierarchy, label_smooth_eps);
|
||||
free(paths_mix);
|
||||
|
||||
|
@ -1530,12 +1533,12 @@ data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *h
|
|||
d4.shallow = 0;
|
||||
if (use_mixup >= 3) {
|
||||
char **paths_mix3 = get_random_paths(paths_stored, n, m);
|
||||
d3.X = load_image_augment_paths(paths_mix3, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure);
|
||||
d3.X = load_image_augment_paths(paths_mix3, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv);
|
||||
d3.y = load_labels_paths(paths_mix3, n, labels, k, hierarchy, label_smooth_eps);
|
||||
free(paths_mix3);
|
||||
|
||||
char **paths_mix4 = get_random_paths(paths_stored, n, m);
|
||||
d4.X = load_image_augment_paths(paths_mix4, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure);
|
||||
d4.X = load_image_augment_paths(paths_mix4, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv);
|
||||
d4.y = load_labels_paths(paths_mix4, n, labels, k, hierarchy, label_smooth_eps);
|
||||
free(paths_mix4);
|
||||
}
|
||||
|
@ -1706,7 +1709,7 @@ data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int
|
|||
d.w = w;
|
||||
d.h = h;
|
||||
d.shallow = 0;
|
||||
d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure);
|
||||
d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, 0);
|
||||
d.y = load_tags_paths(paths, n, k);
|
||||
if(m) free(paths);
|
||||
return d;
|
||||
|
|
|
@ -89,9 +89,9 @@ data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int
|
|||
data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, int use_mixup,
|
||||
float jitter, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs);
|
||||
data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure);
|
||||
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure);
|
||||
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv);
|
||||
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps);
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps, int dontuse_opencv);
|
||||
data load_go(char *filename);
|
||||
|
||||
box_label *read_boxes(char *filename, int *n);
|
||||
|
|
12
src/image.c
12
src/image.c
|
@ -1419,6 +1419,18 @@ image load_image_stb(char *filename, int channels)
|
|||
return im;
|
||||
}
|
||||
|
||||
/*
 * Load an image via load_image_stb (the loader that does not use OpenCV),
 * requesting `c` channels, and optionally resize it.
 *
 * filename: path handed unchanged to load_image_stb.
 * w, h:     requested size. The resize happens only when BOTH are non-zero
 *           and at least one differs from the loaded image's size; passing 0
 *           for either returns the image at the size it was loaded with.
 * c:        channel count forwarded to load_image_stb.
 *
 * Returns the loaded image, or the result of resize_image(out, w, h) when a
 * resize was needed (the originally loaded image is then freed here).
 */
image load_image_stb_resize(char *filename, int w, int h, int c)
|
||||
{
|
||||
image out = load_image_stb(filename, c); // without OpenCV
|
||||

||||
if ((h && w) && (h != out.h || w != out.w)) { // resize only if a full target size is given and it differs
|
||||
image resized = resize_image(out, w, h);
|
||||
free_image(out); // release the original-size image; only `resized` is returned
|
||||
out = resized;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
image load_image(char *filename, int w, int h, int c)
|
||||
{
|
||||
#ifdef OPENCV
|
||||
|
|
|
@ -83,6 +83,7 @@ image float_to_image_scaled(int w, int h, int c, float *data);
|
|||
image float_to_image(int w, int h, int c, float *data);
|
||||
image copy_image(image p);
|
||||
image load_image(char *filename, int w, int h, int c);
|
||||
image load_image_stb_resize(char *filename, int w, int h, int c);
|
||||
//LIB_API image load_image_color(char *filename, int w, int h);
|
||||
image **load_alphabet();
|
||||
|
||||
|
|
|
@ -1210,15 +1210,15 @@ extern "C" image image_data_augmentation(mat_cv* mat, int w, int h,
|
|||
if (blur) {
|
||||
cv::Mat dst(sized.size(), sized.type());
|
||||
if (blur == 1) {
|
||||
//cv::GaussianBlur(sized, dst, cv::Size(31, 31), 0);
|
||||
cv::bilateralFilter(sized, dst, 17, 75, 75);
|
||||
cv::GaussianBlur(sized, dst, cv::Size(17, 17), 0);
|
||||
//cv::bilateralFilter(sized, dst, 17, 75, 75);
|
||||
}
|
||||
else {
|
||||
int ksize = (blur / 2) * 2 + 1;
|
||||
cv::Size kernel_size = cv::Size(ksize, ksize);
|
||||
//cv::GaussianBlur(sized, dst, kernel_size, 0);
|
||||
cv::GaussianBlur(sized, dst, kernel_size, 0);
|
||||
//cv::medianBlur(sized, dst, ksize);
|
||||
cv::bilateralFilter(sized, dst, ksize, 75, 75);
|
||||
//cv::bilateralFilter(sized, dst, ksize, 75, 75);
|
||||
|
||||
// sharpen
|
||||
//cv::Mat img_tmp;
|
||||
|
@ -1274,7 +1274,9 @@ extern "C" image blur_image(image src_img, int ksize)
|
|||
{
|
||||
cv::Mat src = image_to_mat(src_img);
|
||||
cv::Mat dst;
|
||||
cv::bilateralFilter(src, dst, ksize, 75, 75);
|
||||
cv::Size kernel_size = cv::Size(ksize, ksize);
|
||||
cv::GaussianBlur(src, dst, kernel_size, 0);
|
||||
//cv::bilateralFilter(src, dst, ksize, 75, 75);
|
||||
image dst_img = mat_to_image(dst);
|
||||
return dst_img;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue