Added command line param -dontuse_opencv for training Classifier. Also use GaussianBlur instead of bilateralFilter for blur=1 for training Classifier and Detector.

This commit is contained in:
AlexeyAB 2019-12-28 18:44:32 +03:00
parent b8605bda1e
commit 114a7f942b
8 changed files with 43 additions and 20 deletions

View File

@ -822,6 +822,7 @@ typedef struct load_args {
int augment_speed;
int letter_box;
int show_imgs;
int dontuse_opencv;
float jitter;
int flip;
int blur;

View File

@ -2,6 +2,8 @@
### Datasets:
25 thousand datasets on Kaggle: https://www.kaggle.com/datasets
BDD100K - Diverse Driving Video: https://bair.berkeley.edu/blog/2018/05/30/bdd/
Pascal VOC: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html

View File

@ -27,7 +27,7 @@ float *get_regression_values(char **labels, int n)
return v;
}
void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int mjpeg_port, int calc_topk, int show_imgs)
void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dontuse_opencv, int dont_show, int mjpeg_port, int calc_topk, int show_imgs)
{
int i;
@ -81,6 +81,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
args.threads = 32;
args.hierarchy = net.hierarchy;
args.dontuse_opencv = dontuse_opencv;
args.min = net.min_crop;
args.max = net.max_crop;
args.flip = net.flip;
@ -112,7 +113,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
int img_size = 1000;
char windows_name[100];
sprintf(windows_name, "average loss (id:%d)", random_gen());
img = draw_train_chart(windows_name, max_img_loss, net.max_batches, number_of_lines, img_size, dont_show);
if (!dontuse_opencv) img = draw_train_chart(windows_name, max_img_loss, net.max_batches, number_of_lines, img_size, dont_show);
#endif //OPENCV
data train;
@ -176,7 +177,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
#ifdef OPENCV
draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, i, net.max_batches, topk, draw_precision, topk_buff, dont_show, mjpeg_port);
if (!dontuse_opencv) draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, i, net.max_batches, topk, draw_precision, topk_buff, dont_show, mjpeg_port);
#endif // OPENCV
if (i >= (iter_save + 1000)) {
@ -1298,6 +1299,7 @@ void run_classifier(int argc, char **argv)
}
int dont_show = find_arg(argc, argv, "-dont_show");
int dontuse_opencv = find_arg(argc, argv, "-dontuse_opencv");
int show_imgs = find_arg(argc, argv, "-show_imgs");
int calc_topk = find_arg(argc, argv, "-topk");
int cam_index = find_int_arg(argc, argv, "-c", 0);
@ -1311,7 +1313,7 @@ void run_classifier(int argc, char **argv)
int layer = layer_s ? atoi(layer_s) : -1;
if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top);
else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s));
else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear, dont_show, mjpeg_port, calc_topk, show_imgs);
else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear, dontuse_opencv, dont_show, mjpeg_port, calc_topk, show_imgs);
else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename);
else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename);
else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename);

View File

@ -142,7 +142,7 @@ matrix load_image_paths(char **paths, int n, int w, int h)
return X;
}
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure)
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv)
{
int i;
matrix X;
@ -152,7 +152,10 @@ matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int
for(i = 0; i < n; ++i){
int size = w > h ? w : h;
image im = load_image_color(paths[i], 0, 0);
image im;
if(dontuse_opencv) im = load_image_stb_resize(paths[i], 0, 0, 3);
else im = load_image_color(paths[i], 0, 0);
image crop = random_augment_image(im, angle, aspect, min, max, size);
int flip = use_flip ? random_gen() % 2 : 0;
if (flip)
@ -1362,7 +1365,7 @@ void *load_thread(void *ptr)
if (a.type == OLD_CLASSIFICATION_DATA){
*a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
} else if (a.type == CLASSIFICATION_DATA){
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.flip, a.min, a.max, a.w, a.h, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.mixup, a.blur, a.show_imgs, a.label_smooth_eps);
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.flip, a.min, a.max, a.w, a.h, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.mixup, a.blur, a.show_imgs, a.label_smooth_eps, a.dontuse_opencv);
} else if (a.type == SUPER_DATA){
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
} else if (a.type == WRITING_DATA){
@ -1507,20 +1510,20 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
return d;
}
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps)
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps, int dontuse_opencv)
{
char **paths_stored = paths;
if(m) paths = get_random_paths(paths, n, m);
data d = {0};
d.shallow = 0;
d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure);
d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv);
d.y = load_labels_paths(paths, n, labels, k, hierarchy, label_smooth_eps);
if (use_mixup && rand_int(0, 1)) {
char **paths_mix = get_random_paths(paths_stored, n, m);
data d2 = { 0 };
d2.shallow = 0;
d2.X = load_image_augment_paths(paths_mix, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure);
d2.X = load_image_augment_paths(paths_mix, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv);
d2.y = load_labels_paths(paths_mix, n, labels, k, hierarchy, label_smooth_eps);
free(paths_mix);
@ -1530,12 +1533,12 @@ data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *h
d4.shallow = 0;
if (use_mixup >= 3) {
char **paths_mix3 = get_random_paths(paths_stored, n, m);
d3.X = load_image_augment_paths(paths_mix3, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure);
d3.X = load_image_augment_paths(paths_mix3, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv);
d3.y = load_labels_paths(paths_mix3, n, labels, k, hierarchy, label_smooth_eps);
free(paths_mix3);
char **paths_mix4 = get_random_paths(paths_stored, n, m);
d4.X = load_image_augment_paths(paths_mix4, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure);
d4.X = load_image_augment_paths(paths_mix4, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv);
d4.y = load_labels_paths(paths_mix4, n, labels, k, hierarchy, label_smooth_eps);
free(paths_mix4);
}
@ -1706,7 +1709,7 @@ data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int
d.w = w;
d.h = h;
d.shallow = 0;
d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure);
d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, 0);
d.y = load_tags_paths(paths, n, k);
if(m) free(paths);
return d;

View File

@ -89,9 +89,9 @@ data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int
data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, int use_mixup,
float jitter, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs);
data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure);
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure);
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv);
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps);
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps, int dontuse_opencv);
data load_go(char *filename);
box_label *read_boxes(char *filename, int *n);

View File

@ -1419,6 +1419,18 @@ image load_image_stb(char *filename, int channels)
return im;
}
/*
 * Load an image via load_image_stb() (i.e. without OpenCV) and, if a target
 * size is requested that differs from the loaded size, return a resized copy.
 *
 * filename - path forwarded to load_image_stb()
 * w, h     - requested width and height; when either is 0 the image is
 *            returned exactly as loaded
 * c        - channel count forwarded to load_image_stb()
 *
 * Returns the loaded image, or the result of resize_image() when resizing
 * was needed (the originally loaded image is then passed to free_image()).
 */
image load_image_stb_resize(char *filename, int w, int h, int c)
{
    image loaded = load_image_stb(filename, c);

    // No target size requested: hand back the image untouched.
    if (!w || !h) return loaded;

    // Already at the requested dimensions: nothing to do.
    if (w == loaded.w && h == loaded.h) return loaded;

    // Produce the resized image first, then drop the original.
    image scaled = resize_image(loaded, w, h);
    free_image(loaded);
    return scaled;
}
image load_image(char *filename, int w, int h, int c)
{
#ifdef OPENCV

View File

@ -83,6 +83,7 @@ image float_to_image_scaled(int w, int h, int c, float *data);
image float_to_image(int w, int h, int c, float *data);
image copy_image(image p);
image load_image(char *filename, int w, int h, int c);
image load_image_stb_resize(char *filename, int w, int h, int c);
//LIB_API image load_image_color(char *filename, int w, int h);
image **load_alphabet();

View File

@ -1210,15 +1210,15 @@ extern "C" image image_data_augmentation(mat_cv* mat, int w, int h,
if (blur) {
cv::Mat dst(sized.size(), sized.type());
if (blur == 1) {
//cv::GaussianBlur(sized, dst, cv::Size(31, 31), 0);
cv::bilateralFilter(sized, dst, 17, 75, 75);
cv::GaussianBlur(sized, dst, cv::Size(17, 17), 0);
//cv::bilateralFilter(sized, dst, 17, 75, 75);
}
else {
int ksize = (blur / 2) * 2 + 1;
cv::Size kernel_size = cv::Size(ksize, ksize);
//cv::GaussianBlur(sized, dst, kernel_size, 0);
cv::GaussianBlur(sized, dst, kernel_size, 0);
//cv::medianBlur(sized, dst, ksize);
cv::bilateralFilter(sized, dst, ksize, 75, 75);
//cv::bilateralFilter(sized, dst, ksize, 75, 75);
// sharpen
//cv::Mat img_tmp;
@ -1274,7 +1274,9 @@ extern "C" image blur_image(image src_img, int ksize)
{
cv::Mat src = image_to_mat(src_img);
cv::Mat dst;
cv::bilateralFilter(src, dst, ksize, 75, 75);
cv::Size kernel_size = cv::Size(ksize, ksize);
cv::GaussianBlur(src, dst, kernel_size, 0);
//cv::bilateralFilter(src, dst, ksize, 75, 75);
image dst_img = mat_to_image(dst);
return dst_img;
}