From 9db618329a1a4786ead73fab29d46dbb7fb58430 Mon Sep 17 00:00:00 2001 From: Joseph Redmon Date: Mon, 20 Jul 2015 14:56:53 -0700 Subject: [PATCH] changes to detection --- Makefile | 4 +-- cfg/alexnet.cfg | 1 - cfg/jnet-conv.cfg | 1 - cfg/strided.cfg | 1 - cfg/vgg-16.cfg | 1 - cfg/vgg-conv.cfg | 1 - cfg/yolo-small.cfg | 1 - cfg/yolo.cfg | 7 +++-- src/data.c | 2 +- src/detection.c | 64 ++++++++++++++++++++++------------------------ src/parser.c | 3 +-- 11 files changed, 38 insertions(+), 48 deletions(-) diff --git a/Makefile b/Makefile index 1b422777..32ff4c0a 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -GPU=1 -OPENCV=1 +GPU=0 +OPENCV=0 DEBUG=0 ARCH= -arch=sm_52 diff --git a/cfg/alexnet.cfg b/cfg/alexnet.cfg index 65346f5e..8205c40d 100644 --- a/cfg/alexnet.cfg +++ b/cfg/alexnet.cfg @@ -7,7 +7,6 @@ channels=3 learning_rate=0.01 momentum=0.9 decay=0.0005 -seen=0 [crop] crop_height=224 diff --git a/cfg/jnet-conv.cfg b/cfg/jnet-conv.cfg index 64e7521c..056f82aa 100644 --- a/cfg/jnet-conv.cfg +++ b/cfg/jnet-conv.cfg @@ -7,7 +7,6 @@ channels=3 learning_rate=0.01 momentum=0.9 decay=0.0005 -seen=0 [convolutional] filters=32 diff --git a/cfg/strided.cfg b/cfg/strided.cfg index 31d8155b..6c6628ec 100644 --- a/cfg/strided.cfg +++ b/cfg/strided.cfg @@ -7,7 +7,6 @@ channels=3 learning_rate=0.01 momentum=0.9 decay=0.0005 -seen=0 [crop] crop_height=224 diff --git a/cfg/vgg-16.cfg b/cfg/vgg-16.cfg index 72133d9e..7b5b74ae 100644 --- a/cfg/vgg-16.cfg +++ b/cfg/vgg-16.cfg @@ -6,7 +6,6 @@ width=256 channels=3 learning_rate=0.00001 momentum=0.9 -seen=0 decay=0.0005 [crop] diff --git a/cfg/vgg-conv.cfg b/cfg/vgg-conv.cfg index ab0fb1e1..21e1d724 100644 --- a/cfg/vgg-conv.cfg +++ b/cfg/vgg-conv.cfg @@ -6,7 +6,6 @@ height=224 channels=3 learning_rate=0.00001 momentum=0.9 -seen=0 decay=0.0005 [convolutional] diff --git a/cfg/yolo-small.cfg b/cfg/yolo-small.cfg index e1be1ace..a8f001af 100644 --- a/cfg/yolo-small.cfg +++ b/cfg/yolo-small.cfg @@ -7,7 +7,6 @@ channels=3 learning_rate=0.01 momentum=0.9 decay=0.0005 -seen = 0 [crop] crop_width=448 diff --git a/cfg/yolo.cfg b/cfg/yolo.cfg index 40f87c45..eef0b695 100644 --- a/cfg/yolo.cfg +++ b/cfg/yolo.cfg @@ -7,7 +7,6 @@ channels=3 learning_rate=0.01 momentum=0.9 decay=0.0005 -seen = 0 [crop] crop_width=448 @@ -200,6 +199,6 @@ activation=logistic classes=20 coords=4 rescore=0 -joint=1 -objectness = 0 -background=0 +joint=0 +objectness=1 + diff --git a/src/data.c b/src/data.c index f6df50f5..982ef216 100644 --- a/src/data.c +++ b/src/data.c @@ -140,7 +140,7 @@ void randomize_boxes(box_label *b, int n) void fill_truth_detection(char *path, float *truth, int classes, int num_boxes, int flip, int background, float dx, float dy, float sx, float sy) { - char *labelpath = find_replace(path, "detection_images", "labels"); + char *labelpath = find_replace(path, "JPEGImages", "labels"); labelpath = find_replace(labelpath, ".jpg", ".txt"); labelpath = find_replace(labelpath, ".JPEG", ".txt"); int count = 0; diff --git a/src/detection.c b/src/detection.c index 94d37007..b57f597e 100644 --- a/src/detection.c +++ b/src/detection.c @@ -8,20 +8,22 @@ char *class_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; -void draw_detection(image im, float *box, int side, char *label) +void draw_detection(image im, float *box, int side, int objectness, char *label) { int classes = 20; - int elems = 4+classes; + int elems = 4+classes+objectness; int j; int r, c; for(r = 0; r < side; ++r){ for(c = 0; c < side; ++c){ j = (r*side + c) * elems; + float scale = 1; + if(objectness) scale = 1 - box[j++]; int class = max_index(box+j, classes); - if(box[j+class] > 0.2){ + if(scale * box[j+class] > 0.2){ int width = box[j+class]*5 + 1; - printf("%f %s\n", box[j+class], class_names[class]); + printf("%f %s\n", scale * box[j+class], class_names[class]); float red = get_color(0,class,classes); float green = get_color(1,class,classes); float blue = get_color(2,class,classes); @@ -51,7 +53,6 @@ void train_detection(char *cfgfile, char *weightfile) { srand(time(0)); data_seed = time(0); - int imgnet = 0; char *base = basecfg(cfgfile); printf("%s\n", base); float avg_loss = -1; @@ -66,49 +67,45 @@ void train_detection(char *cfgfile, char *weightfile) data train, buffer; int classes = layer.classes; - int background = (layer.background || layer.objectness); - printf("%d\n", background); + int background = layer.objectness; int side = sqrt(get_detection_layer_locations(layer)); char **paths; - list *plist; - if (imgnet){ - plist = get_paths("/home/pjreddie/data/imagenet/det.train.list"); - }else{ - //plist = get_paths("/home/pjreddie/data/voc/no_2012_val.txt"); - //plist = get_paths("/home/pjreddie/data/voc/no_2007_test.txt"); - //plist = get_paths("/home/pjreddie/data/voc/val_2012.txt"); - //plist = get_paths("/home/pjreddie/data/voc/no_2007_test.txt"); - //plist = get_paths("/home/pjreddie/data/coco/trainval.txt"); - plist = get_paths("/home/pjreddie/data/voc/all2007-2012.txt"); - } + list *plist = get_paths("/home/pjreddie/data/voc/test/train.txt"); + int N = plist->size; + paths = (char **)list_to_array(plist); pthread_t load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, net.w, net.h, side, side, background, &buffer); clock_t time; - while(1){ + while(i*imgs < N*120){ i += 1; time=clock(); pthread_join(load_thread, 0); train = buffer; load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, net.w, net.h, side, side, background, &buffer); -/* - image im = float_to_image(net.w, net.h, 3, train.X.vals[114]); - image copy = copy_image(im); - draw_detection(copy, train.y.vals[114], 7, "truth"); - cvWaitKey(0); - free_image(copy); - */ - printf("Loaded: %lf seconds\n", sec(clock()-time)); time=clock(); float loss = train_network(net, train); net.seen += imgs; if (avg_loss < 0) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), i*imgs); - if(i == 100){ + if((i-1)*imgs <= N && i*imgs > N){ + fprintf(stderr, "Starting second stage...\n"); net.learning_rate *= 10; + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_first_stage.weights", base); + save_weights(net, buff); + } + if((i-1)*imgs <= 80*N && i*imgs > N*80){ + fprintf(stderr, "Second stage done.\n"); + net.learning_rate *= .1; + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_second_stage.weights", base); + save_weights(net, buff); + return; } if(i%1000==0){ char buff[256]; @@ -117,6 +114,9 @@ void train_detection(char *cfgfile, char *weightfile) } free_data(train); } + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_final.weights",base); + save_weights(net, buff); } void convert_detections(float *predictions, int classes, int objectness, int background, int num_boxes, int w, int h, float thresh, float **probs, box *boxes) @@ -174,7 +174,7 @@ void print_detections(FILE **fps, char *id, box *boxes, float **probs, int num_b if (ymin < 0) ymin = 0; if (xmax > w) xmax = w; if (ymax > h) ymax = h; - + for(j = 0; j < classes; ++j){ if (probs[i][j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, probs[i][j], xmin, ymin, xmax, ymax); @@ -267,8 +267,6 @@ void test_detection(char *cfgfile, char *weightfile, char *filename) load_weights(&net, weightfile); } detection_layer layer = get_network_detection_layer(net); - if (!layer.joint) fprintf(stderr, "Detection layer should use joint prediction to draw correctly.\n"); - int im_size = 448; set_batch_network(&net, 1); srand(2222222); clock_t time; @@ -283,12 +281,12 @@ void test_detection(char *cfgfile, char *weightfile, char *filename) strtok(input, "\n"); } image im = load_image_color(input,0,0); - image sized = resize_image(im, im_size, im_size); + image sized = resize_image(im, net.w, net.h); float *X = sized.data; time=clock(); float *predictions = network_predict(net, X); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); - draw_detection(im, predictions, 7, "predictions"); + draw_detection(im, predictions, 7, layer.objectness, "predictions"); free_image(im); free_image(sized); #ifdef OPENCV diff --git a/src/parser.c b/src/parser.c index 96707152..2a82ee70 100644 --- a/src/parser.c +++ b/src/parser.c @@ -167,7 +167,7 @@ detection_layer parse_detection(list *options, size_params params) int rescore = option_find_int(options, "rescore", 0); int joint = option_find_int(options, "joint", 0); int objectness = option_find_int(options, "objectness", 0); - int background = option_find_int(options, "background", 0); + int background = 0; detection_layer layer = make_detection_layer(params.batch, params.inputs, classes, coords, joint, rescore, background, objectness); return layer; } @@ -295,7 +295,6 @@ void parse_net_options(list *options, network *net) net->learning_rate = option_find_float(options, "learning_rate", .001); net->momentum = option_find_float(options, "momentum", .9); net->decay = option_find_float(options, "decay", .0001); - net->seen = option_find_int(options, "seen",0); int subdivs = option_find_int(options, "subdivisions",1); net->batch /= subdivs; net->subdivisions = subdivs;