From e63b3a6f912cc2b1f6f00f2a9d342624a06dc3a4 Mon Sep 17 00:00:00 2001 From: Joseph Redmon Date: Tue, 9 Jun 2015 11:17:46 -0700 Subject: [PATCH] syncing messed something up --- cfg/detection.cfg | 7 +++ cfg/rescore.cfg | 8 ++- src/data.c | 4 +- src/detection.c | 119 +++++++++++++++++++++++++++++++++++------ src/detection_layer.c | 5 +- src/imagenet.c | 2 +- src/network_kernels.cu | 4 +- src/writing.c | 73 +++++++++++++++++++++++++ 8 files changed, 195 insertions(+), 27 deletions(-) create mode 100644 src/writing.c diff --git a/cfg/detection.cfg b/cfg/detection.cfg index d08d2af5..3f3b50ed 100644 --- a/cfg/detection.cfg +++ b/cfg/detection.cfg @@ -178,6 +178,13 @@ pad=1 filters=1024 activation=ramp +[convolutional] +size=3 +stride=1 +pad=1 +filters=1024 +activation=ramp + [connected] output=4096 activation=ramp diff --git a/cfg/rescore.cfg b/cfg/rescore.cfg index 9024d532..954c1582 100644 --- a/cfg/rescore.cfg +++ b/cfg/rescore.cfg @@ -178,6 +178,13 @@ pad=1 filters=1024 activation=ramp +[convolutional] +size=3 +stride=1 +pad=1 +filters=1024 +activation=ramp + [connected] output=4096 activation=ramp @@ -195,4 +202,3 @@ coords=4 rescore=1 nuisance = 0 background=0 - diff --git a/src/data.c b/src/data.c index ca5f4a63..425d216c 100644 --- a/src/data.c +++ b/src/data.c @@ -527,11 +527,11 @@ pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, in data load_data_writing(char **paths, int n, int m, int w, int h) { if(m) paths = get_random_paths(paths, n, m); - char **replace_paths = find_replace_paths(paths, n, ".png", "label.png"); + char **replace_paths = find_replace_paths(paths, n, ".png", "-label.png"); data d; d.shallow = 0; d.X = load_image_paths(paths, n, w, h); - d.y = load_image_paths_gray(replace_paths, n, w/4, h/4); + d.y = load_image_paths_gray(replace_paths, n, w/8, h/8); if(m) free(paths); int i; for(i = 0; i < n; ++i) free(replace_paths[i]); diff --git a/src/detection.c b/src/detection.c index c012848c..ccd5097c 100644 --- a/src/detection.c +++ b/src/detection.c @@ -21,7 +21,7 @@ void draw_detection(image im, float *box, int side, char *label) //printf("%d\n", j); //printf("Prob: %f\n", box[j]); int class = max_index(box+j, classes); - if(box[j+class] > .4){ + if(box[j+class] > .05){ //int z; //for(z = 0; z < classes; ++z) printf("%f %s\n", box[j+z], class_names[z]); printf("%f %s\n", box[j+class], class_names[class]); @@ -32,8 +32,8 @@ void draw_detection(image im, float *box, int side, char *label) //float maxheight = distance_from_edge(r, side); //float maxwidth = distance_from_edge(c, side); j += classes; - float y = box[j+0]; - float x = box[j+1]; + float x = box[j+0]; + float y = box[j+1]; x = (x+c)/side; y = (y+r)/side; float w = box[j+2]; //*maxwidth; @@ -257,10 +257,11 @@ void train_detection(char *cfgfile, char *weightfile) if (imgnet){ plist = get_paths("/home/pjreddie/data/imagenet/det.train.list"); }else{ - plist = get_paths("/home/pjreddie/data/voc/no_2012_val.txt"); + //plist = get_paths("/home/pjreddie/data/voc/no_2012_val.txt"); //plist = get_paths("/home/pjreddie/data/voc/no_2007_test.txt"); + //plist = get_paths("/home/pjreddie/data/voc/val_2012.txt"); //plist = get_paths("/home/pjreddie/data/coco/trainval.txt"); - //plist = get_paths("/home/pjreddie/data/voc/all2007-2012.txt"); + plist = get_paths("/home/pjreddie/data/voc/all2007-2012.txt"); } paths = (char **)list_to_array(plist); pthread_t load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, net.w, net.h, side, side, background, &buffer); @@ -272,12 +273,13 @@ void train_detection(char *cfgfile, char *weightfile) train = buffer; load_thread = load_data_detection_thread(imgs, paths, plist->size, classes, net.w, net.h, side, side, background, &buffer); - /* +/* image im = float_to_image(net.w, net.h, 3, train.X.vals[114]); image copy = copy_image(im); - draw_detection(copy, train.y.vals[114], 7); + draw_detection(copy, train.y.vals[114], 7, "truth"); + cvWaitKey(0); free_image(copy); - */ + */ printf("Loaded: %lf seconds\n", sec(clock()-time)); time=clock(); @@ -289,7 +291,7 @@ void train_detection(char *cfgfile, char *weightfile) if(i == 100){ net.learning_rate *= 10; } - if(i%100==0){ + if(i%1000==0){ char buff[256]; sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); save_weights(net, buff); @@ -313,8 +315,8 @@ void predict_detections(network net, data d, float threshold, int offset, int cl int ci = k+classes+background+nuisance; float x = (pred.vals[j][ci + 0] + col)/num_boxes; float y = (pred.vals[j][ci + 1] + row)/num_boxes; - float w = pred.vals[j][ci + 2]; //* distance_from_edge(row, num_boxes); - float h = pred.vals[j][ci + 3]; //* distance_from_edge(col, num_boxes); + float w = pred.vals[j][ci + 2]; // distance_from_edge(row, num_boxes); + float h = pred.vals[j][ci + 3]; // distance_from_edge(col, num_boxes); w = pow(w, 2); h = pow(h, 2); float prob = scale*pred.vals[j][k+class+background+nuisance]; @@ -337,7 +339,88 @@ void validate_detection(char *cfgfile, char *weightfile) srand(time(0)); //list *plist = get_paths("/home/pjreddie/data/voc/test_2007.txt"); - list *plist = get_paths("/home/pjreddie/data/voc/val_2012.txt"); + //list *plist = get_paths("/home/pjreddie/data/voc/val_2012.txt"); + list *plist = get_paths("/home/pjreddie/data/voc/test.txt"); + //list *plist = get_paths("/home/pjreddie/data/voc/val.expanded.txt"); + //list *plist = get_paths("/home/pjreddie/data/voc/train.txt"); + char **paths = (char **)list_to_array(plist); + + int classes = layer.classes; + int nuisance = layer.nuisance; + int background = (layer.background && !nuisance); + int num_boxes = sqrt(get_detection_layer_locations(layer)); + + int per_box = 4+classes+background+nuisance; + int num_output = num_boxes*num_boxes*per_box; + + int m = plist->size; + int i = 0; + int splits = 100; + + int nthreads = 4; + int t; + data *val = calloc(nthreads, sizeof(data)); + data *buf = calloc(nthreads, sizeof(data)); + pthread_t *thr = calloc(nthreads, sizeof(data)); + + time_t start = time(0); + + for(t = 0; t < nthreads; ++t){ + int num = (i+1+t)*m/splits - (i+t)*m/splits; + char **part = paths+((i+t)*m/splits); + thr[t] = load_data_thread(part, num, 0, 0, num_output, net.w, net.h, &(buf[t])); + } + + //clock_t time; + for(i = nthreads; i <= splits; i += nthreads){ + //time=clock(); + for(t = 0; t < nthreads; ++t){ + pthread_join(thr[t], 0); + val[t] = buf[t]; + } + for(t = 0; t < nthreads && i < splits; ++t){ + int num = (i+1+t)*m/splits - (i+t)*m/splits; + char **part = paths+((i+t)*m/splits); + thr[t] = load_data_thread(part, num, 0, 0, num_output, net.w, net.h, &(buf[t])); + } + + //fprintf(stderr, "%d: Loaded: %lf seconds\n", i, sec(clock()-time)); + fprintf(stderr, "%d\n", i); + for(t = 0; t < nthreads; ++t){ + predict_detections(net, val[t], .001, (i-nthreads+t)*m/splits, classes, nuisance, background, num_boxes, per_box); + free_data(val[t]); + } + } + fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); +} + +void do_mask(network net, data d, int offset, int classes, int nuisance, int background, int num_boxes, int per_box) +{ + matrix pred = network_predict_data(net, d); + int j, k; + for(j = 0; j < pred.rows; ++j){ + printf("%d ", offset + j); + for(k = 0; k < pred.cols; k += per_box){ + float scale = 1.-pred.vals[j][k]; + printf("%f ", scale); + } + printf("\n"); + } + free_matrix(pred); +} + +void mask_detection(char *cfgfile, char *weightfile) +{ + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + detection_layer layer = get_network_detection_layer(net); + fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + srand(time(0)); + + list *plist = get_paths("/home/pjreddie/data/voc/test_2007.txt"); + //list *plist = get_paths("/home/pjreddie/data/voc/val_2012.txt"); //list *plist = get_paths("/home/pjreddie/data/voc/test.txt"); //list *plist = get_paths("/home/pjreddie/data/voc/val.expanded.txt"); //list *plist = get_paths("/home/pjreddie/data/voc/train.txt"); @@ -381,7 +464,7 @@ void validate_detection(char *cfgfile, char *weightfile) fprintf(stderr, "%d: Loaded: %lf seconds\n", i, sec(clock()-time)); for(t = 0; t < nthreads; ++t){ - predict_detections(net, val[t], .01, (i-nthreads+t)*m/splits, classes, nuisance, background, num_boxes, per_box); + do_mask(net, val[t], (i-nthreads+t)*m/splits, classes, nuisance, background, num_boxes, per_box); free_data(val[t]); } time=clock(); @@ -526,14 +609,17 @@ void test_detection(char *cfgfile, char *weightfile) while(1){ fgets(filename, 256, stdin); strtok(filename, "\n"); - image im = load_image_color(filename, im_size, im_size); + image im = load_image_color(filename,0,0); + image sized = resize_image(im, im_size, im_size); printf("%d %d %d\n", im.h, im.w, im.c); - float *X = im.data; + float *X = sized.data; time=clock(); float *predictions = network_predict(net, X); printf("%s: Predicted in %f seconds.\n", filename, sec(clock()-time)); - draw_detection(im, predictions, 7, "detections"); + draw_detection(im, predictions, 7, "YOLO#SWAG#BLAZEIT"); free_image(im); + free_image(sized); + cvWaitKey(0); } } @@ -551,5 +637,6 @@ void run_detection(int argc, char **argv) else if(0==strcmp(argv[2], "teststuff")) train_detection_teststuff(cfg, weights); else if(0==strcmp(argv[2], "trainloc")) train_localization(cfg, weights); else if(0==strcmp(argv[2], "valid")) validate_detection(cfg, weights); + else if(0==strcmp(argv[2], "mask")) mask_detection(cfg, weights); else if(0==strcmp(argv[2], "validpost")) validate_detection_post(cfg, weights); } diff --git a/src/detection_layer.c b/src/detection_layer.c index ae5930fd..fcae7f31 100644 --- a/src/detection_layer.c +++ b/src/detection_layer.c @@ -372,15 +372,12 @@ void forward_detection_layer(const detection_layer l, network_state state) l.delta[j+1] = 4 * (state.truth[j+1] - l.output[j+1]); l.delta[j+2] = 4 * (state.truth[j+2] - l.output[j+2]); l.delta[j+3] = 4 * (state.truth[j+3] - l.output[j+3]); - if(1){ + if(0){ for (j = offset; j < offset+classes; ++j) { if(state.truth[j]) state.truth[j] = iou; l.delta[j] = state.truth[j] - l.output[j]; } } - - /* - */ } printf("Avg IOU: %f\n", avg_iou/count); } diff --git a/src/imagenet.c b/src/imagenet.c index 2e1b685b..9925a9ab 100644 --- a/src/imagenet.c +++ b/src/imagenet.c @@ -32,7 +32,7 @@ void train_imagenet(char *cfgfile, char *weightfile) pthread_join(load_thread, 0); train = buffer; -/* + /* image im = float_to_image(256, 256, 3, train.X.vals[114]); show_image(im, "training"); cvWaitKey(0); diff --git a/src/network_kernels.cu b/src/network_kernels.cu index 2ca2e2db..5e353aee 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -133,20 +133,18 @@ float train_network_datum_gpu(network net, float *x, float *y) float *get_network_output_layer_gpu(network net, int i) { layer l = net.layers[i]; + cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); if(l.type == CONVOLUTIONAL){ return l.output; } else if(l.type == DECONVOLUTIONAL){ return l.output; } else if(l.type == CONNECTED){ - cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); return l.output; } else if(l.type == DETECTION){ - cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); return l.output; } else if(l.type == MAXPOOL){ return l.output; } else if(l.type == SOFTMAX){ - pull_softmax_layer_output(l); return l.output; } return 0; diff --git a/src/writing.c b/src/writing.c new file mode 100644 index 00000000..1c1684bc --- /dev/null +++ b/src/writing.c @@ -0,0 +1,73 @@ +#include "network.h" +#include "utils.h" +#include "parser.h" + +void train_writing(char *cfgfile, char *weightfile) +{ + data_seed = time(0); + srand(time(0)); + float avg_loss = -1; + char *base = basecfg(cfgfile); + printf("%s\n", base); + network net = parse_network_cfg(cfgfile); + if(weightfile){ + load_weights(&net, weightfile); + } + printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); + int imgs = 1024; + int i = net.seen/imgs; + list *plist = get_paths("figures.list"); + char **paths = (char **)list_to_array(plist); + printf("%d\n", plist->size); + clock_t time; + while(1){ + ++i; + time=clock(); + data train = load_data_writing(paths, imgs, plist->size, 512, 512); + float loss = train_network(net, train); + #ifdef GPU + float *out = get_network_output_gpu(net); + #else + float *out = get_network_output(net); + #endif + image pred = float_to_image(64, 64, 1, out); + print_image(pred); + +/* + image im = float_to_image(256, 256, 3, train.X.vals[0]); + image lab = float_to_image(64, 64, 1, train.y.vals[0]); + image pred = float_to_image(64, 64, 1, out); + show_image(im, "image"); + show_image(lab, "label"); + print_image(lab); + show_image(pred, "pred"); + cvWaitKey(0); + */ + + net.seen += imgs; + if(avg_loss == -1) avg_loss = loss; + avg_loss = avg_loss*.9 + loss*.1; + printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen); + free_data(train); + if((i % 20000) == 0) net.learning_rate *= .1; + //if(i%100 == 0 && net.learning_rate > .00001) net.learning_rate *= .97; + if(i%1000==0){ + char buff[256]; + sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); + save_weights(net, buff); + } + } +} + +void run_writing(int argc, char **argv) +{ + if(argc < 4){ + fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); + return; + } + + char *cfg = argv[3]; + char *weights = (argc > 4) ? argv[4] : 0; + if(0==strcmp(argv[2], "train")) train_writing(cfg, weights); +} +