mirror of https://github.com/AlexeyAB/darknet.git
Added flag -benchmark_layers for: classifier demo, detector test, detector train
This commit is contained in:
parent
987448cad8
commit
2c08a20b22
|
@ -609,6 +609,7 @@ typedef struct network {
|
|||
layer *layers;
|
||||
float *output;
|
||||
learning_rate_policy policy;
|
||||
int benchmark_layers;
|
||||
|
||||
float learning_rate;
|
||||
float learning_rate_min;
|
||||
|
@ -893,9 +894,9 @@ LIB_API void reset_rnn(network *net);
|
|||
LIB_API float *network_predict_image(network *net, image im);
|
||||
LIB_API float *network_predict_image_letterbox(network *net, image im);
|
||||
LIB_API float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, float thresh_calc_avg_iou, const float iou_thresh, const int map_points, int letter_box, network *existing_net);
|
||||
LIB_API void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, int mjpeg_port, int show_imgs);
|
||||
LIB_API void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, int mjpeg_port, int show_imgs, int benchmark_layers);
|
||||
LIB_API void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh,
|
||||
float hier_thresh, int dont_show, int ext_output, int save_labels, char *outfile, int letter_box);
|
||||
float hier_thresh, int dont_show, int ext_output, int save_labels, char *outfile, int letter_box, int benchmark_layers);
|
||||
LIB_API int network_width(network *net);
|
||||
LIB_API int network_height(network *net);
|
||||
LIB_API void optimize_picture(network *net, image orig, int max_layer, float scale, float rate, float thresh, int norm);
|
||||
|
|
|
@ -1187,7 +1187,7 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
|
|||
#endif
|
||||
}
|
||||
|
||||
void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int benchmark)
|
||||
void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int benchmark, int benchmark_layers)
|
||||
{
|
||||
#ifdef OPENCV
|
||||
printf("Classifier Demo\n");
|
||||
|
@ -1195,6 +1195,7 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
|
|||
if(weightfile){
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
net.benchmark_layers = benchmark_layers;
|
||||
set_batch_network(&net, 1);
|
||||
list *options = read_data_cfg(datacfg);
|
||||
|
||||
|
@ -1329,6 +1330,8 @@ void run_classifier(int argc, char **argv)
|
|||
|
||||
int dont_show = find_arg(argc, argv, "-dont_show");
|
||||
int benchmark = find_arg(argc, argv, "-benchmark");
|
||||
int benchmark_layers = find_arg(argc, argv, "-benchmark_layers");
|
||||
if (benchmark_layers) benchmark = 1;
|
||||
int dontuse_opencv = find_arg(argc, argv, "-dontuse_opencv");
|
||||
int show_imgs = find_arg(argc, argv, "-show_imgs");
|
||||
int calc_topk = find_arg(argc, argv, "-topk");
|
||||
|
@ -1344,7 +1347,7 @@ void run_classifier(int argc, char **argv)
|
|||
if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top);
|
||||
else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s));
|
||||
else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear, dontuse_opencv, dont_show, mjpeg_port, calc_topk, show_imgs);
|
||||
else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename, benchmark);
|
||||
else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename, benchmark, benchmark_layers);
|
||||
else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename);
|
||||
else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename);
|
||||
else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer);
|
||||
|
|
|
@ -476,7 +476,7 @@ int main(int argc, char **argv)
|
|||
float thresh = find_float_arg(argc, argv, "-thresh", .24);
|
||||
int ext_output = find_arg(argc, argv, "-ext_output");
|
||||
char *filename = (argc > 4) ? argv[4]: 0;
|
||||
test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, 0.5, 0, ext_output, 0, NULL, 0);
|
||||
test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, 0.5, 0, ext_output, 0, NULL, 0, 0);
|
||||
} else if (0 == strcmp(argv[1], "cifar")){
|
||||
run_cifar(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "go")){
|
||||
|
|
|
@ -22,7 +22,7 @@ int check_mistakes = 0;
|
|||
|
||||
static int coco_ids[] = { 1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90 };
|
||||
|
||||
void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, int mjpeg_port, int show_imgs)
|
||||
void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, int mjpeg_port, int show_imgs, int benchmark_layers)
|
||||
{
|
||||
list *options = read_data_cfg(datacfg);
|
||||
char *train_images = option_find_str(options, "train", "data/train.txt");
|
||||
|
@ -73,6 +73,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
|||
cuda_set_device(gpus[i]);
|
||||
#endif
|
||||
nets[i] = parse_network_cfg(cfgfile);
|
||||
nets[i].benchmark_layers = benchmark_layers;
|
||||
if (weightfile) {
|
||||
load_weights(&nets[i], weightfile);
|
||||
}
|
||||
|
@ -1432,7 +1433,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
|
|||
|
||||
|
||||
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh,
|
||||
float hier_thresh, int dont_show, int ext_output, int save_labels, char *outfile, int letter_box)
|
||||
float hier_thresh, int dont_show, int ext_output, int save_labels, char *outfile, int letter_box, int benchmark_layers)
|
||||
{
|
||||
list *options = read_data_cfg(datacfg);
|
||||
char *name_list = option_find_str(options, "names", "data/names.list");
|
||||
|
@ -1444,6 +1445,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
|
|||
if (weightfile) {
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
net.benchmark_layers = benchmark_layers;
|
||||
fuse_conv_batchnorm(net);
|
||||
calculate_binary_weights(net);
|
||||
if (net.layers[net.n - 1].classes != names_size) {
|
||||
|
@ -1587,6 +1589,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
|
|||
void run_detector(int argc, char **argv)
|
||||
{
|
||||
int dont_show = find_arg(argc, argv, "-dont_show");
|
||||
int benchmark_layers = find_arg(argc, argv, "-benchmark_layers");
|
||||
int show = find_arg(argc, argv, "-show");
|
||||
int letter_box = find_arg(argc, argv, "-letter_box");
|
||||
int calc_map = find_arg(argc, argv, "-map");
|
||||
|
@ -1649,8 +1652,8 @@ void run_detector(int argc, char **argv)
|
|||
if (strlen(weights) > 0)
|
||||
if (weights[strlen(weights) - 1] == 0x0d) weights[strlen(weights) - 1] = 0;
|
||||
char *filename = (argc > 6) ? argv[6] : 0;
|
||||
if (0 == strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, dont_show, ext_output, save_labels, outfile, letter_box);
|
||||
else if (0 == strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear, dont_show, calc_map, mjpeg_port, show_imgs);
|
||||
if (0 == strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, dont_show, ext_output, save_labels, outfile, letter_box, benchmark_layers);
|
||||
else if (0 == strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear, dont_show, calc_map, mjpeg_port, show_imgs, benchmark_layers);
|
||||
else if (0 == strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
|
||||
else if (0 == strcmp(argv[2], "recall")) validate_detector_recall(datacfg, cfg, weights);
|
||||
else if (0 == strcmp(argv[2], "map")) validate_detector_map(datacfg, cfg, weights, thresh, iou_thresh, map_points, letter_box, NULL);
|
||||
|
|
|
@ -42,9 +42,34 @@ float * get_network_output_gpu_layer(network net, int i);
|
|||
float * get_network_delta_gpu_layer(network net, int i);
|
||||
float * get_network_output_gpu(network net);
|
||||
|
||||
/*
 * Per-layer timing record used by the -benchmark_layers report.
 * One record is kept for every layer of the network being timed.
 */
typedef struct time_benchmark_layers {
    float time;               /* averaged forward time of the layer; the caller prints it as ms */
    int layer_id, layer_type; /* index of the layer in the network and its type value */
} time_benchmark_layers;

/*
 * qsort() comparator for arrays of time_benchmark_layers.
 *
 * Orders records by `time` in DESCENDING order, so the slowest layer
 * ends up first after sorting.
 *
 * pa, pb - pointers to the two time_benchmark_layers elements to compare
 *          (passed by qsort as const void *).
 *
 * Returns  1 if *pa has the smaller time (it must come later),
 *         -1 if *pa has the larger time (it must come earlier),
 *          0 if the times compare equal.
 */
int time_comparator(const void *pa, const void *pb)
{
    /* Keep the const qualifier that qsort hands us and read through the
       pointers instead of copying both structs on every comparison. */
    const time_benchmark_layers *a = (const time_benchmark_layers *)pa;
    const time_benchmark_layers *b = (const time_benchmark_layers *)pb;

    if (a->time < b->time) return 1;
    if (a->time > b->time) return -1;
    return 0;
}
|
||||
|
||||
void forward_network_gpu(network net, network_state state)
|
||||
{
|
||||
//cudaDeviceSynchronize();
|
||||
static time_benchmark_layers *avg_time_per_layer = NULL;
|
||||
static time_benchmark_layers *sorted_avg_time_per_layer = NULL;
|
||||
double start_time, end_time;
|
||||
if (net.benchmark_layers) {
|
||||
if (!avg_time_per_layer) {
|
||||
avg_time_per_layer = (time_benchmark_layers *)calloc(net.n, sizeof(time_benchmark_layers));
|
||||
sorted_avg_time_per_layer = (time_benchmark_layers *)calloc(net.n, sizeof(time_benchmark_layers));
|
||||
}
|
||||
cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
//printf("\n");
|
||||
state.workspace = net.workspace;
|
||||
int i;
|
||||
|
@ -54,11 +79,28 @@ void forward_network_gpu(network net, network_state state)
|
|||
if(l.delta_gpu && state.train){
|
||||
fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
|
||||
}
|
||||
//printf("\n layer %d - type: %d - \n", i, l.type);
|
||||
//start_timer();
|
||||
|
||||
if (net.benchmark_layers) {
|
||||
start_time = get_time_point();
|
||||
}
|
||||
|
||||
l.forward_gpu(l, state);
|
||||
//CHECK_CUDA(cudaDeviceSynchronize());
|
||||
//stop_timer_and_show();
|
||||
|
||||
if (net.benchmark_layers) {
|
||||
CHECK_CUDA(cudaDeviceSynchronize());
|
||||
end_time = get_time_point();
|
||||
const double took_time = (end_time - start_time) / 1000;
|
||||
const double alpha = 0.9;
|
||||
if (avg_time_per_layer[i].time == 0) {
|
||||
avg_time_per_layer[i].layer_id = i;
|
||||
avg_time_per_layer[i].layer_type = l.type;
|
||||
avg_time_per_layer[i].time = took_time;
|
||||
}
|
||||
else avg_time_per_layer[i].time = avg_time_per_layer[i].time * alpha + took_time * (1 - alpha);
|
||||
|
||||
sorted_avg_time_per_layer[i] = avg_time_per_layer[i];
|
||||
printf("\n layer %d - type: %d - %lf ms - avg_time %lf ms \n", i, l.type, took_time, avg_time_per_layer[i].time);
|
||||
}
|
||||
|
||||
if(net.wait_stream)
|
||||
cudaStreamSynchronize(get_cuda_stream());
|
||||
|
@ -83,9 +125,18 @@ void forward_network_gpu(network net, network_state state)
|
|||
}
|
||||
*/
|
||||
}
|
||||
|
||||
if (net.benchmark_layers) {
|
||||
printf("\n\nSorted by time:\n");
|
||||
qsort(sorted_avg_time_per_layer, net.n, sizeof(time_benchmark_layers), time_comparator);
|
||||
for (i = 0; i < net.n; ++i) {
|
||||
//printf("\layer %d - type: %d - avg_time %lf ms \n", avg_time_per_layer[i].layer_id, avg_time_per_layer[i].layer_type, avg_time_per_layer[i].time);
|
||||
printf("\%d - layer %d - type: %d - avg_time %lf ms \n", i, sorted_avg_time_per_layer[i].layer_id, sorted_avg_time_per_layer[i].layer_type, sorted_avg_time_per_layer[i].time);
|
||||
}
|
||||
}
|
||||
|
||||
//cudaStreamSynchronize(get_cuda_stream()); // sync CUDA-functions
|
||||
//cudaDeviceSynchronize();
|
||||
//show_total_time();
|
||||
}
|
||||
|
||||
void backward_network_gpu(network net, network_state state)
|
||||
|
|
Loading…
Reference in New Issue