Added flag -benchmark_layers for: classifier demo, detector test, detector train

AlexeyAB 2019-12-28 23:03:30 +03:00
parent 987448cad8
commit 2c08a20b22
5 changed files with 73 additions and 15 deletions
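
Usage note: the flag is simply appended to the existing commands, and for the classifier demo it also switches on -benchmark (see run_classifier below). A hypothetical invocation, with placeholder cfg/weights/image names (only cfg/coco.data appears in this diff), could look like:

./darknet detector test cfg/coco.data yolov3.cfg yolov3.weights dog.jpg -benchmark_layers

With the flag set, forward_network_gpu synchronizes CUDA around each layer's forward call, prints the time of each layer together with a running average, and finally prints all layers sorted from slowest to fastest.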

View File

@@ -609,6 +609,7 @@ typedef struct network {
layer *layers;
float *output;
learning_rate_policy policy;
int benchmark_layers;
float learning_rate;
float learning_rate_min;
@@ -893,9 +894,9 @@ LIB_API void reset_rnn(network *net);
LIB_API float *network_predict_image(network *net, image im);
LIB_API float *network_predict_image_letterbox(network *net, image im);
LIB_API float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, float thresh_calc_avg_iou, const float iou_thresh, const int map_points, int letter_box, network *existing_net);
LIB_API void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, int mjpeg_port, int show_imgs);
LIB_API void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, int mjpeg_port, int show_imgs, int benchmark_layers);
LIB_API void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh,
float hier_thresh, int dont_show, int ext_output, int save_labels, char *outfile, int letter_box);
float hier_thresh, int dont_show, int ext_output, int save_labels, char *outfile, int letter_box, int benchmark_layers);
LIB_API int network_width(network *net);
LIB_API int network_height(network *net);
LIB_API void optimize_picture(network *net, image orig, int max_layer, float scale, float rate, float thresh, int norm);

View File

@@ -1187,7 +1187,7 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
#endif
}
void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int benchmark)
void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int benchmark, int benchmark_layers)
{
#ifdef OPENCV
printf("Classifier Demo\n");
@@ -1195,6 +1195,7 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
if(weightfile){
load_weights(&net, weightfile);
}
net.benchmark_layers = benchmark_layers;
set_batch_network(&net, 1);
list *options = read_data_cfg(datacfg);
@@ -1329,6 +1330,8 @@ void run_classifier(int argc, char **argv)
int dont_show = find_arg(argc, argv, "-dont_show");
int benchmark = find_arg(argc, argv, "-benchmark");
int benchmark_layers = find_arg(argc, argv, "-benchmark_layers");
if (benchmark_layers) benchmark = 1;
int dontuse_opencv = find_arg(argc, argv, "-dontuse_opencv");
int show_imgs = find_arg(argc, argv, "-show_imgs");
int calc_topk = find_arg(argc, argv, "-topk");
@@ -1344,7 +1347,7 @@ void run_classifier(int argc, char **argv)
if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top);
else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s));
else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear, dontuse_opencv, dont_show, mjpeg_port, calc_topk, show_imgs);
else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename, benchmark);
else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename, benchmark, benchmark_layers);
else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename);
else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename);
else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer);

View File

@@ -476,7 +476,7 @@ int main(int argc, char **argv)
float thresh = find_float_arg(argc, argv, "-thresh", .24);
int ext_output = find_arg(argc, argv, "-ext_output");
char *filename = (argc > 4) ? argv[4]: 0;
test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, 0.5, 0, ext_output, 0, NULL, 0);
test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, 0.5, 0, ext_output, 0, NULL, 0, 0);
} else if (0 == strcmp(argv[1], "cifar")){
run_cifar(argc, argv);
} else if (0 == strcmp(argv[1], "go")){

View File

@@ -22,7 +22,7 @@ int check_mistakes = 0;
static int coco_ids[] = { 1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90 };
void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, int mjpeg_port, int show_imgs)
void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, int mjpeg_port, int show_imgs, int benchmark_layers)
{
list *options = read_data_cfg(datacfg);
char *train_images = option_find_str(options, "train", "data/train.txt");
@@ -73,6 +73,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
cuda_set_device(gpus[i]);
#endif
nets[i] = parse_network_cfg(cfgfile);
nets[i].benchmark_layers = benchmark_layers;
if (weightfile) {
load_weights(&nets[i], weightfile);
}
@@ -1432,7 +1433,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh,
float hier_thresh, int dont_show, int ext_output, int save_labels, char *outfile, int letter_box)
float hier_thresh, int dont_show, int ext_output, int save_labels, char *outfile, int letter_box, int benchmark_layers)
{
list *options = read_data_cfg(datacfg);
char *name_list = option_find_str(options, "names", "data/names.list");
@@ -1444,6 +1445,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
if (weightfile) {
load_weights(&net, weightfile);
}
net.benchmark_layers = benchmark_layers;
fuse_conv_batchnorm(net);
calculate_binary_weights(net);
if (net.layers[net.n - 1].classes != names_size) {
@@ -1587,6 +1589,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
void run_detector(int argc, char **argv)
{
int dont_show = find_arg(argc, argv, "-dont_show");
int benchmark_layers = find_arg(argc, argv, "-benchmark_layers");
int show = find_arg(argc, argv, "-show");
int letter_box = find_arg(argc, argv, "-letter_box");
int calc_map = find_arg(argc, argv, "-map");
@@ -1649,8 +1652,8 @@ void run_detector(int argc, char **argv)
if (strlen(weights) > 0)
if (weights[strlen(weights) - 1] == 0x0d) weights[strlen(weights) - 1] = 0;
char *filename = (argc > 6) ? argv[6] : 0;
if (0 == strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, dont_show, ext_output, save_labels, outfile, letter_box);
else if (0 == strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear, dont_show, calc_map, mjpeg_port, show_imgs);
if (0 == strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, dont_show, ext_output, save_labels, outfile, letter_box, benchmark_layers);
else if (0 == strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear, dont_show, calc_map, mjpeg_port, show_imgs, benchmark_layers);
else if (0 == strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
else if (0 == strcmp(argv[2], "recall")) validate_detector_recall(datacfg, cfg, weights);
else if (0 == strcmp(argv[2], "map")) validate_detector_map(datacfg, cfg, weights, thresh, iou_thresh, map_points, letter_box, NULL);

View File

@@ -42,9 +42,34 @@ float * get_network_output_gpu_layer(network net, int i);
float * get_network_delta_gpu_layer(network net, int i);
float * get_network_output_gpu(network net);
typedef struct time_benchmark_layers {
float time;
int layer_id, layer_type;
} time_benchmark_layers;
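// qsort comparator: sorts entries by accumulated time in descending order (slowest layer first)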
int time_comparator(const void *pa, const void *pb)
{
time_benchmark_layers a = *(time_benchmark_layers *)pa;
time_benchmark_layers b = *(time_benchmark_layers *)pb;
float diff = a.time - b.time;
if (diff < 0) return 1;
else if (diff > 0) return -1;
return 0;
}
void forward_network_gpu(network net, network_state state)
{
//cudaDeviceSynchronize();
static time_benchmark_layers *avg_time_per_layer = NULL;
static time_benchmark_layers *sorted_avg_time_per_layer = NULL;
double start_time, end_time;
if (net.benchmark_layers) {
if (!avg_time_per_layer) {
avg_time_per_layer = (time_benchmark_layers *)calloc(net.n, sizeof(time_benchmark_layers));
sorted_avg_time_per_layer = (time_benchmark_layers *)calloc(net.n, sizeof(time_benchmark_layers));
}
cudaDeviceSynchronize();
}
//printf("\n");
state.workspace = net.workspace;
int i;
@@ -54,11 +79,28 @@ void forward_network_gpu(network net, network_state state)
if(l.delta_gpu && state.train){
fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
}
//printf("\n layer %d - type: %d - \n", i, l.type);
//start_timer();
if (net.benchmark_layers) {
start_time = get_time_point();
}
l.forward_gpu(l, state);
//CHECK_CUDA(cudaDeviceSynchronize());
//stop_timer_and_show();
if (net.benchmark_layers) {
CHECK_CUDA(cudaDeviceSynchronize());
end_time = get_time_point();
const double took_time = (end_time - start_time) / 1000;
const double alpha = 0.9;
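// exponential moving average: keep alpha of the previous average plus (1 - alpha) of the current measurement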
if (avg_time_per_layer[i].time == 0) {
avg_time_per_layer[i].layer_id = i;
avg_time_per_layer[i].layer_type = l.type;
avg_time_per_layer[i].time = took_time;
}
else avg_time_per_layer[i].time = avg_time_per_layer[i].time * alpha + took_time * (1 - alpha);
sorted_avg_time_per_layer[i] = avg_time_per_layer[i];
printf("\n layer %d - type: %d - %lf ms - avg_time %lf ms \n", i, l.type, took_time, avg_time_per_layer[i].time);
}
if(net.wait_stream)
cudaStreamSynchronize(get_cuda_stream());
@@ -83,9 +125,18 @@ void forward_network_gpu(network net, network_state state)
}
*/
}
if (net.benchmark_layers) {
printf("\n\nSorted by time:\n");
qsort(sorted_avg_time_per_layer, net.n, sizeof(time_benchmark_layers), time_comparator);
for (i = 0; i < net.n; ++i) {
//printf("\layer %d - type: %d - avg_time %lf ms \n", avg_time_per_layer[i].layer_id, avg_time_per_layer[i].layer_type, avg_time_per_layer[i].time);
printf("\%d - layer %d - type: %d - avg_time %lf ms \n", i, sorted_avg_time_per_layer[i].layer_id, sorted_avg_time_per_layer[i].layer_type, sorted_avg_time_per_layer[i].time);
}
}
//cudaStreamSynchronize(get_cuda_stream()); // sync CUDA-functions
//cudaDeviceSynchronize();
//show_total_time();
}
void backward_network_gpu(network net, network_state state)