From d669680879f72e58a5bc4d8de98c2e3c0aab0b62 Mon Sep 17 00:00:00 2001
From: Sergey Nuzhny
Date: Sun, 18 Jul 2021 18:58:01 +0300
Subject: [PATCH] Add batch inference on C++ (#7915)

* Add batch inference on C++

* Return default params

* Add make_nms parameter
---
 include/yolo_v2_class.hpp |  5 ++-
 src/yolo_v2_class.cpp     | 81 ++++++++++++++++++++++++++++++++++++---
 2 files changed, 78 insertions(+), 8 deletions(-)

diff --git a/include/yolo_v2_class.hpp b/include/yolo_v2_class.hpp
index 360c3320..1d70a2c2 100644
--- a/include/yolo_v2_class.hpp
+++ b/include/yolo_v2_class.hpp
@@ -56,7 +56,7 @@ struct bbox_t_container {
 #include <opencv2/imgproc/imgproc_c.h>   // C
 #endif
 
-extern "C" LIB_API int init(const char *configurationFilename, const char *weightsFilename, int gpu);
+extern "C" LIB_API int init(const char *configurationFilename, const char *weightsFilename, int gpu, int batch_size);
 extern "C" LIB_API int detect_image(const char *filename, bbox_t_container &container);
 extern "C" LIB_API int detect_mat(const uint8_t* data, const size_t data_length, bbox_t_container &container);
 extern "C" LIB_API int dispose();
@@ -76,11 +76,12 @@ public:
     float nms = .4;
     bool wait_stream;
 
-    LIB_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0);
+    LIB_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0, int batch_size = 1);
     LIB_API ~Detector();
 
     LIB_API std::vector<bbox_t> detect(std::string image_filename, float thresh = 0.2, bool use_mean = false);
     LIB_API std::vector<bbox_t> detect(image_t img, float thresh = 0.2, bool use_mean = false);
+    LIB_API std::vector<std::vector<bbox_t>> detectBatch(image_t img, int batch_size, int width, int height, float thresh, bool make_nms = true);
     static LIB_API image_t load_image(std::string image_filename);
     static LIB_API void free_image(image_t m);
     LIB_API int get_net_width() const;
diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp
index 93812e7f..f7c1ea69 100644
--- a/src/yolo_v2_class.cpp
+++ b/src/yolo_v2_class.cpp
@@ -27,9 +27,9 @@ extern "C" {
 //static Detector* detector = NULL;
 static std::unique_ptr<Detector> detector;
 
-int init(const char *configurationFilename, const char *weightsFilename, int gpu)
+int init(const char *configurationFilename, const char *weightsFilename, int gpu, int batch_size)
 {
-    detector.reset(new Detector(configurationFilename, weightsFilename, gpu));
+    detector.reset(new Detector(configurationFilename, weightsFilename, gpu, batch_size));
     return 1;
 }
 
@@ -127,7 +127,8 @@ struct detector_gpu_t {
     unsigned int *track_id;
 };
 
-LIB_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id) : cur_gpu_id(gpu_id)
+LIB_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id, int batch_size)
+    : cur_gpu_id(gpu_id)
 {
     wait_stream = 0;
 #ifdef GPU
@@ -153,11 +154,11 @@ LIB_API Detector::Detector(std::string cfg_filename, std::string weight_filename
     char *cfgfile = const_cast<char *>(_cfg_filename.c_str());
     char *weightfile = const_cast<char *>(_weight_filename.c_str());
 
-    net = parse_network_cfg_custom(cfgfile, 1, 1);
+    net = parse_network_cfg_custom(cfgfile, batch_size, batch_size);
     if (weightfile) {
         load_weights(&net, weightfile);
     }
-    set_batch_network(&net, 1);
+    set_batch_network(&net, batch_size);
     net.gpu_index = cur_gpu_id;
     fuse_conv_batchnorm(net);
 
@@ -354,6 +355,74 @@ LIB_API std::vector<bbox_t> Detector::detect(image_t img, float thresh, bool use
     return bbox_vec;
 }
 
+LIB_API std::vector<std::vector<bbox_t>> Detector::detectBatch(image_t img, int batch_size, int width, int height, float thresh, bool make_nms)
+{
+    detector_gpu_t &detector_gpu = *static_cast<detector_gpu_t *>(detector_gpu_ptr.get());
+    network &net = detector_gpu.net;
+#ifdef GPU
+    int old_gpu_index;
+    cudaGetDevice(&old_gpu_index);
+    if(cur_gpu_id != old_gpu_index)
+        cudaSetDevice(net.gpu_index);
+
+    net.wait_stream = wait_stream;    // 1 - wait CUDA-stream, 0 - not to wait
+#endif
+    //std::cout << "net.gpu_index = " << net.gpu_index << std::endl;
+
+    layer l = net.layers[net.n - 1];
+
+    float hier_thresh = 0.5;
+    image in_img;
+    in_img.c = img.c;
+    in_img.w = img.w;
+    in_img.h = img.h;
+    in_img.data = img.data;
+    det_num_pair* prediction = network_predict_batch(&net, in_img, batch_size, width, height, thresh, hier_thresh, 0, 0, 0);
+
+    std::vector<std::vector<bbox_t>> bbox_vec(batch_size);
+
+    for (int bi = 0; bi < batch_size; ++bi)
+    {
+        auto dets = prediction[bi].dets;
+
+        if (make_nms && nms)
+            do_nms_sort(dets, prediction[bi].num, l.classes, nms);
+
+        for (int i = 0; i < prediction[bi].num; ++i)
+        {
+            box b = dets[i].bbox;
+            int const obj_id = max_index(dets[i].prob, l.classes);
+            float const prob = dets[i].prob[obj_id];
+
+            if (prob > thresh)
+            {
+                bbox_t bbox;
+                bbox.x = std::max((double)0, (b.x - b.w / 2.));
+                bbox.y = std::max((double)0, (b.y - b.h / 2.));
+                bbox.w = b.w;
+                bbox.h = b.h;
+                bbox.obj_id = obj_id;
+                bbox.prob = prob;
+                bbox.track_id = 0;
+                bbox.frames_counter = 0;
+                bbox.x_3d = NAN;
+                bbox.y_3d = NAN;
+                bbox.z_3d = NAN;
+
+                bbox_vec[bi].push_back(bbox);
+            }
+        }
+    }
+    free_batch_detections(prediction, batch_size);
+
+#ifdef GPU
+    if (cur_gpu_id != old_gpu_index)
+        cudaSetDevice(old_gpu_index);
+#endif
+
+    return bbox_vec;
+}
+
 LIB_API std::vector<bbox_t> Detector::tracking_id(std::vector<bbox_t> cur_bbox_vec, bool const change_history,
     int const frames_story, int const max_dist)
 {
@@ -430,4 +499,4 @@ void *Detector::get_cuda_context()
 #else   // GPU
     return NULL;
 #endif  // GPU
-}
\ No newline at end of file
+}
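
Usage note (not part of the patch): below is a minimal sketch of how the new batch API might be called. It assumes the caller packs batch_size frames, already resized to the network input resolution, back to back into a single image_t buffer, and that the width/height arguments give the coordinate space in which boxes are reported; the cfg/weights paths and the pixel-filling step are placeholders, not values taken from this commit.

// Minimal usage sketch for Detector::detectBatch() - assumptions as stated above.
#include "yolo_v2_class.hpp"

#include <iostream>
#include <vector>

int main()
{
    const int batch_size = 4;

    // The constructor now takes batch_size, so the network is parsed with that batch.
    Detector detector("yolov4.cfg", "yolov4.weights", 0 /*gpu_id*/, batch_size);

    const int net_w = detector.get_net_width();
    const int net_h = detector.get_net_height();

    // One buffer holding batch_size frames of net_w x net_h x 3 floats, back to back.
    std::vector<float> pixels(static_cast<size_t>(batch_size) * net_w * net_h * 3, 0.f);
    // ... fill `pixels` with the normalized image data for each frame ...

    image_t batch;
    batch.w = net_w;
    batch.h = net_h;
    batch.c = 3;
    batch.data = pixels.data();

    // Returns one vector of boxes per frame; make_nms defaults to true.
    std::vector<std::vector<bbox_t>> result =
        detector.detectBatch(batch, batch_size, net_w, net_h, 0.2f /*thresh*/);

    for (size_t bi = 0; bi < result.size(); ++bi)
        for (const bbox_t &b : result[bi])
            std::cout << "frame " << bi << ": obj_id=" << b.obj_id
                      << " prob=" << b.prob << "\n";
    return 0;
}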