diff --git a/build/darknet/yolo_console_dll.vcxproj b/build/darknet/yolo_console_dll.vcxproj index 176d70ba..064c94a1 100644 --- a/build/darknet/yolo_console_dll.vcxproj +++ b/build/darknet/yolo_console_dll.vcxproj @@ -118,6 +118,7 @@ C:\opencv_source\opencv\bin\install\include _CRT_SECURE_NO_WARNINGS;_MBCS;%(PreprocessorDefinitions) Async + true true diff --git a/build/darknet/yolo_cpp_dll.vcxproj b/build/darknet/yolo_cpp_dll.vcxproj index 483e2589..2cdb3909 100644 --- a/build/darknet/yolo_cpp_dll.vcxproj +++ b/build/darknet/yolo_cpp_dll.vcxproj @@ -134,7 +134,7 @@ true true ..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include - YOLODLL_EXPORTS;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + CUDNN;YOLODLL_EXPORTS;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) c11 c++1y CompileAsCpp diff --git a/src/cuda.c b/src/cuda.c index f168e4e2..d8db8514 100644 --- a/src/cuda.c +++ b/src/cuda.c @@ -96,6 +96,7 @@ cublasHandle_t blas_handle() int i = cuda_get_device(); if(!init[i]) { cublasCreate(&handle[i]); + cublasStatus_t status = cublasSetStream(handle[i], get_cuda_stream()); init[i] = 1; } return handle[i]; diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp index 0e291a4c..0b475548 100644 --- a/src/yolo_console_dll.cpp +++ b/src/yolo_console_dll.cpp @@ -15,6 +15,7 @@ #endif #define TRACK_OPTFLOW + #include "yolo_v2_class.hpp" // imported functions from DLL #ifdef OPENCV @@ -93,8 +94,8 @@ int main(int argc, char *argv[]) std::string filename; if (argc > 1) filename = argv[1]; - Detector detector("cfg/yolo-voc.cfg", "yolo-voc.weights"); - //Detector detector("tiny-yolo-voc_air.cfg", "backup/tiny-yolo-voc_air_5000.weights"); + //Detector detector("cfg/yolo-voc.cfg", "yolo-voc.weights"); + Detector detector("tiny-yolo-voc_air.cfg", "backup/tiny-yolo-voc_air_5000.weights"); auto obj_names = objects_names_from_file("data/voc.names"); std::string out_videofile = "result.avi"; @@ -131,7 +132,7 @@ int main(int argc, char *argv[]) int current_det_fps = 0, current_cap_fps = 0; std::thread t_detect, t_cap, t_videowrite; std::mutex mtx; - std::condition_variable cv; + std::condition_variable cv_detected, cv_pre_tracked; std::chrono::steady_clock::time_point steady_start, steady_end; cv::VideoCapture cap(filename); cap >> cur_frame; int const video_fps = cap.get(CV_CAP_PROP_FPS); @@ -153,35 +154,31 @@ int main(int argc, char *argv[]) // swap result bouned-boxes and input-frame if(consumed) { - { - std::unique_lock lock(mtx); - det_image = detector.mat_to_image_resize(cur_frame); - result_vec = thread_result_vec; - result_vec = detector.tracking(result_vec); // comment it - if track_id is not required - consumed = false; + std::unique_lock lock(mtx); + det_image = detector.mat_to_image_resize(cur_frame); + result_vec = thread_result_vec; + result_vec = detector.tracking(result_vec); // comment it - if track_id is not required #ifdef TRACK_OPTFLOW - // track optical flow - if (track_optflow_queue.size() > 0) { - std::queue new_track_optflow_queue; - //std::cout << "\n !!!! all = " << track_optflow_queue.size() << ", cur = " << passed_flow_frames << std::endl; - tracker_flow.update_tracking_flow(track_optflow_queue.front()); + // track optical flow + if (track_optflow_queue.size() > 0) { + std::queue new_track_optflow_queue; + //std::cout << "\n !!!! all = " << track_optflow_queue.size() << ", cur = " << passed_flow_frames << std::endl; + tracker_flow.update_tracking_flow(track_optflow_queue.front()); + while (track_optflow_queue.size() > 1) { track_optflow_queue.pop(); - while (track_optflow_queue.size() > 0) { - result_vec = tracker_flow.tracking_flow(track_optflow_queue.front(), result_vec); - if (track_optflow_queue.size() <= passed_flow_frames && new_track_optflow_queue.size() == 0) - new_track_optflow_queue = track_optflow_queue; - - track_optflow_queue.pop(); - } - track_optflow_queue = new_track_optflow_queue; - new_track_optflow_queue.swap(std::queue()); - passed_flow_frames = 0; + result_vec = tracker_flow.tracking_flow(track_optflow_queue.front(), result_vec); + if (track_optflow_queue.size() <= passed_flow_frames && new_track_optflow_queue.size() == 0) + new_track_optflow_queue = track_optflow_queue; } + track_optflow_queue = new_track_optflow_queue; + new_track_optflow_queue.swap(std::queue()); + passed_flow_frames = 0; } #endif - + consumed = false; + cv_pre_tracked.notify_all(); } // launch thread once - Detection if (!t_detect.joinable()) { @@ -189,6 +186,7 @@ int main(int argc, char *argv[]) auto current_image = det_image; consumed = true; while (current_image.use_count() > 0) { + //std::vector result; auto result = detector.detect_resized(*current_image, frame_size, 0.24, false); // true //Sleep(200); Sleep(50); @@ -197,7 +195,8 @@ int main(int argc, char *argv[]) thread_result_vec = result; current_image = det_image; consumed = true; - cv.notify_all(); + cv_detected.notify_all(); + while (consumed) cv_pre_tracked.wait(lock); } }); } @@ -234,7 +233,7 @@ int main(int argc, char *argv[]) // wait detection result for video-file only (not for net-cam) //if (protocol != "rtsp://" && protocol != "http://" && protocol != "https:/") { // std::unique_lock lock(mtx); - // while (!consumed) cv.wait(lock); + // while (!consumed) cv_detected.wait(lock); //} } if (t_cap.joinable()) t_cap.join(); diff --git a/src/yolo_v2_class.hpp b/src/yolo_v2_class.hpp index d60f359c..edb24cbf 100644 --- a/src/yolo_v2_class.hpp +++ b/src/yolo_v2_class.hpp @@ -89,12 +89,15 @@ public: return mat_to_image(det_mat); } - static std::shared_ptr mat_to_image(cv::Mat img) + static std::shared_ptr mat_to_image(cv::Mat img_src) { + cv::Mat img; + cv::cvtColor(img_src, img, cv::COLOR_RGB2BGR); + //std::cout << "\n img_rgb: " << img_rgb.size() << ", " << img_rgb.type() << ", " << img_rgb.channels() << std::endl; + //std::cout << "\n img: " << img.size() << ", " << img.type() << ", " << img.channels() << std::endl; std::shared_ptr image_ptr(new image_t, [](image_t *img) { free_image(*img); delete img; }); std::shared_ptr ipl_small = std::make_shared(img); *image_ptr = ipl_to_image(ipl_small.get()); - rgbgr_image(*image_ptr); return image_ptr; } @@ -108,15 +111,23 @@ private: int c = src->nChannels; int step = src->widthStep; image_t out = make_image_custom(w, h, c); - int i, j, k, count = 0;; + int count = 0; - for (k = 0; k < c; ++k) { - for (i = 0; i < h; ++i) { - for (j = 0; j < w; ++j) { - out.data[count++] = data[i*step + j*c + k] / 255.; + //std::vector tmp(w*h*c); + + for (int k = 0; k < c; ++k) { + for (int i = 0; i < h; ++i) { + int i_step = i*step; + for (int j = 0; j < w; ++j) { + out.data[count++] = data[i_step + j*c + k] / 255.; + //tmp[count++] = data[i_step + j*c + k]; } } } + //cv::Mat wrapped_8bit(cv::Size(w, h), CV_8UC3, tmp.data()); + //cv::Mat wrapped_32float(cv::Size(w, h), CV_32FC3, out.data); + //wrapped_8bit.convertTo(wrapped_32float, CV_32FC3, 1 / 255.); + return out; } @@ -137,16 +148,6 @@ private: return out; } - static void rgbgr_image(image_t im) - { - int i; - for (i = 0; i < im.w*im.h; ++i) { - float swap = im.data[i]; - im.data[i] = im.data[i + im.w*im.h * 2]; - im.data[i + im.w*im.h * 2] = swap; - } - } - #endif // OPENCV }; @@ -156,22 +157,23 @@ private: class Tracker_optflow { public: - int gpu_id; + const int gpu_count; + const int gpu_id; - Tracker_optflow(int _gpu_id = 0) : gpu_id(_gpu_id) + + Tracker_optflow(int _gpu_id = 0) : gpu_count(cv::cuda::getCudaEnabledDeviceCount()), gpu_id(std::min(_gpu_id, gpu_count-1)) { int const old_gpu_id = cv::cuda::getDevice(); - static const int gpu_count = cv::cuda::getCudaEnabledDeviceCount(); - if (gpu_count > gpu_id) - cv::cuda::setDevice(gpu_id); + cv::cuda::setDevice(gpu_id); + + stream = cv::cuda::Stream(); sync_PyrLKOpticalFlow_gpu = cv::cuda::SparsePyrLKOpticalFlow::create(); - //sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(31, 31)); //sync_PyrLKOpticalFlow_gpu.winSize = cv::Size(31, 31); - //sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(15, 15)); //sync_PyrLKOpticalFlow_gpu.winSize = cv::Size(15, 15); + sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(21, 21)); // 15, 21, 31 + sync_PyrLKOpticalFlow_gpu->setMaxLevel(5); // +- 50 ptx + sync_PyrLKOpticalFlow_gpu->setNumIters(2000); // def: 30 - sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(21, 21)); - sync_PyrLKOpticalFlow_gpu->setMaxLevel(50); //sync_PyrLKOpticalFlow_gpu.maxLevel = 8; // +-32 points // def: 3 - sync_PyrLKOpticalFlow_gpu->setNumIters(6000); //sync_PyrLKOpticalFlow_gpu.iters = 8000; // def: 30 + cv::cuda::setDevice(old_gpu_id); } // just to avoid extra allocations @@ -183,15 +185,14 @@ public: cv::cuda::GpuMat src_grey_gpu; // used in both functions cv::Ptr sync_PyrLKOpticalFlow_gpu; + cv::cuda::Stream stream; void update_tracking_flow(cv::Mat src_mat) { int const old_gpu_id = cv::cuda::getDevice(); - static const int gpu_count = cv::cuda::getCudaEnabledDeviceCount(); - if (gpu_count > gpu_id) - cv::cuda::setDevice(gpu_id); + cv::cuda::setDevice(gpu_id); - cv::cuda::Stream stream; + //cv::cuda::Stream stream; if (src_mat.channels() == 3) { if (src_mat_gpu.cols == 0) { @@ -214,11 +215,9 @@ public: } int const old_gpu_id = cv::cuda::getDevice(); - static const int gpu_count = cv::cuda::getCudaEnabledDeviceCount(); - if (gpu_count > gpu_id) - cv::cuda::setDevice(gpu_id); + cv::cuda::setDevice(gpu_id); - cv::cuda::Stream stream; + //cv::cuda::Stream stream; if (dst_mat_gpu.cols == 0) { dst_mat_gpu = cv::cuda::GpuMat(dst_mat.size(), dst_mat.type());