mirror of https://github.com/AlexeyAB/darknet.git
Some stream fixes
This commit is contained in:
parent
0419c54042
commit
0cb81e5f50
|
@ -26,6 +26,7 @@ int *cuda_make_int_array(size_t n);
|
|||
void cuda_push_array(float *x_gpu, float *x, size_t n);
|
||||
void cuda_pull_array(float *x_gpu, float *x, size_t n);
|
||||
void cuda_set_device(int n);
|
||||
int cuda_get_device();
|
||||
void cuda_free(float *x_gpu);
|
||||
void cuda_random(float *x_gpu, size_t n);
|
||||
float cuda_compare(float *x_gpu, float *x, size_t n, char *s);
|
||||
|
|
|
@ -51,6 +51,7 @@ void forward_network_gpu(network net, network_state state)
|
|||
fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
|
||||
}
|
||||
l.forward_gpu(l, state);
|
||||
cudaStreamSynchronize(get_cuda_stream());
|
||||
state.input = l.output_gpu;
|
||||
}
|
||||
}
|
||||
|
@ -392,7 +393,8 @@ float *get_network_output_gpu(network net)
|
|||
|
||||
float *network_predict_gpu(network net, float *input)
|
||||
{
|
||||
cuda_set_device(net.gpu_index);
|
||||
if (net.gpu_index != cuda_get_device())
|
||||
cuda_set_device(net.gpu_index);
|
||||
int size = get_network_input_size(net) * net.batch;
|
||||
network_state state;
|
||||
state.index = 0;
|
||||
|
|
|
@ -158,7 +158,6 @@ int main(int argc, char *argv[])
|
|||
det_image = detector.mat_to_image_resize(cur_frame);
|
||||
result_vec = thread_result_vec;
|
||||
result_vec = detector.tracking(result_vec); // comment it - if track_id is not required
|
||||
|
||||
#ifdef TRACK_OPTFLOW
|
||||
// track optical flow
|
||||
if (track_optflow_queue.size() > 0) {
|
||||
|
@ -189,7 +188,7 @@ int main(int argc, char *argv[])
|
|||
//std::vector<bbox_t> result;
|
||||
auto result = detector.detect_resized(*current_image, frame_size, 0.24, false); // true
|
||||
//Sleep(200);
|
||||
Sleep(50);
|
||||
//Sleep(50);
|
||||
++fps_det_counter;
|
||||
std::unique_lock<std::mutex> lock(mtx);
|
||||
thread_result_vec = result;
|
||||
|
|
|
@ -34,7 +34,7 @@ struct detector_gpu_t{
|
|||
};
|
||||
|
||||
|
||||
YOLODLL_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id)
|
||||
YOLODLL_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id) : cur_gpu_id(gpu_id)
|
||||
{
|
||||
int old_gpu_index;
|
||||
#ifdef GPU
|
||||
|
@ -178,7 +178,8 @@ YOLODLL_API std::vector<bbox_t> Detector::detect(image_t img, float thresh, bool
|
|||
int old_gpu_index;
|
||||
#ifdef GPU
|
||||
cudaGetDevice(&old_gpu_index);
|
||||
cudaSetDevice(net.gpu_index);
|
||||
if(cur_gpu_id != old_gpu_index)
|
||||
cudaSetDevice(net.gpu_index);
|
||||
#endif
|
||||
//std::cout << "net.gpu_index = " << net.gpu_index << std::endl;
|
||||
|
||||
|
@ -242,7 +243,8 @@ YOLODLL_API std::vector<bbox_t> Detector::detect(image_t img, float thresh, bool
|
|||
free(sized.data);
|
||||
|
||||
#ifdef GPU
|
||||
cudaSetDevice(old_gpu_index);
|
||||
if (cur_gpu_id != old_gpu_index)
|
||||
cudaSetDevice(old_gpu_index);
|
||||
#endif
|
||||
|
||||
return bbox_vec;
|
||||
|
|
|
@ -47,6 +47,7 @@ struct image_t {
|
|||
class Detector {
|
||||
std::shared_ptr<void> detector_gpu_ptr;
|
||||
std::deque<std::vector<bbox_t>> prev_bbox_vec_deque;
|
||||
const int cur_gpu_id;
|
||||
public:
|
||||
float nms = .4;
|
||||
|
||||
|
@ -170,8 +171,8 @@ public:
|
|||
|
||||
sync_PyrLKOpticalFlow_gpu = cv::cuda::SparsePyrLKOpticalFlow::create();
|
||||
sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(21, 21)); // 15, 21, 31
|
||||
sync_PyrLKOpticalFlow_gpu->setMaxLevel(5); // +- 50 ptx
|
||||
sync_PyrLKOpticalFlow_gpu->setNumIters(2000); // def: 30
|
||||
sync_PyrLKOpticalFlow_gpu->setMaxLevel(3); // +- 5 ptx
|
||||
sync_PyrLKOpticalFlow_gpu->setNumIters(1000); // def: 30
|
||||
|
||||
cv::cuda::setDevice(old_gpu_id);
|
||||
}
|
||||
|
@ -190,9 +191,8 @@ public:
|
|||
void update_tracking_flow(cv::Mat src_mat)
|
||||
{
|
||||
int const old_gpu_id = cv::cuda::getDevice();
|
||||
cv::cuda::setDevice(gpu_id);
|
||||
|
||||
//cv::cuda::Stream stream;
|
||||
if (old_gpu_id != gpu_id)
|
||||
cv::cuda::setDevice(gpu_id);
|
||||
|
||||
if (src_mat.channels() == 3) {
|
||||
if (src_mat_gpu.cols == 0) {
|
||||
|
@ -203,7 +203,8 @@ public:
|
|||
src_mat_gpu.upload(src_mat, stream);
|
||||
cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 0, stream);
|
||||
}
|
||||
cv::cuda::setDevice(old_gpu_id);
|
||||
if (old_gpu_id != gpu_id)
|
||||
cv::cuda::setDevice(old_gpu_id);
|
||||
}
|
||||
|
||||
|
||||
|
@ -215,9 +216,8 @@ public:
|
|||
}
|
||||
|
||||
int const old_gpu_id = cv::cuda::getDevice();
|
||||
cv::cuda::setDevice(gpu_id);
|
||||
|
||||
//cv::cuda::Stream stream;
|
||||
if(old_gpu_id != gpu_id)
|
||||
cv::cuda::setDevice(gpu_id);
|
||||
|
||||
if (dst_mat_gpu.cols == 0) {
|
||||
dst_mat_gpu = cv::cuda::GpuMat(dst_mat.size(), dst_mat.type());
|
||||
|
@ -225,9 +225,9 @@ public:
|
|||
tmp_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1);
|
||||
}
|
||||
|
||||
|
||||
dst_mat_gpu.upload(dst_mat, stream);
|
||||
|
||||
|
||||
cv::cuda::cvtColor(dst_mat_gpu, dst_grey_gpu, CV_BGR2GRAY, 0, stream);
|
||||
|
||||
if (src_grey_gpu.rows != dst_grey_gpu.rows || src_grey_gpu.cols != dst_grey_gpu.cols) {
|
||||
|
@ -237,6 +237,8 @@ public:
|
|||
return cur_bbox_vec;
|
||||
}
|
||||
|
||||
//return cur_bbox_vec;
|
||||
|
||||
cv::Mat prev_pts, prev_pts_flow_cpu, cur_pts_flow_cpu;
|
||||
|
||||
for (auto &i : cur_bbox_vec) {
|
||||
|
@ -298,7 +300,8 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
cv::cuda::setDevice(old_gpu_id);
|
||||
if (old_gpu_id != gpu_id)
|
||||
cv::cuda::setDevice(old_gpu_id);
|
||||
|
||||
return result_bbox_vec;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue