Some stream fixes

This commit is contained in:
AlexeyAB 2018-01-07 20:44:04 +03:00
parent 0419c54042
commit 0cb81e5f50
5 changed files with 24 additions and 17 deletions

View File

@ -26,6 +26,7 @@ int *cuda_make_int_array(size_t n);
void cuda_push_array(float *x_gpu, float *x, size_t n);
void cuda_pull_array(float *x_gpu, float *x, size_t n);
void cuda_set_device(int n);
int cuda_get_device();
void cuda_free(float *x_gpu);
void cuda_random(float *x_gpu, size_t n);
float cuda_compare(float *x_gpu, float *x, size_t n, char *s);

View File

@ -51,6 +51,7 @@ void forward_network_gpu(network net, network_state state)
fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
}
l.forward_gpu(l, state);
cudaStreamSynchronize(get_cuda_stream());
state.input = l.output_gpu;
}
}
@ -392,7 +393,8 @@ float *get_network_output_gpu(network net)
float *network_predict_gpu(network net, float *input)
{
cuda_set_device(net.gpu_index);
if (net.gpu_index != cuda_get_device())
cuda_set_device(net.gpu_index);
int size = get_network_input_size(net) * net.batch;
network_state state;
state.index = 0;

View File

@ -158,7 +158,6 @@ int main(int argc, char *argv[])
det_image = detector.mat_to_image_resize(cur_frame);
result_vec = thread_result_vec;
result_vec = detector.tracking(result_vec); // comment it - if track_id is not required
#ifdef TRACK_OPTFLOW
// track optical flow
if (track_optflow_queue.size() > 0) {
@ -189,7 +188,7 @@ int main(int argc, char *argv[])
//std::vector<bbox_t> result;
auto result = detector.detect_resized(*current_image, frame_size, 0.24, false); // true
//Sleep(200);
Sleep(50);
//Sleep(50);
++fps_det_counter;
std::unique_lock<std::mutex> lock(mtx);
thread_result_vec = result;

View File

@ -34,7 +34,7 @@ struct detector_gpu_t{
};
YOLODLL_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id)
YOLODLL_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id) : cur_gpu_id(gpu_id)
{
int old_gpu_index;
#ifdef GPU
@ -178,7 +178,8 @@ YOLODLL_API std::vector<bbox_t> Detector::detect(image_t img, float thresh, bool
int old_gpu_index;
#ifdef GPU
cudaGetDevice(&old_gpu_index);
cudaSetDevice(net.gpu_index);
if(cur_gpu_id != old_gpu_index)
cudaSetDevice(net.gpu_index);
#endif
//std::cout << "net.gpu_index = " << net.gpu_index << std::endl;
@ -242,7 +243,8 @@ YOLODLL_API std::vector<bbox_t> Detector::detect(image_t img, float thresh, bool
free(sized.data);
#ifdef GPU
cudaSetDevice(old_gpu_index);
if (cur_gpu_id != old_gpu_index)
cudaSetDevice(old_gpu_index);
#endif
return bbox_vec;

View File

@ -47,6 +47,7 @@ struct image_t {
class Detector {
std::shared_ptr<void> detector_gpu_ptr;
std::deque<std::vector<bbox_t>> prev_bbox_vec_deque;
const int cur_gpu_id;
public:
float nms = .4;
@ -170,8 +171,8 @@ public:
sync_PyrLKOpticalFlow_gpu = cv::cuda::SparsePyrLKOpticalFlow::create();
sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(21, 21)); // 15, 21, 31
sync_PyrLKOpticalFlow_gpu->setMaxLevel(5); // +- 50 ptx
sync_PyrLKOpticalFlow_gpu->setNumIters(2000); // def: 30
sync_PyrLKOpticalFlow_gpu->setMaxLevel(3); // +- 5 ptx
sync_PyrLKOpticalFlow_gpu->setNumIters(1000); // def: 30
cv::cuda::setDevice(old_gpu_id);
}
@ -190,9 +191,8 @@ public:
void update_tracking_flow(cv::Mat src_mat)
{
int const old_gpu_id = cv::cuda::getDevice();
cv::cuda::setDevice(gpu_id);
//cv::cuda::Stream stream;
if (old_gpu_id != gpu_id)
cv::cuda::setDevice(gpu_id);
if (src_mat.channels() == 3) {
if (src_mat_gpu.cols == 0) {
@ -203,7 +203,8 @@ public:
src_mat_gpu.upload(src_mat, stream);
cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 0, stream);
}
cv::cuda::setDevice(old_gpu_id);
if (old_gpu_id != gpu_id)
cv::cuda::setDevice(old_gpu_id);
}
@ -215,9 +216,8 @@ public:
}
int const old_gpu_id = cv::cuda::getDevice();
cv::cuda::setDevice(gpu_id);
//cv::cuda::Stream stream;
if(old_gpu_id != gpu_id)
cv::cuda::setDevice(gpu_id);
if (dst_mat_gpu.cols == 0) {
dst_mat_gpu = cv::cuda::GpuMat(dst_mat.size(), dst_mat.type());
@ -225,9 +225,9 @@ public:
tmp_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1);
}
dst_mat_gpu.upload(dst_mat, stream);
cv::cuda::cvtColor(dst_mat_gpu, dst_grey_gpu, CV_BGR2GRAY, 0, stream);
if (src_grey_gpu.rows != dst_grey_gpu.rows || src_grey_gpu.cols != dst_grey_gpu.cols) {
@ -237,6 +237,8 @@ public:
return cur_bbox_vec;
}
//return cur_bbox_vec;
cv::Mat prev_pts, prev_pts_flow_cpu, cur_pts_flow_cpu;
for (auto &i : cur_bbox_vec) {
@ -298,7 +300,8 @@ public:
}
}
cv::cuda::setDevice(old_gpu_id);
if (old_gpu_id != gpu_id)
cv::cuda::setDevice(old_gpu_id);
return result_bbox_vec;
}