From 5c927228de38f4a57256c63944923f1a41f391d5 Mon Sep 17 00:00:00 2001
From: Martin
Date: Mon, 26 Apr 2021 00:01:33 +0200
Subject: [PATCH] Fix python video (#7611)

* some bbox conversion functions
* output video in original resolution
* fixing drawing of bboxes into orig sized video
---
Review notes (commentary only; text placed after the three-dash line is not
part of the commit message):

  * This file had been collapsed onto three physical lines. The line
    structure below was restored and checked against the hunk headers and
    the diffstat (60 insertions, 11 deletions). Indentation was inferred
    from Python syntax, so if context lines fail to match, apply with
    --ignore-whitespace.

  * drawing(): the writer is still created with
    (darknet_width, darknet_height), but the frames handed to video.write()
    are now the unresized capture frames. Assuming set_saved_video() forwards
    `size` to cv2.VideoWriter (its body is not part of this patch), the
    writer size will not match the frames whenever the source resolution
    differs from the network input size. TODO confirm and pass the capture
    resolution instead.

  * drawing(): frame_queue now carries the raw `frame` returned by
    cap.read() instead of the RGB-converted `frame_resized`, yet the
    cv2.cvtColor(image, cv2.COLOR_BGR2RGB) call before video.write() is kept.
    Before this patch that call undid the earlier conversion; now it
    presumably swaps the channels of the saved video (assuming
    darknet.draw_boxes keeps channel order). TODO confirm and drop it.

  * convert4cropping() is added but not called by any hunk in this patch.

 darknet_video.py | 71 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 60 insertions(+), 11 deletions(-)

diff --git a/darknet_video.py b/darknet_video.py
index cc20b266..04895133 100644
--- a/darknet_video.py
+++ b/darknet_video.py
@@ -60,16 +60,61 @@ def set_saved_video(input_video, output_video, size):
     return video
 
 
+def convert2relative(bbox):
+    """
+    YOLO format use relative coordinates for annotation
+    """
+    x, y, w, h = bbox
+    _height = darknet_height
+    _width = darknet_width
+    return x/_width, y/_height, w/_width, h/_height
+
+
+def convert2original(image, bbox):
+    x, y, w, h = convert2relative(bbox)
+
+    image_h, image_w, __ = image.shape
+
+    orig_x = int(x * image_w)
+    orig_y = int(y * image_h)
+    orig_width = int(w * image_w)
+    orig_height = int(h * image_h)
+
+    bbox_converted = (orig_x, orig_y, orig_width, orig_height)
+
+    return bbox_converted
+
+
+def convert4cropping(image, bbox):
+    x, y, w, h = convert2relative(bbox)
+
+    image_h, image_w, __ = image.shape
+
+    orig_left = int((x - w / 2.) * image_w)
+    orig_right = int((x + w / 2.) * image_w)
+    orig_top = int((y - h / 2.) * image_h)
+    orig_bottom = int((y + h / 2.) * image_h)
+
+    if (orig_left < 0): orig_left = 0
+    if (orig_right > image_w - 1): orig_right = image_w - 1
+    if (orig_top < 0): orig_top = 0
+    if (orig_bottom > image_h - 1): orig_bottom = image_h - 1
+
+    bbox_cropping = (orig_left, orig_top, orig_right, orig_bottom)
+
+    return bbox_cropping
+
+
 def video_capture(frame_queue, darknet_image_queue):
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        frame_resized = cv2.resize(frame_rgb, (width, height),
+        frame_resized = cv2.resize(frame_rgb, (darknet_width, darknet_height),
                                    interpolation=cv2.INTER_LINEAR)
-        frame_queue.put(frame_resized)
-        img_for_detect = darknet.make_image(width, height, 3)
+        frame_queue.put(frame)
+        img_for_detect = darknet.make_image(darknet_width, darknet_height, 3)
         darknet.copy_image_from_bytes(img_for_detect, frame_resized.tobytes())
         darknet_image_queue.put(img_for_detect)
     cap.release()
@@ -91,18 +136,22 @@ def inference(darknet_image_queue, detections_queue, fps_queue):
 
 def drawing(frame_queue, detections_queue, fps_queue):
     random.seed(3)  # deterministic bbox colors
-    video = set_saved_video(cap, args.out_filename, (width, height))
+    video = set_saved_video(cap, args.out_filename, (darknet_width, darknet_height))
     while cap.isOpened():
-        frame_resized = frame_queue.get()
+        frame = frame_queue.get()
         detections = detections_queue.get()
         fps = fps_queue.get()
-        if frame_resized is not None:
-            image = darknet.draw_boxes(detections, frame_resized, class_colors)
+        detections_adjusted = []
+        if frame is not None:
+            for label, confidence, bbox in detections:
+                bbox_adjusted = convert2original(frame, bbox)
+                detections_adjusted.append((str(label), confidence, bbox_adjusted))
+            image = darknet.draw_boxes(detections_adjusted, frame, class_colors)
+            if not args.dont_show:
+                cv2.imshow('Inference', image)
             image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
             if args.out_filename is not None:
                 video.write(image)
-            if not args.dont_show:
-                cv2.imshow('Inference', image)
             if cv2.waitKey(fps) == 27:
                 break
     cap.release()
@@ -124,8 +173,8 @@ if __name__ == '__main__':
             args.weights,
             batch_size=1
         )
-    width = darknet.network_width(network)
-    height = darknet.network_height(network)
+    darknet_width = darknet.network_width(network)
+    darknet_height = darknet.network_height(network)
     input_path = str2int(args.input)
     cap = cv2.VideoCapture(input_path)
     Thread(target=video_capture, args=(frame_queue, darknet_image_queue)).start()