mirror of https://github.com/AlexeyAB/darknet.git
142 lines
5.0 KiB
Python
142 lines
5.0 KiB
Python
#!/usr/bin/env python
|
|
from __future__ import print_function
|
|
from __future__ import division
|
|
#
|
|
# This is a utility for converting ground truth data from the kitti format
|
|
# to the YOLO format.
|
|
#
|
|
#
|
|
#
|
|
# YOLO FORMAT
|
|
# .txt for each .jpg - in the same directory and with the same name
|
|
# <object-class> <x> <y> <width> <height>
|
|
#
|
|
# Where:
|
|
#
|
|
# <object-class> - integer number of object from 0 to (classes-1)
|
|
# <x> <y> <width> <height> - floats relative to image width/height 0.0 to 1.0
|
|
# eg. <x> = <absolute_x> / <image_width>
|
|
# Note: <x> <y> - are center of rectangle (not top-left corner)
|
|
#
|
|
# For example for img1.jpg you will be created img1.txt containing:
|
|
#
|
|
# 1 0.716797 0.395833 0.216406 0.147222
|
|
# 0 0.687109 0.379167 0.255469 0.158333
|
|
# 1 0.420312 0.395833 0.140625 0.166667
|
|
#
|
|
# KITTI FORMAT
|
|
#
|
|
# All images as .png in a separate folder to the .txt labels of the same name
|
|
# One label line is as follows:
|
|
#
|
|
# 1 type Describes the type of object: Car, Van, Truck,
|
|
# Pedestrian, Person_sitting, Cyclist, Tram,
|
|
# Misc or DontCare
|
|
# 1 truncated Float from 0 (non-truncated) to 1 (truncated), where
|
|
# truncated refers to the object leaving image boundaries
|
|
# 1 occluded Integer (0,1,2,3) indicating occlusion state:
|
|
# 0 = fully visible, 1 = partly occluded
|
|
# 2 = largely occluded, 3 = unknown
|
|
# 1 alpha Observation angle of object, ranging [-pi..pi]
|
|
# 4 bbox 2D bounding box of object in the image (0-based index):
|
|
# contains left, top, right, bottom pixel coordinates
|
|
# 3 dimensions 3D object dimensions: height, width, length (in meters)
|
|
# 3 location 3D object location x,y,z in camera coordinates (in meters)
|
|
# 1 rotation_y Rotation ry around Y-axis in camera coordinates [-pi..pi]
|
|
# 1 score Only for results: Float, indicating confidence in
|
|
# detection, needed for p/r curves, higher is better.
|
|
#
|
|
# Car 0.0 0 -1.5 57.0 17.3 614.1 200.12 1.65 1.67 3.64 -0.65 1.71 46.70 -1.59
|
|
# Cyclist 0.0 0 -2.46 665.45 160.00 717.9 217.9 1.7 0.4 1.6 2.4 1.3 22.1 -2.35
|
|
# Pedestrian 0.00 2 0.2 42.1 17.6 433.1 24.0 1.6 0.38 0.30 -5.8 1.6 23.1 -0.03
|
|
# DontCare -1 -1 -10 650.19 175.02 668.98 210.48 -1 -1 -1 -1000 -1000 -1000 -10
|
|
|
|
# core imports
|
|
import argparse
|
|
import sys
|
|
import os
|
|
import shutil
|
|
import cv2
|
|
|
|
|
|
kitti2yolotype_dict = {'Car': '0',
|
|
'Van': '0',
|
|
'Pedestrian': '1',
|
|
'Person_sitting': '1',
|
|
'Cyclist': '2',
|
|
'Truck': '3',
|
|
'Tram': '6',
|
|
'Misc': '6',
|
|
'DontCare': '6'}
|
|
|
|
|
|
def kitti2yolo(kitti_label, img_height, img_width):
|
|
|
|
kitti_label_arr = kitti_label.split(' ')
|
|
x1 = float(kitti_label_arr[4])
|
|
y1 = float(kitti_label_arr[5])
|
|
x2 = float(kitti_label_arr[6])
|
|
y2 = float(kitti_label_arr[7])
|
|
|
|
bb_width = x2 - x1
|
|
bb_height = y2 - y1
|
|
yolo_x = (x1 + 0.5*bb_width) / img_width
|
|
yolo_y = (y1 + 0.5*bb_height) / img_height
|
|
yolo_bb_width = bb_width / img_width
|
|
yolo_bb_height = bb_height / img_height
|
|
yolo_label = kitti2yolotype_dict[kitti_label_arr[0]]
|
|
|
|
return (yolo_label + ' '
|
|
+ str(yolo_x) + ' '
|
|
+ str(yolo_y) + ' '
|
|
+ str(yolo_bb_width) + ' '
|
|
+ str(yolo_bb_height))
|
|
|
|
|
|
def main(args):
|
|
|
|
# parse command line arguments
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument("--kitti",
|
|
help="path to kitti-format images and labels, images\
|
|
should be under images_path/images and labels should\
|
|
be under images_path/labels")
|
|
parser.add_argument("--yolo",
|
|
help="path to output yolo-ready training data")
|
|
# kitti paths
|
|
args = parser.parse_args()
|
|
root_path = args.kitti
|
|
yolo_path = args.yolo
|
|
if root_path is None:
|
|
root_path = os.getcwd()
|
|
if (root_path[-1] != os.sep):
|
|
root_path += os.sep
|
|
kitti_images_path = root_path + 'image_2' + os.sep
|
|
kitti_labels_path = root_path + 'label_2' + os.sep
|
|
|
|
# yolo paths
|
|
if yolo_path is None:
|
|
yolo_path = root_path + 'yolo_labels' + os.sep
|
|
|
|
if not os.path.exists(yolo_path):
|
|
os.makedirs(yolo_path)
|
|
|
|
# load each kitti label, convert to yolo and save
|
|
for labelfilename in os.listdir(kitti_labels_path):
|
|
yolo_labels = []
|
|
with open(kitti_labels_path + labelfilename, 'r') as kittilabelfile:
|
|
cvimage = cv2.imread(kitti_images_path
|
|
+ labelfilename.split('.txt')[0] + '.png')
|
|
height, width, frame_depth = cvimage.shape
|
|
for kitti_label in kittilabelfile:
|
|
yolo_labels.append(kitti2yolo(kitti_label,
|
|
img_height=height,
|
|
img_width=width))
|
|
with open(yolo_path + labelfilename, 'w+') as yololabelfile:
|
|
for label in yolo_labels:
|
|
yololabelfile.write(label + '\n')
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main(sys.argv)
|