# Mirror of https://github.com/AlexeyAB/darknet.git
# Python source, 321 lines, 9.3 KiB
from ctypes import *
|
|
import math
|
|
import random
|
|
import os
|
|
import cv2
|
|
import numpy as np
|
|
import time
|
|
|
|
|
|
def sample(probs):
    """Draw a random index, weighted by the entries of ``probs``.

    Each weight is divided by the sum of all weights, a uniform random
    number in [0, 1] is drawn, and the normalised weights are subtracted
    from it in order. The index at which the remainder first reaches zero
    or below is returned.

    Args:
        probs: Sequence of numeric weights.

    Returns:
        The selected index. If the remainder is still positive after every
        weight has been subtracted, ``len(probs) - 1`` is returned (which is
        ``-1`` for an empty sequence).

    Raises:
        ZeroDivisionError: If ``probs`` is a non-empty sequence of plain
            Python numbers that sums to zero.
    """
    total = sum(probs)
    remaining = random.uniform(0, 1)
    for index, weight in enumerate(probs):
        remaining -= weight / total
        if remaining <= 0:
            return index
    return len(probs) - 1
|
|
|
|
|
|
def c_array(ctype, values):
    """Build a ctypes array of ``ctype`` initialised from ``values``.

    Args:
        ctype: ctypes element type (for example ``c_float``).
        values: Sized sequence of values convertible to ``ctype``.

    Returns:
        A new ``ctype * len(values)`` array instance holding the values.
    """
    array_type = ctype * len(values)
    arr = array_type()
    # Slice assignment converts every element to `ctype` in one step.
    arr[:] = values
    return arr
|
|
|
|
|
|
class BOX(Structure):
    """ctypes structure holding a bounding box as four C floats.

    ``convertBack`` in this file treats ``(x, y)`` as the box centre and
    ``(w, h)`` as its full width and height.
    NOTE(review): presumably mirrors the box struct of libdarknet; the field
    order and types must stay in sync with the C definition - confirm.
    """

    _fields_ = [
        ("x", c_float),
        ("y", c_float),
        ("w", c_float),
        ("h", c_float),
    ]
|
|
|
|
|
|
class DETECTION(Structure):
    """ctypes structure for one detection returned by ``get_network_boxes``.

    ``detect`` in this file reads ``bbox`` and indexes ``prob`` by class id.
    NOTE(review): presumably mirrors the detection struct of the loaded
    libdarknet build; the field list must match that build's C definition
    exactly - confirm against the library headers.
    """

    _fields_ = [
        ("bbox", BOX),
        ("classes", c_int),
        ("prob", POINTER(c_float)),
        ("mask", POINTER(c_float)),
        ("objectness", c_float),
        ("sort_class", c_int),
    ]
|
|
|
|
|
|
class IMAGE(Structure):
    """ctypes structure describing an image passed to libdarknet.

    ``array_to_image`` in this file fills it with width, height, channel
    count and a pointer to float32 pixel data.
    """

    _fields_ = [
        ("w", c_int),
        ("h", c_int),
        ("c", c_int),
        ("data", POINTER(c_float)),
    ]
|
|
|
|
|
|
class METADATA(Structure):
    """ctypes structure returned by ``lib.get_metadata``.

    ``classes`` is used in this file as the number of classes to iterate
    over, and ``names`` is indexed by class id to obtain a label.
    """

    _fields_ = [
        ("classes", c_int),
        ("names", POINTER(c_char_p)),
    ]
|
|
|
|
|
|
# Controls whether the CUDA device-selection entry point is bound below.
# NOTE(review): presumably this must be False for a CPU-only libdarknet
# build, where looking up `cuda_set_device` would fail - confirm.
hasGPU = True

# Load the darknet shared library from the current working directory.
lib = CDLL("./libdarknet.so", RTLD_GLOBAL)
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int

predict = lib.network_predict
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)

if hasGPU:
    set_gpu = lib.cuda_set_device
    set_gpu.argtypes = [c_int]

make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE

get_network_boxes = lib.get_network_boxes
get_network_boxes.argtypes = [
    c_void_p, c_int, c_int, c_float, c_float,
    POINTER(c_int), c_int, POINTER(c_int), c_int,
]
get_network_boxes.restype = POINTER(DETECTION)

make_network_boxes = lib.make_network_boxes
make_network_boxes.argtypes = [c_void_p]
make_network_boxes.restype = POINTER(DETECTION)

free_detections = lib.free_detections
free_detections.argtypes = [POINTER(DETECTION), c_int]

free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]

# Second alias for lib.network_predict. Attribute access on a CDLL returns
# the same cached function object, so the restype configured through
# `predict` above applies to this name as well.
network_predict = lib.network_predict
network_predict.argtypes = [c_void_p, POINTER(c_float)]

reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]

# Network handles are carried around as opaque void pointers.
load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p

load_net_custom = lib.load_network_custom
load_net_custom.argtypes = [c_char_p, c_char_p, c_int, c_int]
load_net_custom.restype = c_void_p

do_nms_obj = lib.do_nms_obj
do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

do_nms_sort = lib.do_nms_sort
do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

free_image = lib.free_image
free_image.argtypes = [IMAGE]

letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE

load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA

load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE

rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]

predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)
|
|
|
|
|
|
def array_to_image(arr):
    """Convert a 3-axis pixel array into a darknet ``IMAGE``.

    The input is indexed as (height, width, channels); it is transposed to
    (channels, height, width), flattened to contiguous float32 and divided
    by 255.0.

    Args:
        arr: numpy array with exactly three axes.
            NOTE(review): the /255.0 scaling suggests 8-bit pixel values
            are expected - confirm against callers.

    Returns:
        ``(im, data)`` where ``im`` is the ``IMAGE`` and ``data`` is the
        float32 array whose buffer ``im.data`` points to. The caller should
        keep ``data`` referenced for as long as ``im`` is in use.
    """
    planar = arr.transpose(2, 0, 1)
    c, h, w = planar.shape
    scaled = np.ascontiguousarray(planar.flat, dtype=np.float32) / 255.0
    data = scaled.ctypes.data_as(POINTER(c_float))
    im = IMAGE(w, h, c, data)
    return im, scaled
|
|
|
|
|
|
def classify(net, meta, im):
    """Run the network on ``im`` and return per-class scores, best first.

    Args:
        net: Network handle passed through to ``predict_image``.
        meta: ``METADATA`` providing ``classes`` and ``names``.
        im: ``IMAGE`` to classify.

    Returns:
        List of ``(name, score)`` tuples sorted by descending score. Names
        come from the module-level ``altNames`` when it is set, otherwise
        from ``meta.names``.
    """
    out = predict_image(net, im)
    # The label table does not change during the call, so pick it once.
    names = meta.names if altNames is None else altNames
    res = [(names[i], out[i]) for i in range(meta.classes)]
    return sorted(res, key=lambda x: -x[1])
|
|
|
|
|
|
def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45, debug=False):
    """Run detection on a pixel array and return the detected objects.

    Args:
        net: Network handle passed through to libdarknet.
        meta: ``METADATA`` providing ``classes`` and ``names``.
        image: Pixel array accepted by ``array_to_image``.
        thresh: Threshold forwarded to ``get_network_boxes``.
        hier_thresh: Hierarchy threshold forwarded to ``get_network_boxes``.
        nms: Threshold forwarded to ``do_nms_sort``; a falsy value skips
            that step.
        debug: When true, print progress messages at every step.

    Returns:
        List of ``(name, prob, (x, y, w, h))`` tuples, one per
        (detection, class) pair whose probability is above zero, sorted by
        descending probability. Names come from the module-level
        ``altNames`` when it is set, otherwise from ``meta.names``.
    """
    # `arr` is not used again, but it owns the buffer `im.data` points to
    # and must stay referenced until the network has consumed the image.
    im, arr = array_to_image(image)
    if debug:
        print("Loaded image")
    num = c_int(0)
    if debug:
        print("Assigned num")
    pnum = pointer(num)
    if debug:
        print("Assigned pnum")
    predict_image(net, im)
    if debug:
        print("did prediction")
    # dets = get_network_boxes(
    #     net, image.shape[1], image.shape[0],
    #     thresh, hier_thresh,
    #     None, 0, pnum, 0) # OpenCV
    dets = get_network_boxes(net, im.w, im.h,
                             thresh, hier_thresh, None, 0, pnum, 0)
    num = pnum[0]
    try:
        if debug:
            print("Got dets")
        if debug:
            print("got zeroth index of pnum")
        if nms:
            do_nms_sort(dets, num, meta.classes, nms)
        if debug:
            print("did sort")
        res = []
        names = meta.names if altNames is None else altNames
        if debug:
            print("about to range")
        for j in range(num):
            if debug:
                print("Ranging on "+str(j)+" of "+str(num))
            if debug:
                print("Classes: "+str(meta), meta.classes, meta.names)
            for i in range(meta.classes):
                prob = dets[j].prob[i]
                if debug:
                    print("Class-ranging on "+str(i)+" of " +
                          str(meta.classes)+"= "+str(prob))
                if prob > 0:
                    b = dets[j].bbox
                    nameTag = names[i]
                    if debug:
                        print("Got bbox", b)
                        print(nameTag)
                        print(prob)
                        print((b.x, b.y, b.w, b.h))
                    res.append((nameTag, prob, (b.x, b.y, b.w, b.h)))
        if debug:
            print("did range")
        res = sorted(res, key=lambda x: -x[1])
        if debug:
            print("did sort")
        # free_image(im) is intentionally not called: the pixel buffer is
        # owned by the numpy array `arr`, not by libdarknet.
        if debug:
            print("freed image")
    finally:
        # Release the C-side detections even if reading them raised.
        free_detections(dets, num)
    if debug:
        print("freed detections")
    return res
|
|
|
|
|
|
def convertBack(x, y, w, h):
    """Convert a centre/size box into integer corner coordinates.

    Args:
        x: Horizontal centre of the box.
        y: Vertical centre of the box.
        w: Full width of the box.
        h: Full height of the box.

    Returns:
        ``(xmin, ymin, xmax, ymax)`` with each value rounded with the
        built-in ``round`` and converted to ``int``.
    """
    half_w = w / 2
    half_h = h / 2
    xmin = int(round(x - half_w))
    xmax = int(round(x + half_w))
    ymin = int(round(y - half_h))
    ymax = int(round(y + half_h))
    return xmin, ymin, xmax, ymax
|
|
|
|
|
|
def cvDrawBoxes(detections, img):
    """Draw a green box and a "<label> [<percent>]" caption per detection.

    Args:
        detections: Iterable of ``(name, prob, (x, y, w, h))`` tuples as
            returned by ``detect``. ``name`` may be ``bytes`` (labels taken
            from ``meta.names``) or ``str`` (labels taken from
            ``altNames``).
        img: Image passed to ``cv2.rectangle`` and ``cv2.putText``.

    Returns:
        The ``img`` object that was passed in.
    """
    green = (0, 255, 0)
    for detection in detections:
        name, prob, bbox = detection[0], detection[1], detection[2]
        xmin, ymin, xmax, ymax = convertBack(
            float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3]))
        pt1 = (xmin, ymin)
        pt2 = (xmax, ymax)
        cv2.rectangle(img, pt1, pt2, green, 2)
        # Labels loaded from the names file are already `str`; calling
        # .decode() on them unconditionally raised AttributeError.
        label = name.decode() if isinstance(name, bytes) else str(name)
        cv2.putText(img,
                    label + " [" + str(round(prob * 100, 2)) + "]",
                    (pt1[0], pt1[1] + 20), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    [0, 255, 0], 4)
    return img
|
|
|
|
|
|
# Module-level state populated by YOLO() on first use.
netMain = None   # network handle returned by load_net_custom
metaMain = None  # METADATA returned by load_meta
altNames = None  # optional list of label strings; read by detect() and classify()
|
|
|
|
|
|
def _load_alt_names(metaPath):
    """Return the labels listed in the names file referenced by ``metaPath``.

    The data file is searched for a ``names = <path>`` line; if that path
    exists, it is read as one label per line. Loading is best effort:
    ``None`` is returned when there is no such line, the path does not
    exist, or a file cannot be read.
    """
    import re  # local import: `re` is not imported at module level here

    try:
        with open(metaPath) as metaFH:
            metaContents = metaFH.read()
        match = re.search("names *= *(.*)$", metaContents,
                          re.IGNORECASE | re.MULTILINE)
        if match is None:
            return None
        # Strip so a trailing "\r" or space does not break the lookup.
        namesPath = match.group(1).strip()
        if not os.path.exists(namesPath):
            return None
        with open(namesPath) as namesFH:
            namesList = namesFH.read().strip().split("\n")
        return [x.strip() for x in namesList]
    except (OSError, ValueError) as err:
        # Fall back to the names stored in the metadata rather than abort.
        print("Could not load class names from `" + metaPath + "`: "
              + str(err))
        return None


def YOLO():
    """Run detection on every frame of ``test.mp4`` and print the frame rate.

    On first call this loads the network, the metadata and (best effort)
    the label list into the module-level ``netMain``, ``metaMain`` and
    ``altNames``. Each frame is converted, resized to the network input
    size, passed to ``detect`` and annotated with ``cvDrawBoxes``. The loop
    ends when the capture stops returning frames.

    Raises:
        ValueError: If the config, weight or data file path does not exist.
    """
    global metaMain, netMain, altNames
    configPath = "./cfg/yolov3.cfg"
    weightPath = "./yolov3.weights"
    metaPath = "./cfg/coco.data"
    if not os.path.exists(configPath):
        raise ValueError("Invalid config path `" +
                         os.path.abspath(configPath)+"`")
    if not os.path.exists(weightPath):
        raise ValueError("Invalid weight path `" +
                         os.path.abspath(weightPath)+"`")
    if not os.path.exists(metaPath):
        raise ValueError("Invalid data file path `" +
                         os.path.abspath(metaPath)+"`")
    if netMain is None:
        netMain = load_net_custom(configPath.encode(
            "ascii"), weightPath.encode("ascii"), 0, 1)  # batch size = 1
    if metaMain is None:
        metaMain = load_meta(metaPath.encode("ascii"))
    if altNames is None:
        altNames = _load_alt_names(metaPath)

    # The network input size is constant, so query it once.
    net_size = (lib.network_width(netMain), lib.network_height(netMain))

    # cap = cv2.VideoCapture(0)  # alternative: capture device 0, not a file
    cap = cv2.VideoCapture("test.mp4")
    out = None
    try:
        # 3 and 4 are the OpenCV capture property ids for frame width and
        # frame height.
        cap.set(3, 1280)
        cap.set(4, 720)
        # NOTE(review): no frame is ever written to `out`, so output.avi is
        # created but left empty - confirm whether out.write(image) was
        # intended inside the loop.
        out = cv2.VideoWriter(
            "output.avi", cv2.VideoWriter_fourcc(*"MJPG"), 10.0, net_size)
        print("Starting the YOLO loop...")
        while True:
            prev_time = time.time()
            ret, frame_read = cap.read()
            if not ret:
                # End of stream or read failure: no frame to process.
                break
            frame_rgb = cv2.cvtColor(frame_read, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb, net_size,
                                       interpolation=cv2.INTER_LINEAR)
            detections = detect(netMain, metaMain, frame_resized, thresh=0.25)
            image = cvDrawBoxes(detections, frame_resized)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            print(1/(time.time()-prev_time))
    finally:
        # Always release the capture and writer, even if a frame failed.
        cap.release()
        if out is not None:
            out.release()
|
|
|
|
# Run the video detection demo when executed as a script.
if __name__ == "__main__":
    YOLO()
|