start_no_thread.py

import datetime
import time
import threading
import queue
import itertools
from collections import defaultdict
from statistics import mean

import cv2
import imutils
import numpy as np
import subprocess as sp
import multiprocessing as mp
import SharedArray as sa
from scipy.spatial import distance as dist
import tflite_runtime.interpreter as tflite
from tflite_runtime.interpreter import load_delegate

from frigate.edgetpu import ObjectDetector, EdgeTPUProcess, RemoteObjectDetector, load_labels
from frigate.motion import MotionDetector
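
# Single-threaded benchmark pipeline: decode frames from an ffmpeg pipe, detect
# motion, turn motion and tracked-object boxes into square regions, run object
# detection on each region, refine detections clipped by a region edge, and feed
# the results to the centroid-based ObjectTracker below.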
def draw_box_with_label(frame, x_min, y_min, x_max, y_max, label, info, thickness=2, color=None, position='ul'):
    if color is None:
        color = (0, 0, 255)
    display_text = "{}: {}".format(label, info)
    cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, thickness)
    font_scale = 0.5
    font = cv2.FONT_HERSHEY_SIMPLEX
    # get the width and height of the text box
    size = cv2.getTextSize(display_text, font, fontScale=font_scale, thickness=2)
    text_width = size[0][0]
    text_height = size[0][1]
    line_height = text_height + size[1]
    # set the text start position
    if position == 'ul':
        text_offset_x = x_min
        text_offset_y = 0 if y_min < line_height else y_min - (line_height + 8)
    elif position == 'ur':
        text_offset_x = x_max - (text_width + 8)
        text_offset_y = 0 if y_min < line_height else y_min - (line_height + 8)
    elif position == 'bl':
        text_offset_x = x_min
        text_offset_y = y_max
    elif position == 'br':
        text_offset_x = x_max - (text_width + 8)
        text_offset_y = y_max
    # make the coords of the box with a small padding of two pixels
    textbox_coords = ((text_offset_x, text_offset_y), (text_offset_x + text_width + 2, text_offset_y + line_height))
    cv2.rectangle(frame, textbox_coords[0], textbox_coords[1], color, cv2.FILLED)
    cv2.putText(frame, display_text, (text_offset_x, text_offset_y + line_height - 3), font, fontScale=font_scale, color=(0, 0, 0), thickness=2)
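
# calculate_region expands a bounding box into a square crop centered on the box:
# the square's edge is the box's longest edge times `multiplier`, clamped to the
# frame size and shifted back inside the frame if it would overflow. For example,
# a 40x20 box at (80, 90, 120, 110) with multiplier=2 becomes the 80x80 region
# (60, 60, 140, 140).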
def calculate_region(frame_shape, xmin, ymin, xmax, ymax, multiplier=2):
    # size is the longest edge of the box times the multiplier
    size = int(max(xmax - xmin, ymax - ymin) * multiplier)
    # if the size is too big to fit in the frame
    if size > min(frame_shape[0], frame_shape[1]):
        size = min(frame_shape[0], frame_shape[1])
    # x_offset is midpoint of bounding box minus half the size
    x_offset = int((xmax - xmin) / 2.0 + xmin - size / 2.0)
    # if outside the image
    if x_offset < 0:
        x_offset = 0
    elif x_offset > (frame_shape[1] - size):
        x_offset = frame_shape[1] - size
    # y_offset is midpoint of bounding box minus half the size
    y_offset = int((ymax - ymin) / 2.0 + ymin - size / 2.0)
    # if outside the image
    if y_offset < 0:
        y_offset = 0
    elif y_offset > (frame_shape[0] - size):
        y_offset = frame_shape[0] - size
    return (x_offset, y_offset, x_offset + size, y_offset + size)

def intersection(box_a, box_b):
    return (
        max(box_a[0], box_b[0]),
        max(box_a[1], box_b[1]),
        min(box_a[2], box_b[2]),
        min(box_a[3], box_b[3])
    )

def area(box):
    return (box[2] - box[0] + 1) * (box[3] - box[1] + 1)

def intersection_over_union(box_a, box_b):
    # determine the (x, y)-coordinates of the intersection rectangle
    intersect = intersection(box_a, box_b)
    # compute the area of the intersection rectangle
    inter_area = max(0, intersect[2] - intersect[0] + 1) * max(0, intersect[3] - intersect[1] + 1)
    if inter_area == 0:
        return 0.0
    # compute the area of both the prediction and ground-truth rectangles
    box_a_area = (box_a[2] - box_a[0] + 1) * (box_a[3] - box_a[1] + 1)
    box_b_area = (box_b[2] - box_b[0] + 1) * (box_b[3] - box_b[1] + 1)
    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas minus the intersection area
    iou = inter_area / float(box_a_area + box_b_area - inter_area)
    return iou

def clipped(obj, frame_shape):
    # if the object is within 5 pixels of the region border, and the region is not
    # on the edge of the frame, consider the object to be clipped
    box = obj[2]
    region = obj[3]
    if ((region[0] > 5 and box[0] - region[0] <= 5) or
        (region[1] > 5 and box[1] - region[1] <= 5) or
        (frame_shape[1] - region[2] > 5 and region[2] - box[2] <= 5) or
        (frame_shape[0] - region[3] > 5 and region[3] - box[3] <= 5)):
        return True
    else:
        return False

def filtered(obj):
    # only keep 'person' detections; everything else is filtered out
    if obj[0] != 'person':
        return True
    return False

def create_tensor_input(frame, region):
    cropped_frame = frame[region[1]:region[3], region[0]:region[2]]
    # resize to 300x300 if needed
    if cropped_frame.shape != (300, 300, 3):
        # TODO: use Pillow-SIMD?
        cropped_frame = cv2.resize(cropped_frame, dsize=(300, 300), interpolation=cv2.INTER_LINEAR)
    # expand dimensions since the model expects images to have shape: [1, 300, 300, 3]
    return np.expand_dims(cropped_frame, axis=0)
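
# Centroid tracker in the style of the classic PyImageSearch implementation:
# new detections are matched to existing objects of the same label by nearest
# centroid distance, and unmatched objects accrue a `disappeared` count until
# they are dropped after `max_disappeared` consecutive misses.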
class ObjectTracker():
    def __init__(self, max_disappeared):
        self.tracked_objects = {}
        self.disappeared = {}
        self.max_disappeared = max_disappeared

    def register(self, index, frame_time, obj):
        id = f"{frame_time}-{index}"
        obj['id'] = id
        obj['frame_time'] = frame_time
        obj['top_score'] = obj['score']
        self.add_history(obj)
        self.tracked_objects[id] = obj
        self.disappeared[id] = 0

    def deregister(self, id):
        del self.tracked_objects[id]
        del self.disappeared[id]

    def update(self, id, new_obj):
        self.disappeared[id] = 0
        self.tracked_objects[id].update(new_obj)
        self.add_history(self.tracked_objects[id])
        if self.tracked_objects[id]['score'] > self.tracked_objects[id]['top_score']:
            self.tracked_objects[id]['top_score'] = self.tracked_objects[id]['score']

    def add_history(self, obj):
        entry = {
            'score': obj['score'],
            'box': obj['box'],
            'region': obj['region'],
            'centroid': obj['centroid'],
            'frame_time': obj['frame_time']
        }
        if 'history' in obj:
            obj['history'].append(entry)
        else:
            obj['history'] = [entry]

    def match_and_update(self, frame_time, new_objects):
        if len(new_objects) == 0:
            for id in list(self.tracked_objects.keys()):
                if self.disappeared[id] >= self.max_disappeared:
                    self.deregister(id)
                else:
                    self.disappeared[id] += 1
            return
        # group by label
        new_object_groups = defaultdict(lambda: [])
        for obj in new_objects:
            new_object_groups[obj[0]].append({
                'label': obj[0],
                'score': obj[1],
                'box': obj[2],
                'region': obj[3]
            })
        # track objects for each label type
        for label, group in new_object_groups.items():
            current_objects = [o for o in self.tracked_objects.values() if o['label'] == label]
            current_ids = [o['id'] for o in current_objects]
            current_centroids = np.array([o['centroid'] for o in current_objects])
            # compute centroids of new objects
            for obj in group:
                centroid_x = int((obj['box'][0] + obj['box'][2]) / 2.0)
                centroid_y = int((obj['box'][1] + obj['box'][3]) / 2.0)
                obj['centroid'] = (centroid_x, centroid_y)
            if len(current_objects) == 0:
                for index, obj in enumerate(group):
                    self.register(index, frame_time, obj)
                # move on to the next label group instead of abandoning the rest
                continue
            new_centroids = np.array([o['centroid'] for o in group])
            # compute the distance between each pair of tracked
            # centroids and new centroids, respectively -- our
            # goal will be to match each new centroid to an existing
            # object centroid
            D = dist.cdist(current_centroids, new_centroids)
            # in order to perform this matching we must (1) find the
            # smallest value in each row and then (2) sort the row
            # indexes based on their minimum values so that the row
            # with the smallest value is at the *front* of the index
            # list
            rows = D.min(axis=1).argsort()
            # next, we perform a similar process on the columns by
            # finding the smallest value in each column and then
            # sorting using the previously computed row index list
            cols = D.argmin(axis=1)[rows]
            # in order to determine if we need to update, register,
            # or deregister an object we need to keep track of which
            # of the rows and column indexes we have already examined
            usedRows = set()
            usedCols = set()
            # loop over the combination of the (row, column) index tuples
            for (row, col) in zip(rows, cols):
                # if we have already examined either the row or
                # column value before, ignore it
                if row in usedRows or col in usedCols:
                    continue
                # otherwise, grab the object ID for the current row,
                # set its new centroid, and reset the disappeared counter
                objectID = current_ids[row]
                self.update(objectID, group[col])
                # indicate that we have examined each of the row and
                # column indexes, respectively
                usedRows.add(row)
                usedCols.add(col)
            # compute the row and column indexes we have NOT yet examined
            unusedRows = set(range(0, D.shape[0])).difference(usedRows)
            unusedCols = set(range(0, D.shape[1])).difference(usedCols)
            # in the event that the number of object centroids is
            # equal to or greater than the number of input centroids
            # we need to check and see if some of these objects have
            # potentially disappeared
            if D.shape[0] >= D.shape[1]:
                for row in unusedRows:
                    id = current_ids[row]
                    if self.disappeared[id] >= self.max_disappeared:
                        self.deregister(id)
                    else:
                        self.disappeared[id] += 1
            # if the number of input centroids is greater
            # than the number of existing object centroids we need to
            # register each new input centroid as a trackable object
            else:
                for col in unusedCols:
                    self.register(col, frame_time, group[col])
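
# Benchmark harness: pipe a clip through ffmpeg as raw rgb24 frames, run the
# motion -> regions -> detect -> refine -> track pipeline on every frame, write
# annotated debug frames to disk, and report FPS and average per-frame latency.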
def main():
    frames = 0
    # frame_queue = queue.Queue(maxsize=5)
    # frame_cache = {}
    frame_shape = (1080, 1920, 3)
    # frame_shape = (720, 1280, 3)
    frame_size = frame_shape[0] * frame_shape[1] * frame_shape[2]
    frame = np.zeros(frame_shape, np.uint8)
    motion_detector = MotionDetector(frame_shape, resize_factor=6)
    # object_detector = ObjectDetector('/lab/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite', '/lab/labelmap.txt')
    # object_detector = RemoteObjectDetector('/lab/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite', '/lab/labelmap.txt')
    # object_detector = ObjectDetector('/lab/detect.tflite', '/lab/labelmap.txt')
    object_detector = RemoteObjectDetector('/lab/detect.tflite', '/lab/labelmap.txt')
    object_tracker = ObjectTracker(10)
    # f = open('/debug/input/back.rgb24', 'rb')
    # f = open('/debug/back.raw_video', 'rb')
    # f = open('/debug/ali-jake.raw_video', 'rb')
    # -hwaccel vaapi -hwaccel_device /dev/dri/renderD128 -hwaccel_output_format yuv420p -i output.mp4 -f rawvideo -pix_fmt rgb24 pipe:
    ffmpeg_cmd = (['ffmpeg'] +
                  ['-hide_banner', '-loglevel', 'panic'] +
                  # ['-hwaccel', 'vaapi', '-hwaccel_device', '/dev/dri/renderD129', '-hwaccel_output_format', 'yuv420p'] +
                  # ['-i', '/debug/input/output.mp4'] +
                  ['-i', '/lab/debug/back-night.mp4'] +
                  ['-f', 'rawvideo', '-pix_fmt', 'rgb24'] +
                  ['pipe:'])
    print(" ".join(ffmpeg_cmd))
    ffmpeg_process = sp.Popen(ffmpeg_cmd, stdout=sp.PIPE, bufsize=frame_size)
    total_detections = 0
    start = datetime.datetime.now().timestamp()
    frame_times = []
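    # each iteration reads exactly one raw rgb24 frame (frame_size bytes) from
    # the ffmpeg pipe; an empty read means the clip has ended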
    while True:
        start_frame = datetime.datetime.now().timestamp()
        frame_detections = 0
        frame_bytes = ffmpeg_process.stdout.read(frame_size)
        if not frame_bytes:
            break
        frame_time = datetime.datetime.now().timestamp()
        # store frame in numpy array
        frame[:] = (np
                    .frombuffer(frame_bytes, np.uint8)
                    .reshape(frame_shape))
        frames += 1
        # look for motion
        motion_boxes = motion_detector.detect(frame)
        tracked_objects = object_tracker.tracked_objects.values()
        # merge areas of motion that intersect with a known tracked object into a single area to look at
        areas_of_interest = []
        used_motion_boxes = []
        for obj in tracked_objects:
            x_min, y_min, x_max, y_max = obj['box']
            for m_index, motion_box in enumerate(motion_boxes):
                # clamp the intersection to zero so two disjoint boxes can't
                # produce a spuriously positive overlap area
                intersect_box = intersection(obj['box'], motion_box)
                overlap = max(0, intersect_box[2] - intersect_box[0] + 1) * max(0, intersect_box[3] - intersect_box[1] + 1)
                if overlap / area(motion_box) > .5:
                    used_motion_boxes.append(m_index)
                    # grow the area of interest to cover every overlapping motion box
                    x_min = min(x_min, motion_box[0])
                    y_min = min(y_min, motion_box[1])
                    x_max = max(x_max, motion_box[2])
                    y_max = max(y_max, motion_box[3])
            areas_of_interest.append((x_min, y_min, x_max, y_max))
        unused_motion_boxes = set(range(0, len(motion_boxes))).difference(used_motion_boxes)
        # compute motion regions
        motion_regions = [calculate_region(frame_shape, motion_boxes[i][0], motion_boxes[i][1], motion_boxes[i][2], motion_boxes[i][3], 1.2)
                          for i in unused_motion_boxes]
        # compute tracked object regions
        object_regions = [calculate_region(frame_shape, a[0], a[1], a[2], a[3], 1.2)
                          for a in areas_of_interest]
        # merge regions with high IOU
        merged_regions = motion_regions + object_regions
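        # greedily merge the pair of regions with the highest IOU until no pair
        # overlaps by more than 0.1, replacing each merged pair with one square
        # region covering both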
        while True:
            max_iou = 0.0
            max_indices = None
            region_indices = range(len(merged_regions))
            for a, b in itertools.combinations(region_indices, 2):
                iou = intersection_over_union(merged_regions[a], merged_regions[b])
                if iou > max_iou:
                    max_iou = iou
                    max_indices = (a, b)
            if max_iou > 0.1:
                a = merged_regions[max_indices[0]]
                b = merged_regions[max_indices[1]]
                merged_regions.append(calculate_region(frame_shape,
                                                       min(a[0], b[0]),
                                                       min(a[1], b[1]),
                                                       max(a[2], b[2]),
                                                       max(a[3], b[3]),
                                                       1))
                del merged_regions[max(max_indices[0], max_indices[1])]
                del merged_regions[min(max_indices[0], max_indices[1])]
            else:
                break
        # resize regions and detect
        detections = []
        for region in merged_regions:
            tensor_input = create_tensor_input(frame, region)
            region_detections = object_detector.detect(tensor_input)
            frame_detections += 1
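            # each detection d holds a label, score, and box; the box is treated
            # here as normalized (ymin, xmin, ymax, xmax) relative to the square
            # region, so scaling by the region edge length and adding the region
            # offset maps it back to full-frame pixels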
            for d in region_detections:
                if filtered(d):
                    continue
                box = d[2]
                size = region[2] - region[0]
                x_min = int((box[1] * size) + region[0])
                y_min = int((box[0] * size) + region[1])
                x_max = int((box[3] * size) + region[0])
                y_max = int((box[2] * size) + region[1])
                detections.append((
                    d[0],
                    d[1],
                    (x_min, y_min, x_max, y_max),
                    region))
        #########
        # merge objects, check for clipped objects and look again up to 4 times
        #########
        refining = True
        refine_count = 0
        while refining and refine_count < 4:
            refining = False
            # group by label
            detected_object_groups = defaultdict(lambda: [])
            for detection in detections:
                detected_object_groups[detection[0]].append(detection)
            selected_objects = []
            for group in detected_object_groups.values():
                # apply non-maxima suppression to suppress weak, overlapping bounding boxes
                boxes = [(o[2][0], o[2][1], o[2][2] - o[2][0], o[2][3] - o[2][1])
                         for o in group]
                confidences = [o[1] for o in group]
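                # NMSBoxes takes (x, y, w, h) boxes and returns the indices of the
                # boxes that survive suppression; this OpenCV version wraps each
                # index in a single-element array, hence index[0] below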
                idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
                for index in idxs:
                    obj = group[index[0]]
                    if clipped(obj, frame_shape):
                        box = obj[2]
                        # calculate a new region that will hopefully get the entire object
                        region = calculate_region(frame_shape,
                                                  box[0], box[1],
                                                  box[2], box[3])
                        tensor_input = create_tensor_input(frame, region)
                        # run detection on new region
                        refined_detections = object_detector.detect(tensor_input)
                        frame_detections += 1
                        for d in refined_detections:
                            if filtered(d):
                                continue
                            box = d[2]
                            size = region[2] - region[0]
                            x_min = int((box[1] * size) + region[0])
                            y_min = int((box[0] * size) + region[1])
                            x_max = int((box[3] * size) + region[0])
                            y_max = int((box[2] * size) + region[1])
                            selected_objects.append((
                                d[0],
                                d[1],
                                (x_min, y_min, x_max, y_max),
                                region))
                        refining = True
                    else:
                        selected_objects.append(obj)
            # set the detections list to only include top, complete objects
            # and new detections
            detections = selected_objects
            if refining:
                refine_count += 1
        # now that we have refined our detections, we need to track objects
        object_tracker.match_and_update(frame_time, detections)
        total_detections += frame_detections
        frame_times.append(datetime.datetime.now().timestamp() - start_frame)
        # if (frames >= 700 and frames <= 1635) or (frames >= 2500):
        # if (frames >= 300 and frames <= 600):
        if frames >= 0:
            # row1 = cv2.hconcat([gray, cv2.convertScaleAbs(avg_frame)])
            # row2 = cv2.hconcat([frameDelta, thresh])
            # cv2.imwrite(f"/lab/debug/output/{frames}.jpg", cv2.vconcat([row1, row2]))
            # cv2.imwrite(f"/lab/debug/output/resized-frame-{frames}.jpg", resized_frame)
            # for region in motion_regions:
            #     cv2.rectangle(frame, (region[0], region[1]), (region[2], region[3]), (255, 128, 0), 2)
            # for region in object_regions:
            #     cv2.rectangle(frame, (region[0], region[1]), (region[2], region[3]), (0, 128, 255), 2)
            for region in merged_regions:
                cv2.rectangle(frame, (region[0], region[1]), (region[2], region[3]), (0, 255, 0), 2)
            for box in motion_boxes:
                cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 2)
            for detection in detections:
                box = detection[2]
                draw_box_with_label(frame, box[0], box[1], box[2], box[3], detection[0], f"{detection[1]*100}%")
            for obj in object_tracker.tracked_objects.values():
                box = obj['box']
                draw_box_with_label(frame, box[0], box[1], box[2], box[3], obj['label'], obj['id'], thickness=1, color=(0, 0, 255), position='bl')
            cv2.putText(frame, str(total_detections), (10, 10), cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5, color=(0, 0, 0), thickness=2)
            cv2.putText(frame, str(frame_detections), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5, color=(0, 0, 0), thickness=2)
            cv2.imwrite(f"/lab/debug/output/frame-{frames}.jpg", frame)
        # break
    duration = datetime.datetime.now().timestamp() - start
    print(f"Processed {frames} frames for {duration:.2f} seconds and {(frames/duration):.2f} FPS.")
    print(f"Total detections: {total_detections}")
    print(f"Average frame processing time: {mean(frame_times)*1000:.2f}ms")

if __name__ == '__main__':
    main()