detect_objects.py

import os
import cv2
import time
import datetime
import ctypes
import multiprocessing as mp
import threading
import numpy as np
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
from flask import Flask, Response

RTSP_URL = os.getenv('RTSP_URL')

# Path to the frozen detection graph. This is the actual model used for object detection.
PATH_TO_CKPT = '/frozen_inference_graph.pb'

# List of strings used to add the correct label for each box.
PATH_TO_LABELS = '/label_map.pbtext'

# TODO: make dynamic?
NUM_CLASSES = 90

#REGIONS = "600,0,380:600,600,380:600,1200,380"
REGIONS = os.getenv('REGIONS')
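# both RTSP_URL and REGIONS must be provided as environment variables. REGIONS
# is a ':' separated list of square detection regions, each given as
# 'size,x_offset,y_offset' in pixels; the commented example above defines three
# 600x600 regions side by side across the frame.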

DETECTED_OBJECTS = []

# load the label map and build the category index used to look up class names
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
                                                            use_display_name=True)
category_index = label_map_util.create_category_index(categories)

def detect_objects(cropped_frame, sess, detection_graph, region_size, region_x_offset, region_y_offset):
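    """Run the loaded model on a cropped region and return detections as a flat list.

    Each detection contributes 6 values: [class_id, score, ymin, xmin, ymax, xmax],
    with box coordinates scaled back into full-frame pixel coordinates. At most
    10 detections (60 values) are returned so the result fits the shared output array.
    """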
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(cropped_frame, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Actual detection.
    (boxes, scores, classes, num_detections) = sess.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})

    # build a flat array of detected objects
    objects = []
    for index, value in enumerate(classes[0]):
        score = scores[0, index]
        if score > 0.1:
            # boxes come back as normalized [ymin, xmin, ymax, xmax], so scale
            # them by the region size and add the region offsets
            box = boxes[0, index].tolist()
            box[0] = (box[0] * region_size) + region_y_offset
            box[1] = (box[1] * region_size) + region_x_offset
            box[2] = (box[2] * region_size) + region_y_offset
            box[3] = (box[3] * region_size) + region_x_offset
            objects += [value, score] + box
        # only keep the first 10 objects
        if len(objects) == 60:
            break

    return objects

class ObjectParser(threading.Thread):
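    """Background thread that publishes detections for the MJPEG stream.

    Polls the shared output array of every detection process, unpacks the flat
    [class_id, score, ymin, xmin, ymax, xmax] records, and replaces the global
    DETECTED_OBJECTS list with the parsed results.
    """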
    def __init__(self, object_arrays):
        threading.Thread.__init__(self)
        self._object_arrays = object_arrays

    def run(self):
        global DETECTED_OBJECTS
        while True:
            detected_objects = []
            for object_array in self._object_arrays:
                object_index = 0
                while object_index < 60 and object_array[object_index] > 0:
                    object_class = object_array[object_index]
                    detected_objects.append({
                        'name': str(category_index.get(object_class).get('name')),
                        'score': object_array[object_index + 1],
                        'ymin': int(object_array[object_index + 2]),
                        'xmin': int(object_array[object_index + 3]),
                        'ymax': int(object_array[object_index + 4]),
                        'xmax': int(object_array[object_index + 5])
                    })
                    object_index += 6
            DETECTED_OBJECTS = detected_objects
            time.sleep(0.01)

def main():
    # parse the selected regions from the REGIONS environment variable
    regions = []
    for region_string in REGIONS.split(':'):
        region_parts = region_string.split(',')
        regions.append({
            'size': int(region_parts[0]),
            'x_offset': int(region_parts[1]),
            'y_offset': int(region_parts[2])
        })

    # capture a single frame and check the frame shape so the correct array
    # size can be allocated in memory
    video = cv2.VideoCapture(RTSP_URL)
    ret, frame = video.read()
    if ret:
        frame_shape = frame.shape
    else:
        print("Unable to capture video stream")
        exit(1)
    video.release()

    shared_memory_objects = []
    for region in regions:
        shared_memory_objects.append({
            # shared value for storing the time the frame was captured
            'frame_time': mp.Value('d', 0.0),
            # shared value for the motion detection signal (1 for motion, 0 for no motion)
            'motion_detected': mp.Value('i', 1),
            # shared array for storing up to 10 detected objects
            # note: this must be a double even though the value being stored
            # is a float. otherwise it stops updating the value in shared
            # memory. probably something to do with the size of the memory block
            'output_array': mp.Array(ctypes.c_double, 6 * 10)
        })
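    # note: nothing in this file ever writes to motion_detected, so it stays at
    # its initial value of 1 and detection runs continuously until a motion
    # detector is wired in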

    # compute the flattened array length from the frame shape
    flat_array_length = frame_shape[0] * frame_shape[1] * frame_shape[2]
    # create a shared array for storing the full frame image data
    # (frames from OpenCV are 8-bit BGR, so uint8 is the matching element type;
    # a wider type would break the JPEG encoding and PIL drawing downstream)
    shared_arr = mp.Array(ctypes.c_uint8, flat_array_length)
    # shape the current frame so it can be treated as an image
    frame_arr = tonumpyarray(shared_arr).reshape(frame_shape)
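    # frame_arr is a view over the shared buffer, not a copy, so frames written
    # by the capture process are immediately visible here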

    capture_process = mp.Process(target=fetch_frames, args=(shared_arr,
        [obj['frame_time'] for obj in shared_memory_objects], frame_shape))
    capture_process.daemon = True

    detection_processes = []
    for index, region in enumerate(regions):
        detection_process = mp.Process(target=process_frames, args=(shared_arr,
            shared_memory_objects[index]['output_array'],
            shared_memory_objects[index]['frame_time'],
            shared_memory_objects[index]['motion_detected'],
            frame_shape,
            region['size'], region['x_offset'], region['y_offset']))
        detection_process.daemon = True
        detection_processes.append(detection_process)

    object_parser = ObjectParser([obj['output_array'] for obj in shared_memory_objects])
    object_parser.start()

    capture_process.start()
    print("capture_process pid ", capture_process.pid)

    for detection_process in detection_processes:
        detection_process.start()
        print("detection_process pid ", detection_process.pid)

    app = Flask(__name__)

    @app.route('/')
    def index():
        # return a multipart response
        return Response(imagestream(),
                        mimetype='multipart/x-mixed-replace; boundary=frame')
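    # multipart/x-mixed-replace is the standard MJPEG streaming trick: the
    # browser keeps the connection open and replaces the displayed image each
    # time a new '--frame' part arrives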
    def imagestream():
        global DETECTED_OBJECTS
        while True:
            # max out at 5 FPS
            time.sleep(0.2)
            # make a copy of the current detected objects
            detected_objects = DETECTED_OBJECTS.copy()
            # make a copy of the current frame
            frame = frame_arr.copy()
            # convert to RGB for drawing
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # draw the bounding boxes on the screen
            for obj in detected_objects:
                vis_util.draw_bounding_box_on_image_array(frame,
                    obj['ymin'],
                    obj['xmin'],
                    obj['ymax'],
                    obj['xmax'],
                    color='red',
                    thickness=2,
                    display_str_list=["{}: {}%".format(obj['name'], int(obj['score'] * 100))],
                    use_normalized_coordinates=False)
            for region in regions:
                cv2.rectangle(frame, (region['x_offset'], region['y_offset']),
                              (region['x_offset'] + region['size'], region['y_offset'] + region['size']),
                              (255, 255, 255), 2)
            # convert back to BGR
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # encode the image into a jpg
            ret, jpg = cv2.imencode('.jpg', frame)
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + jpg.tobytes() + b'\r\n\r\n')

    app.run(host='0.0.0.0', debug=False)
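    # app.run blocks until the server exits, so the joins below only run at shutdown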
    capture_process.join()
    for detection_process in detection_processes:
        detection_process.join()
    object_parser.join()

# convert the shared memory array into a numpy array
def tonumpyarray(mp_arr):
    return np.frombuffer(mp_arr.get_obj(), dtype=np.uint8)
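# np.frombuffer wraps the shared buffer in place rather than copying it, so
# every process that reshapes this array reads and writes the same pixels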

# fetch frames as fast as possible, only decoding a frame when the
# detection processes have consumed the current one
def fetch_frames(shared_arr, shared_frame_times, frame_shape):
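    # grab() advances the stream without paying the decode cost; retrieve()
    # decodes only when every consumer is ready, which keeps the capture loop
    # real-time even when detection falls behind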
    # convert the shared memory array into numpy and shape it into an image array
    arr = tonumpyarray(shared_arr).reshape(frame_shape)

    # start the video capture
    video = cv2.VideoCapture(RTSP_URL)
    # keep the buffer small so we minimize old data
    video.set(cv2.CAP_PROP_BUFFERSIZE, 1)

    while True:
        # grab the frame, but don't decode it yet
        ret = video.grab()
        # snapshot the time the frame was grabbed
        frame_time = datetime.datetime.now()
        if ret:
            # if the detection processes are ready for the next frame, decode it;
            # otherwise skip this frame and move on to the next one
            if all(shared_frame_time.value == 0.0 for shared_frame_time in shared_frame_times):
                # go ahead and decode the current frame
                ret, frame = video.retrieve()
                if ret:
                    arr[:] = frame
                    # signal to the detection processes by setting the shared frame times
                    for shared_frame_time in shared_frame_times:
                        shared_frame_time.value = frame_time.timestamp()
            else:
                # sleep a little to reduce CPU usage
                time.sleep(0.01)

    video.release()

# do the actual object detection
def process_frames(shared_arr, shared_output_arr, shared_frame_time, shared_motion, frame_shape, region_size, region_x_offset, region_y_offset):
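    # protocol with the capture process: a non-zero shared_frame_time means a
    # fresh frame is waiting; setting it back to 0.0 both consumes the frame
    # and signals that the next one can be decoded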
    # shape the shared input array into a frame for processing
    arr = tonumpyarray(shared_arr).reshape(frame_shape)

    # load a (frozen) Tensorflow model into memory before the processing loop
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        sess = tf.Session(graph=detection_graph)

    no_frames_available = -1
    while True:
        # if there is no motion detected
        if shared_motion.value == 0:
            time.sleep(0.01)
            continue

        # if there isn't a frame ready for processing
        if shared_frame_time.value == 0.0:
            # save the first time there were no frames available
            if no_frames_available == -1:
                no_frames_available = datetime.datetime.now().timestamp()
            # if there haven't been any frames available in 30 seconds,
            # sleep to avoid using so much cpu if the camera feed is down
            if no_frames_available > 0 and (datetime.datetime.now().timestamp() - no_frames_available) > 30:
                time.sleep(1)
                print("sleeping because no frames have been available in a while")
            else:
                # rest a little bit to avoid maxing out the CPU
                time.sleep(0.01)
            continue

        # we got a valid frame, so reset the timer
        no_frames_available = -1

        # if the frame is more than 0.5 seconds old, discard it
        if (datetime.datetime.now().timestamp() - shared_frame_time.value) > 0.5:
            # signal that we need a new frame
            shared_frame_time.value = 0.0
            # rest a little bit to avoid maxing out the CPU
            time.sleep(0.01)
            continue

        # make a copy of the cropped frame
        cropped_frame = arr[region_y_offset:region_y_offset + region_size, region_x_offset:region_x_offset + region_size].copy()
        frame_time = shared_frame_time.value
        # signal that the frame has been used so a new one will be ready
        shared_frame_time.value = 0.0

        # convert to RGB
        cropped_frame_rgb = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB)
        # do the object detection
        objects = detect_objects(cropped_frame_rgb, sess, detection_graph, region_size, region_x_offset, region_y_offset)
        # copy the detected objects to the output array, padding with zeros when needed
        shared_output_arr[:] = objects + [0.0] * (60 - len(objects))

if __name__ == '__main__':
    mp.freeze_support()
    main()