detect_objects.py

import os
import cv2
import time
import datetime
import ctypes
import logging
import multiprocessing as mp
from contextlib import closing
import numpy as np
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
from flask import Flask, Response, make_response
RTSP_URL = os.getenv('RTSP_URL')

# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = '/frozen_inference_graph.pb'
# List of the strings used to add the correct label to each box.
PATH_TO_LABELS = '/label_map.pbtext'
# TODO: make dynamic?
NUM_CLASSES = 90

REGION_SIZE = 300
REGION_X_OFFSET = 1250
REGION_Y_OFFSET = 180

# Load the label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
                                                            use_display_name=True)
category_index = label_map_util.create_category_index(categories)
def detect_objects(cropped_frame, sess, detection_graph, region_size, region_x_offset, region_y_offset):
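    """Run the loaded model against a cropped frame and return a flat list of
    detections, 6 values per object: class id, score, and the box coordinates
    (ymin, xmin, ymax, xmax) translated back into full-frame pixels."""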
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(cropped_frame, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    # Actual detection.
    (boxes, scores, classes, num_detections) = sess.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})
    # build a flat array of detected objects
    objects = []
    for index, value in enumerate(classes[0]):
        score = scores[0, index]
        if score > 0.1:
            # boxes come back in coordinates normalized to the cropped region,
            # so scale them up and shift them back into full-frame pixels
            box = boxes[0, index].tolist()
            box[0] = (box[0] * region_size) + region_y_offset
            box[1] = (box[1] * region_size) + region_x_offset
            box[2] = (box[2] * region_size) + region_y_offset
            box[3] = (box[3] * region_size) + region_x_offset
            objects += [value, score] + box
        # only get the first 10 objects (6 values each)
        if len(objects) == 60:
            break
    return objects
def main():
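    """Set up shared memory, start the capture and detection processes, and
    serve an MJPEG debug stream with Flask."""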
    # capture a single frame and check the frame shape so the correct array
    # size can be allocated in memory
    video = cv2.VideoCapture(RTSP_URL)
    ret, frame = video.read()
    if ret:
        frame_shape = frame.shape
    else:
        print("Unable to capture video stream")
        exit(1)
    video.release()

    # create shared value for storing the time the frame was captured
    # note: this must be a double even though the value you are storing
    # is a float. otherwise it stops updating the value in shared
    # memory. probably something to do with the size of the memory block
    shared_frame_time = mp.Value('d', 0.0)

    # compute the flattened array length from the array shape
    flat_array_length = frame_shape[0] * frame_shape[1] * frame_shape[2]
    # create shared array for storing the full frame image data
    # (OpenCV frames are 8-bit BGR, so use uint8 elements)
    shared_arr = mp.Array(ctypes.c_uint8, flat_array_length)
    # shape current frame so it can be treated as an image
    frame_arr = tonumpyarray(shared_arr).reshape(frame_shape)

    # create shared array for storing 10 detected objects
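    # each detection occupies 6 consecutive doubles:
    # [class_id, score, ymin, xmin, ymax, xmax]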
    shared_output_arr = mp.Array(ctypes.c_double, 6*10)

    capture_process = mp.Process(target=fetch_frames, args=(shared_arr, shared_frame_time, frame_shape))
    capture_process.daemon = True
    detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr, shared_frame_time, frame_shape, REGION_SIZE, REGION_X_OFFSET, REGION_Y_OFFSET))
    detection_process.daemon = True

    capture_process.start()
    print("capture_process pid ", capture_process.pid)
    detection_process.start()
    print("detection_process pid ", detection_process.pid)

    app = Flask(__name__)

    @app.route('/')
    def index():
        # return a multipart response
        return Response(imagestream(),
                        mimetype='multipart/x-mixed-replace; boundary=frame')
    def imagestream():
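        """Yield the most recent frame with detections drawn on it as JPEGs
        for the multipart HTTP response."""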
        while True:
            # max out at 5 FPS
            time.sleep(0.2)
            frame = frame_arr.copy()
            # convert to RGB for drawing
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # draw the bounding boxes on the screen
            object_index = 0
            while object_index < 60 and shared_output_arr[object_index] > 0:
                object_class = shared_output_arr[object_index]
                object_name = str(category_index.get(int(object_class)).get('name'))
                score = shared_output_arr[object_index+1]
                display_str = '{}: {}%'.format(object_name, int(100*score))
                ymin = int(shared_output_arr[object_index+2])
                xmin = int(shared_output_arr[object_index+3])
                ymax = int(shared_output_arr[object_index+4])
                xmax = int(shared_output_arr[object_index+5])
                vis_util.draw_bounding_box_on_image_array(frame,
                                                          ymin,
                                                          xmin,
                                                          ymax,
                                                          xmax,
                                                          color='red',
                                                          thickness=2,
                                                          display_str_list=[display_str],
                                                          use_normalized_coordinates=False)
                object_index += 6
            # outline the region being monitored
            cv2.rectangle(frame, (REGION_X_OFFSET, REGION_Y_OFFSET), (REGION_X_OFFSET+REGION_SIZE, REGION_Y_OFFSET+REGION_SIZE), (255, 255, 255), 2)
            # convert back to BGR
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # encode the image into a jpg
            ret, jpg = cv2.imencode('.jpg', frame)
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + jpg.tobytes() + b'\r\n\r\n')

    app.run(host='0.0.0.0', debug=False)

    capture_process.join()
    detection_process.join()
# convert shared memory array into numpy array
def tonumpyarray(mp_arr):
    # dtype must match the ctypes element type of the shared array (uint8)
    return np.frombuffer(mp_arr.get_obj(), dtype=np.uint8)
# fetch the frames as fast as possible, only decoding the frames when the
# detection_process has consumed the current frame
def fetch_frames(shared_arr, shared_frame_time, frame_shape):
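    """Grab frames from the RTSP stream continuously, decoding into shared
    memory only when the detection process has zeroed shared_frame_time to
    signal that it is ready for a new frame."""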
    # convert shared memory array into numpy and shape into image array
    arr = tonumpyarray(shared_arr).reshape(frame_shape)

    # start the video capture
    video = cv2.VideoCapture(RTSP_URL)
    # keep the buffer small so we minimize old data
    video.set(cv2.CAP_PROP_BUFFERSIZE, 1)

    while True:
        # grab the frame, but don't decode it yet
        ret = video.grab()
        # snapshot the time the frame was grabbed
        frame_time = datetime.datetime.now()
        if ret:
            # if the detection_process is ready for the next frame decode it
            # otherwise skip this frame and move onto the next one
            if shared_frame_time.value == 0.0:
                # go ahead and decode the current frame
                ret, frame = video.retrieve()
                if ret:
                    # copy the frame into the numpy array
                    # Position 1
                    # cropped_frame[:] = frame[270:720, 550:1000]
                    # Position 2
                    # frame_cropped = frame[270:720, 100:550]
                    arr[:] = frame
                    # signal to the detection_process by setting the shared_frame_time
                    shared_frame_time.value = frame_time.timestamp()

    video.release()
# do the actual object detection
def process_frames(shared_arr, shared_output_arr, shared_frame_time, frame_shape, region_size, region_x_offset, region_y_offset):
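    """Wait for fresh frames in shared memory, run object detection on the
    configured region, and write the flattened results to shared_output_arr."""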
    # shape shared input array into frame for processing
    arr = tonumpyarray(shared_arr).reshape(frame_shape)

    # Load a (frozen) Tensorflow model into memory before the processing loop
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        sess = tf.Session(graph=detection_graph)

    no_frames_available = -1
    while True:
        # if there isn't a frame ready for processing
        if shared_frame_time.value == 0.0:
            # save the first time there were no frames available
            if no_frames_available == -1:
                no_frames_available = datetime.datetime.now().timestamp()
            # if there haven't been any frames available in 30 seconds,
            # sleep to avoid using so much cpu if the camera feed is down
            if no_frames_available > 0 and (datetime.datetime.now().timestamp() - no_frames_available) > 30:
                time.sleep(1)
                print("sleeping because no frames have been available in a while")
            else:
                # rest a little bit to avoid maxing out the CPU
                time.sleep(0.01)
            continue

        # we got a valid frame, so reset the timer
        no_frames_available = -1

        # if the frame is more than 0.5 second old, discard it
        if (datetime.datetime.now().timestamp() - shared_frame_time.value) > 0.5:
            # signal that we need a new frame
            shared_frame_time.value = 0.0
            # rest a little bit to avoid maxing out the CPU
            time.sleep(0.01)
            continue

        # make a copy of the cropped frame
        cropped_frame = arr[region_y_offset:region_y_offset+region_size, region_x_offset:region_x_offset+region_size].copy()
        frame_time = shared_frame_time.value
        # signal that the frame has been used so a new one will be ready
        shared_frame_time.value = 0.0

        # convert to RGB
        cropped_frame_rgb = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB)
        # do the object detection
        objects = detect_objects(cropped_frame_rgb, sess, detection_graph, region_size, region_x_offset, region_y_offset)
        # copy the detected objects to the output array, padding with zeros when needed
        shared_output_arr[:] = objects + [0.0] * (60 - len(objects))
if __name__ == '__main__':
    mp.freeze_support()
    main()