# detect_objects.py

import os
import cv2
import imutils
import time
import datetime
import ctypes
import logging
import multiprocessing as mp
import threading
from contextlib import closing
import numpy as np
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
from flask import Flask, Response, make_response
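
# Overview: a capture process copies decoded frames into a shared memory
# array. For each configured region, a motion detection process and an
# object detection process read from that shared frame, coordinating via
# shared values (frame_time, motion_detected, output_array).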
RTSP_URL = os.getenv('RTSP_URL')

# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = '/frozen_inference_graph.pb'

# List of strings used to add the correct label to each box.
PATH_TO_LABELS = '/label_map.pbtext'

# TODO: make dynamic?
NUM_CLASSES = 90
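
# Regions are ':'-separated, each formatted as "size,x_offset,y_offset".
# For example, "300,0,0:300,300,0" defines two 300x300 crops, one at
# x=0 and one at x=300, both at y=0.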
REGIONS = "300,0,0:300,300,0:300,600,0"
#REGIONS = os.getenv('REGIONS')

DETECTED_OBJECTS = []
# Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
                                                            use_display_name=True)
category_index = label_map_util.create_category_index(categories)
def detect_objects(cropped_frame, sess, detection_graph, region_size, region_x_offset, region_y_offset):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(cropped_frame, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Actual detection.
    (boxes, scores, classes, num_detections) = sess.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})

    # build an array of detected objects
    objects = []
    for index, value in enumerate(classes[0]):
        score = scores[0, index]
        if score > 0.1:
            box = boxes[0, index].tolist()
            # scale the normalized box coordinates back to full-frame pixels
            box[0] = (box[0] * region_size) + region_y_offset
            box[1] = (box[1] * region_size) + region_x_offset
            box[2] = (box[2] * region_size) + region_y_offset
            box[3] = (box[3] * region_size) + region_x_offset
            objects += [value, scores[0, index]] + box
        # only keep the first 10 objects
        if len(objects) == 60:
            break

    return objects
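
# Detected objects are passed between processes as a flat array of doubles:
# 6 values per object (class, score, ymin, xmin, ymax, xmax), up to 10
# objects per region, hence the fixed length of 60.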
class ObjectParser(threading.Thread):
    def __init__(self, object_arrays):
        threading.Thread.__init__(self)
        self._object_arrays = object_arrays

    def run(self):
        global DETECTED_OBJECTS
        while True:
            detected_objects = []
            for object_array in self._object_arrays:
                object_index = 0
                while object_index < 60 and object_array[object_index] > 0:
                    object_class = object_array[object_index]
                    detected_objects.append({
                        'name': str(category_index.get(object_class).get('name')),
                        'score': object_array[object_index+1],
                        'ymin': int(object_array[object_index+2]),
                        'xmin': int(object_array[object_index+3]),
                        'ymax': int(object_array[object_index+4]),
                        'xmax': int(object_array[object_index+5])
                    })
                    object_index += 6
            DETECTED_OBJECTS = detected_objects
            time.sleep(0.01)
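
# Note: ObjectParser feeds DETECTED_OBJECTS for the Flask streaming endpoint;
# both are currently disabled (the start()/join() calls and the Flask app are
# commented out in main below).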
def main():
    # Parse selected regions
    regions = []
    for region_string in REGIONS.split(':'):
        region_parts = region_string.split(',')
        regions.append({
            'size': int(region_parts[0]),
            'x_offset': int(region_parts[1]),
            'y_offset': int(region_parts[2])
        })

    # capture a single frame and check the frame shape so the correct array
    # size can be allocated in memory
    video = cv2.VideoCapture(RTSP_URL)
    ret, frame = video.read()
    if ret:
        frame_shape = frame.shape
    else:
        print("Unable to capture video stream")
        exit(1)
    video.release()
    shared_memory_objects = []
    for region in regions:
        shared_memory_objects.append({
            # create shared value for storing the time the frame was captured
            'frame_time': mp.Value('d', 0.0),
            # shared value for the motion detection signal (1 for motion, 0 for no motion)
            'motion_detected': mp.Value('i', 1),
            # create shared array for storing 10 detected objects
            # note: this must be a double even though the value you are storing
            # is a float. otherwise it stops updating the value in shared
            # memory. probably something to do with the size of the memory block
            'output_array': mp.Array(ctypes.c_double, 6*10)
        })
    # compute the flattened array length from the frame shape
    flat_array_length = frame_shape[0] * frame_shape[1] * frame_shape[2]
    # create shared array for storing the full frame image data
    shared_arr = mp.Array(ctypes.c_uint16, flat_array_length)
    # shape the current frame so it can be treated as an image
    frame_arr = tonumpyarray(shared_arr).reshape(frame_shape)
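    # frame_arr is a live view into shared memory (no copy), so it always
    # reflects the latest captured frame; the commented-out Flask streamer
    # below reads from it when drawing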
    capture_process = mp.Process(target=fetch_frames, args=(shared_arr, [obj['frame_time'] for obj in shared_memory_objects], frame_shape))
    capture_process.daemon = True

    detection_processes = []
    for index, region in enumerate(regions):
        detection_process = mp.Process(target=process_frames, args=(shared_arr,
            shared_memory_objects[index]['output_array'],
            shared_memory_objects[index]['frame_time'],
            shared_memory_objects[index]['motion_detected'],
            frame_shape,
            region['size'], region['x_offset'], region['y_offset']))
        detection_process.daemon = True
        detection_processes.append(detection_process)

    motion_processes = []
    for index, region in enumerate(regions):
        motion_process = mp.Process(target=detect_motion, args=(shared_arr,
            shared_memory_objects[index]['frame_time'],
            shared_memory_objects[index]['motion_detected'],
            frame_shape,
            region['size'], region['x_offset'], region['y_offset']))
        motion_process.daemon = True
        motion_processes.append(motion_process)
    object_parser = ObjectParser([obj['output_array'] for obj in shared_memory_objects])
    # object_parser.start()

    capture_process.start()
    print("capture_process pid ", capture_process.pid)

    # for detection_process in detection_processes:
    #     detection_process.start()
    #     print("detection_process pid ", detection_process.pid)

    for motion_process in motion_processes:
        motion_process.start()
        print("motion_process pid ", motion_process.pid)

    # app = Flask(__name__)

    # @app.route('/')
    # def index():
    #     # return a multipart response
    #     return Response(imagestream(),
    #         mimetype='multipart/x-mixed-replace; boundary=frame')

    # def imagestream():
    #     global DETECTED_OBJECTS
    #     while True:
    #         # max out at 5 FPS
    #         time.sleep(0.2)
    #         # make a copy of the current detected objects
    #         detected_objects = DETECTED_OBJECTS.copy()
    #         # make a copy of the current frame
    #         frame = frame_arr.copy()
    #         # convert to RGB for drawing
    #         frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    #         # draw the bounding boxes on the screen
    #         for obj in detected_objects:
    #             vis_util.draw_bounding_box_on_image_array(frame,
    #                 obj['ymin'],
    #                 obj['xmin'],
    #                 obj['ymax'],
    #                 obj['xmax'],
    #                 color='red',
    #                 thickness=2,
    #                 display_str_list=["{}: {}%".format(obj['name'], int(obj['score']*100))],
    #                 use_normalized_coordinates=False)
    #         for region in regions:
    #             cv2.rectangle(frame, (region['x_offset'], region['y_offset']),
    #                 (region['x_offset']+region['size'], region['y_offset']+region['size']),
    #                 (255,255,255), 2)
    #         # convert back to BGR
    #         frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    #         # encode the image into a jpg
    #         ret, jpg = cv2.imencode('.jpg', frame)
    #         yield (b'--frame\r\n'
    #                b'Content-Type: image/jpeg\r\n\r\n' + jpg.tobytes() + b'\r\n\r\n')

    # app.run(host='0.0.0.0', debug=False)

    capture_process.join()
    # for detection_process in detection_processes:
    #     detection_process.join()
    for motion_process in motion_processes:
        motion_process.join()
    # object_parser.join()
# convert the shared memory array into a numpy array; the result is a view
# over the same buffer, not a copy, so writes are visible to all processes
def tonumpyarray(mp_arr):
    return np.frombuffer(mp_arr.get_obj(), dtype=np.uint16)
# fetch the frames as fast as possible, only decoding a frame when the
# detection processes have consumed the current one
def fetch_frames(shared_arr, shared_frame_times, frame_shape):
    # convert shared memory array into numpy and shape into image array
    arr = tonumpyarray(shared_arr).reshape(frame_shape)

    # start the video capture
    video = cv2.VideoCapture(RTSP_URL)
    # keep the buffer small so we minimize old data
    video.set(cv2.CAP_PROP_BUFFERSIZE, 1)

    while True:
        # grab the frame, but don't decode it yet
        ret = video.grab()
        # snapshot the time the frame was grabbed
        frame_time = datetime.datetime.now()
        if ret:
            # if the detection processes are ready for the next frame, decode it;
            # otherwise skip this frame and move on to the next one
            if all(shared_frame_time.value == 0.0 for shared_frame_time in shared_frame_times):
                # go ahead and decode the current frame
                ret, frame = video.retrieve()
                if ret:
                    arr[:] = frame
                    # signal to the detection processes by setting the shared_frame_time
                    for shared_frame_time in shared_frame_times:
                        shared_frame_time.value = frame_time.timestamp()
        else:
            # sleep a little to reduce CPU usage
            time.sleep(0.01)

    video.release()
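
# Handshake between capture and consumers: a consumer sets its frame_time
# back to 0.0 once it has copied the frame it needs, and the capture process
# only decodes a new frame when every consumer's frame_time is 0.0.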
# do the actual object detection
def process_frames(shared_arr, shared_output_arr, shared_frame_time, shared_motion, frame_shape, region_size, region_x_offset, region_y_offset):
    # shape shared input array into frame for processing
    arr = tonumpyarray(shared_arr).reshape(frame_shape)

    # Load a (frozen) Tensorflow model into memory before the processing loop
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        sess = tf.Session(graph=detection_graph)

    no_frames_available = -1
    while True:
        # if there is no motion detected
        if shared_motion.value == 0:
            time.sleep(0.01)
            continue

        # if there isn't a frame ready for processing
        if shared_frame_time.value == 0.0:
            # save the first time there were no frames available
            if no_frames_available == -1:
                no_frames_available = datetime.datetime.now().timestamp()
            # if there haven't been any frames available in 30 seconds,
            # sleep to avoid using so much cpu if the camera feed is down
            if no_frames_available > 0 and (datetime.datetime.now().timestamp() - no_frames_available) > 30:
                time.sleep(1)
                print("sleeping because no frames have been available in a while")
            else:
                # rest a little bit to avoid maxing out the CPU
                time.sleep(0.01)
            continue

        # we got a valid frame, so reset the timer
        no_frames_available = -1

        # if the frame is more than 0.5 seconds old, discard it
        if (datetime.datetime.now().timestamp() - shared_frame_time.value) > 0.5:
            # signal that we need a new frame
            shared_frame_time.value = 0.0
            # rest a little bit to avoid maxing out the CPU
            time.sleep(0.01)
            continue

        # make a copy of the cropped frame
        cropped_frame = arr[region_y_offset:region_y_offset+region_size, region_x_offset:region_x_offset+region_size].copy()
        frame_time = shared_frame_time.value
        # signal that the frame has been used so a new one will be ready
        shared_frame_time.value = 0.0

        # convert to RGB
        cropped_frame_rgb = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB)
        # do the object detection
        objects = detect_objects(cropped_frame_rgb, sess, detection_graph, region_size, region_x_offset, region_y_offset)
        # copy the detected objects to the output array, padding the remainder with zeros
        shared_output_arr[:] = objects + [0.0] * (60 - len(objects))
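
# Motion detection below uses a classic running-average background
# subtraction: blur the grayscale crop, accumulate a weighted average frame,
# threshold the absolute difference against it, dilate, and look for
# sufficiently large contours.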
# do the motion detection
def detect_motion(shared_arr, shared_frame_time, shared_motion, frame_shape, region_size, region_x_offset, region_y_offset):
    # shape shared input array into frame for processing
    arr = tonumpyarray(shared_arr).reshape(frame_shape)

    no_frames_available = -1
    avg_frame = None
    last_motion = -1
    while True:
        now = datetime.datetime.now().timestamp()
        # if it has been 30 seconds since the last motion, clear the flag
        if last_motion > 0 and (now - last_motion) > 30:
            last_motion = -1
            shared_motion.value = 0
            print("motion cleared")

        # if there isn't a frame ready for processing
        if shared_frame_time.value == 0.0:
            # save the first time there were no frames available
            if no_frames_available == -1:
                no_frames_available = now
            # if there haven't been any frames available in 30 seconds,
            # sleep to avoid using so much cpu if the camera feed is down
            if no_frames_available > 0 and (now - no_frames_available) > 30:
                time.sleep(1)
                print("sleeping because no frames have been available in a while")
            else:
                # rest a little bit to avoid maxing out the CPU
                time.sleep(0.01)
            continue

        # we got a valid frame, so reset the timer
        no_frames_available = -1

        # if the frame is more than 0.5 seconds old, discard it
        if (now - shared_frame_time.value) > 0.5:
            # signal that we need a new frame
            shared_frame_time.value = 0.0
            # rest a little bit to avoid maxing out the CPU
            time.sleep(0.01)
            continue

        # make a copy of the cropped frame
        cropped_frame = arr[region_y_offset:region_y_offset+region_size, region_x_offset:region_x_offset+region_size].copy()
        frame_time = shared_frame_time.value
        # signal that the frame has been used so a new one will be ready
        shared_frame_time.value = 0.0

        # convert to grayscale
        gray = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2GRAY)
        # convert to uint8
        gray = (gray/256).astype('uint8')
        # apply gaussian blur
        gray = cv2.GaussianBlur(gray, (21, 21), 0)

        if avg_frame is None:
            avg_frame = gray.copy().astype("float")
            continue

        # look at the delta from the avg_frame
        cv2.accumulateWeighted(gray, avg_frame, 0.5)
        frameDelta = cv2.absdiff(gray, cv2.convertScaleAbs(avg_frame))
        thresh = cv2.threshold(frameDelta, 25, 255, cv2.THRESH_BINARY)[1]

        # dilate the thresholded image to fill in holes, then find contours
        # on the thresholded image
        thresh = cv2.dilate(thresh, None, iterations=2)
        cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
            cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)

        # loop over the contours
        for c in cnts:
            # if the contour is too small, ignore it
            if cv2.contourArea(c) < 50:
                continue

            print("motion_detected")
            last_motion = now
            shared_motion.value = 1

            # compute the bounding box for the contour, draw it on the frame,
            # and update the text
            (x, y, w, h) = cv2.boundingRect(c)
            cv2.rectangle(cropped_frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.imwrite("motion%d.png" % frame_time, cropped_frame)
if __name__ == '__main__':
    mp.freeze_support()
    main()