
working dynamic regions, but messy

Blake Blackshear 5 years ago
parent
commit
9cc46a71cb
6 changed files with 569 additions and 99 deletions
  1. Dockerfile (+2 -1)
  2. frigate/mqtt.py (+2 -1)
  3. frigate/object_detection.py (+8 -6)
  4. frigate/objects.py (+446 -74)
  5. frigate/util.py (+38 -7)
  6. frigate/video.py (+73 -10)

+ 2 - 1
Dockerfile

@@ -101,7 +101,8 @@ RUN  pip install -U pip \
  Flask \
  paho-mqtt \
  PyYAML \
- matplotlib
+ matplotlib \
+ scipy
 
 WORKDIR /opt/frigate/
 ADD frigate frigate/

+ 2 - 1
frigate/mqtt.py

@@ -3,6 +3,7 @@ import cv2
 import threading
 import prctl
 from collections import Counter, defaultdict
+import itertools
 
 class MqttObjectPublisher(threading.Thread):
     def __init__(self, client, topic_prefix, objects_parsed, detected_objects, best_frames):
@@ -26,7 +27,7 @@ class MqttObjectPublisher(threading.Thread):
 
             # total up all scores by object type
             obj_counter = Counter()
-            for obj in detected_objects:
+            for obj in itertools.chain.from_iterable(detected_objects.values()):
                 obj_counter[obj['name']] += obj['score']
             
             # report on detected objects
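
Aside: detected_objects is now a dict keyed by frame_time rather than a flat list, so the publisher flattens it with itertools.chain.from_iterable before totaling scores by label. A minimal standalone sketch (the detections are made up):

    import itertools
    from collections import Counter

    # hypothetical per-frame detections, mirroring the new structure
    detected_objects = {
        1575000000.1: [{'name': 'person', 'score': 0.81}],
        1575000000.2: [{'name': 'person', 'score': 0.77}, {'name': 'car', 'score': 0.6}],
    }

    obj_counter = Counter()
    for obj in itertools.chain.from_iterable(detected_objects.values()):
        obj_counter[obj['name']] += obj['score']

    print(obj_counter)  # ~Counter({'person': 1.58, 'car': 0.6})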

+ 8 - 6
frigate/object_detection.py

@@ -31,7 +31,7 @@ class PreppedQueueProcessor(threading.Thread):
             frame = self.prepped_frame_queue.get()
 
             # Actual detection.
-            frame['detected_objects'] = self.engine.DetectWithInputTensor(frame['frame'], threshold=0.5, top_k=5)
+            frame['detected_objects'] = self.engine.DetectWithInputTensor(frame['frame'], threshold=0.4, top_k=5)
             self.fps.update()
             self.avg_inference_speed = (self.avg_inference_speed*9 + self.engine.get_inference_time())/10
 
@@ -56,8 +56,10 @@ class RegionRequester(threading.Thread):
             # make a copy of the frame_time
             frame_time = self.camera.frame_time.value
 
+            with self.camera.regions_in_process_lock:
+                self.camera.regions_in_process[frame_time] = len(self.camera.config['regions'])
+
             for index, region in enumerate(self.camera.config['regions']):
-                # queue with priority 1
                 self.camera.resize_queue.put({
                     'camera_name': self.camera.name,
                     'frame_time': frame_time,
@@ -88,14 +90,14 @@ class RegionPrepper(threading.Thread):
 
             # make a copy of the region
             cropped_frame = frame[resize_request['y_offset']:resize_request['y_offset']+resize_request['size'], resize_request['x_offset']:resize_request['x_offset']+resize_request['size']].copy()
-            
+
             # Resize to 300x300 if needed
             if cropped_frame.shape != (300, 300, 3):
+                # TODO: use Pillow-SIMD?
                 cropped_frame = cv2.resize(cropped_frame, dsize=(300, 300), interpolation=cv2.INTER_LINEAR)
             # Expand dimensions since the model expects images to have shape: [1, 300, 300, 3]
             frame_expanded = np.expand_dims(cropped_frame, axis=0)
 
             # add the frame to the queue
-            if not self.prepped_frame_queue.full():
-                resize_request['frame'] = frame_expanded.flatten().copy()
-                self.prepped_frame_queue.put(resize_request)
+            resize_request['frame'] = frame_expanded.flatten().copy()
+            self.prepped_frame_queue.put(resize_request)
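
Aside: the regions_in_process counter added above is what lets the pipeline tell when every region of a frame has come back from the detector. A standalone sketch of the pattern (the dict, lock, and list here are stand-ins for the camera's attributes):

    import threading

    regions_in_process = {}
    regions_in_process_lock = threading.Lock()
    finished_frames = []

    def dispatch(frame_time, regions):
        # record how many regions were queued for this frame
        with regions_in_process_lock:
            regions_in_process[frame_time] = len(regions)

    def region_done(frame_time):
        # decrement under the same lock; the frame is finished at zero
        with regions_in_process_lock:
            regions_in_process[frame_time] -= 1
            if regions_in_process[frame_time] == 0:
                del regions_in_process[frame_time]
                finished_frames.append(frame_time)

    dispatch(1575000000.1, ['region0', 'region1'])
    region_done(1575000000.1)
    region_done(1575000000.1)
    assert finished_frames == [1575000000.1]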

+ 446 - 74
frigate/objects.py

@@ -3,8 +3,10 @@ import datetime
 import threading
 import cv2
 import prctl
+import itertools
 import numpy as np
-from . util import draw_box_with_label, LABELS
+from scipy.spatial import distance as dist
+from . util import draw_box_with_label, LABELS, compute_intersection_rectangle, compute_intersection_over_union, calculate_region
 
 class ObjectCleaner(threading.Thread):
     def __init__(self, objects_parsed, detected_objects):
@@ -25,14 +27,13 @@ class ObjectCleaner(threading.Thread):
             # (newest objects are appended to the end)
             detected_objects = self._detected_objects.copy()
 
-            num_to_delete = 0
-            for obj in detected_objects:
-                if now-obj['frame_time']<2:
-                    break
-                num_to_delete += 1
-            if num_to_delete > 0:
-                del self._detected_objects[:num_to_delete]
+            objects_removed = False
+            for frame_time in detected_objects.keys():
+                if now-frame_time>2:
+                    del self._detected_objects[frame_time]
+                    objects_removed = True
 
+            if objects_removed:
                 # notify that parsed objects were changed
                 with self._objects_parsed:
                     self._objects_parsed.notify_all()
@@ -49,88 +50,459 @@ class DetectedObjectsProcessor(threading.Thread):
 
             objects = frame['detected_objects']
 
-            if len(objects) == 0:
-                return
+            # print(f"Processing objects for: {frame['size']} {frame['x_offset']} {frame['y_offset']}")
+
+            # if len(objects) == 0:
+            #     continue
 
             for raw_obj in objects:
                 obj = {
-                    'score': float(raw_obj.score),
-                    'box': raw_obj.bounding_box.flatten().tolist(),
                     'name': str(LABELS[raw_obj.label_id]),
+                    'score': float(raw_obj.score),
+                    'box': {
+                        'xmin': int((raw_obj.bounding_box[0][0] * frame['size']) + frame['x_offset']),
+                        'ymin': int((raw_obj.bounding_box[0][1] * frame['size']) + frame['y_offset']),
+                        'xmax': int((raw_obj.bounding_box[1][0] * frame['size']) + frame['x_offset']),
+                        'ymax': int((raw_obj.bounding_box[1][1] * frame['size']) + frame['y_offset'])
+                    },
+                    'region': {
+                        'xmin': frame['x_offset'],
+                        'ymin': frame['y_offset'],
+                        'xmax': frame['x_offset']+frame['size'],
+                        'ymax': frame['y_offset']+frame['size']
+                    },
                     'frame_time': frame['frame_time'],
                     'region_id': frame['region_id']
                 }
 
-                # find the matching region
-                region = self.camera.regions[frame['region_id']]
-
-                # Compute some extra properties
-                obj.update({
-                    'xmin': int((obj['box'][0] * frame['size']) + frame['x_offset']),
-                    'ymin': int((obj['box'][1] * frame['size']) + frame['y_offset']),
-                    'xmax': int((obj['box'][2] * frame['size']) + frame['x_offset']),
-                    'ymax': int((obj['box'][3] * frame['size']) + frame['y_offset'])
-                })
+                # NOTE: only bicycles are kept here (looks like a temporary debug filter)
+                if obj['name'] != 'bicycle':
+                    continue
+                
+                # if the object is within 5 pixels of the region border, and the region is not on the edge
+                # consider the object to be clipped
+                obj['clipped'] = False
+                if ((obj['region']['xmin'] > 5 and obj['box']['xmin']-obj['region']['xmin'] <= 5) or 
+                    (obj['region']['ymin'] > 5 and obj['box']['ymin']-obj['region']['ymin'] <= 5) or
+                    (self.camera.frame_shape[1]-obj['region']['xmax'] > 5 and obj['region']['xmax']-obj['box']['xmax'] <= 5) or
+                    (self.camera.frame_shape[0]-obj['region']['ymax'] > 5 and obj['region']['ymax']-obj['box']['ymax'] <= 5)):
+                    obj['clipped'] = True
                 
                 # Compute the area
-                obj['area'] = (obj['xmax']-obj['xmin'])*(obj['ymax']-obj['ymin'])
+                obj['area'] = (obj['box']['xmax']-obj['box']['xmin'])*(obj['box']['ymax']-obj['box']['ymin'])
 
-                object_name = obj['name']
+                # find the matching region
+                # region = self.camera.regions[frame['region_id']]
+                
 
-                if object_name in region['objects']:
-                    obj_settings = region['objects'][object_name]
+                # object_name = obj['name']
+                # TODO: move all this to wherever we manage "tracked objects"
+                # if object_name in region['objects']:
+                #     obj_settings = region['objects'][object_name]
 
-                    # if the min area is larger than the
-                    # detected object, don't add it to detected objects
-                    if obj_settings.get('min_area',-1) > obj['area']:
-                        continue
+                #     # if the min area is larger than the
+                #     # detected object, don't add it to detected objects
+                #     if obj_settings.get('min_area',-1) > obj['area']:
+                #         continue
                     
-                    # if the detected object is larger than the
-                    # max area, don't add it to detected objects
-                    if obj_settings.get('max_area', region['size']**2) < obj['area']:
-                        continue
-
-                    # if the score is lower than the threshold, skip
-                    if obj_settings.get('threshold', 0) > obj['score']:
-                        continue
-                
-                    # compute the coordinates of the object and make sure
-                    # the location isnt outside the bounds of the image (can happen from rounding)
-                    y_location = min(int(obj['ymax']), len(self.mask)-1)
-                    x_location = min(int((obj['xmax']-obj['xmin'])/2.0)+obj['xmin'], len(self.mask[0])-1)
-
-                    # if the object is in a masked location, don't add it to detected objects
-                    if self.camera.mask[y_location][x_location] == [0]:
-                        continue
+                #     # if the detected object is larger than the
+                #     # max area, don't add it to detected objects
+                #     if obj_settings.get('max_area', region['size']**2) < obj['area']:
+                #         continue
+
+                #     # if the score is lower than the threshold, skip
+                #     if obj_settings.get('threshold', 0) > obj['score']:
+                #         continue
                 
-                # look to see if the bounding box is too close to the region border and the region border is not the edge of the frame
-                # if ((frame['x_offset'] > 0 and obj['box'][0] < 0.01) or 
-                #     (frame['y_offset'] > 0 and obj['box'][1] < 0.01) or
-                #     (frame['x_offset']+frame['size'] < self.frame_shape[1] and obj['box'][2] > 0.99) or
-                #     (frame['y_offset']+frame['size'] < self.frame_shape[0] and obj['box'][3] > 0.99)):
-
-                #     size, x_offset, y_offset = calculate_region(self.frame_shape, obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'])
-                    # This triggers WAY too often with stationary objects on the edge of a region. 
-                    # Every frame triggers it and fills the queue...
-                    # I need to create a new region and add it to the list of regions, but 
-                    # it needs to check for a duplicate region first.
-
-                    # self.resize_queue.put({
-                    #     'camera_name': self.name,
-                    #     'frame_time': frame['frame_time'],
-                    #     'region_id': frame['region_id'],
-                    #     'size': size,
-                    #     'x_offset': x_offset,
-                    #     'y_offset': y_offset
-                    # })
-                    # print('object too close to region border')
-                    #continue
-
-                self.camera.detected_objects.append(obj)
+                #     # compute the coordinates of the object and make sure
+                #     # the location isnt outside the bounds of the image (can happen from rounding)
+                #     y_location = min(int(obj['ymax']), len(self.mask)-1)
+                #     x_location = min(int((obj['xmax']-obj['xmin'])/2.0)+obj['xmin'], len(self.mask[0])-1)
+
+                #     # if the object is in a masked location, don't add it to detected objects
+                #     if self.camera.mask[y_location][x_location] == [0]:
+                #         continue
+
+                # see if the current object is a duplicate
+                # TODO: still need to decide which copy to keep
+                obj['duplicate'] = False
+                for existing_obj in self.camera.detected_objects[frame['frame_time']]:
+                    # compute intersection rectangle with existing object and new objects region
+                    existing_obj_current_region = compute_intersection_rectangle(existing_obj['box'], obj['region'])
+
+                    # compute intersection rectangle with new object and existing objects region
+                    new_obj_existing_region = compute_intersection_rectangle(obj['box'], existing_obj['region'])
+
+                    # compute iou for the two intersection rectangles that were just computed
+                    iou = compute_intersection_over_union(existing_obj_current_region, new_obj_existing_region)
+
+                    # if the iou is greater than 0.7, flag as duplicate
+                    if iou > .7:
+                        obj['duplicate'] = True
+                        break
+
+                self.camera.detected_objects[frame['frame_time']].append(obj)
+            
+            with self.camera.regions_in_process_lock:
+                self.camera.regions_in_process[frame['frame_time']] -= 1
+                # print(f"Remaining regions for {frame['frame_time']}: {self.camera.regions_in_process[frame['frame_time']]}")
+
+                if self.camera.regions_in_process[frame['frame_time']] == 0:
+                    del self.camera.regions_in_process[frame['frame_time']]
+                    # print('Finished frame: ', frame['frame_time'])
+                    self.camera.finished_frame_queue.put(frame['frame_time'])
 
             with self.camera.objects_parsed:
                 self.camera.objects_parsed.notify_all()
 
+# Thread that checks finished frames for clipped objects and sends back
+# for processing if needed
+class RegionRefiner(threading.Thread):
+    def __init__(self, camera):
+        threading.Thread.__init__(self)
+        self.camera = camera
+
+    def run(self):
+        prctl.set_name(self.__class__.__name__)
+        while True:
+            # TODO: I need to process the frames in order for tracking...
+            frame_time = self.camera.finished_frame_queue.get()
+
+            # print(f"{frame_time} finished")
+
+            object_groups = []
+
+            # group all the duplicate objects together
+            # TODO: should I be grouping by object type too? also, the order can determine how well they group...
+            for new_obj in self.camera.detected_objects[frame_time]:
+                matching_group = self.find_group(new_obj, object_groups)
+                if matching_group is None:
+                    object_groups.append([new_obj])
+                else:
+                    object_groups[matching_group].append(new_obj)
+            
+            # just keep the unclipped objects
+            self.camera.detected_objects[frame_time] = [obj for obj in self.camera.detected_objects[frame_time] if not obj['clipped']]
+
+            # print(f"{frame_time} found {len(object_groups)} groups {object_groups}")
+            clipped_object = False
+            # deduped_objects = []
+            # find the largest unclipped object in each group
+            for group in object_groups:
+                unclipped_objects = [obj for obj in group if not obj['clipped']]
+                # if no unclipped objects, we need to look again
+                if len(unclipped_objects) == 0:
+                    # print(f"{frame_time} no unclipped objects in group")
+                    with self.camera.regions_in_process_lock:
+                        if frame_time not in self.camera.regions_in_process:
+                            self.camera.regions_in_process[frame_time] = 1
+                        else:
+                            self.camera.regions_in_process[frame_time] += 1
+                    xmin = min([obj['box']['xmin'] for obj in group])
+                    ymin = min([obj['box']['ymin'] for obj in group])
+                    xmax = max([obj['box']['xmax'] for obj in group])
+                    ymax = max([obj['box']['ymax'] for obj in group])
+                    # calculate a new region that will hopefully get the entire object
+                    (size, x_offset, y_offset) = calculate_region(self.camera.frame_shape, 
+                        xmin, ymin,
+                        xmax, ymax)
+                    # print(f"{frame_time} new region: {size} {x_offset} {y_offset}")
+
+                    # add it to the queue
+                    self.camera.resize_queue.put({
+                        'camera_name': self.camera.name,
+                        'frame_time': frame_time,
+                        'region_id': -1,
+                        'size': size,
+                        'x_offset': x_offset,
+                        'y_offset': y_offset
+                    })
+                    self.camera.dynamic_region_fps.update()
+                    clipped_object = True
+
+                # add the largest unclipped object
+                # TODO: this makes no sense
+                # deduped_objects.append(max(unclipped_objects, key=lambda obj: obj['area']))
+
+            # if we found a clipped object, then this frame is not ready for processing
+            if clipped_object:
+                continue
+            
+            # print(f"{frame_time} is actually finished")
+            # self.camera.detected_objects[frame_time] = deduped_objects
+
+            # keep adding frames to the refined queue as long as they are finished
+            with self.camera.regions_in_process_lock:
+                while self.camera.frame_queue.qsize() > 0 and self.camera.frame_queue.queue[0] not in self.camera.regions_in_process:
+                    self.camera.refined_frame_queue.put(self.camera.frame_queue.get())
+             
+    def has_overlap(self, new_obj, obj, overlap=0):
+        # compute intersection rectangle with existing object and new objects region
+        existing_obj_current_region = compute_intersection_rectangle(obj['box'], new_obj['region'])
+
+        # compute intersection rectangle with new object and existing objects region
+        new_obj_existing_region = compute_intersection_rectangle(new_obj['box'], obj['region'])
+
+        # compute iou for the two intersection rectangles that were just computed
+        iou = compute_intersection_over_union(existing_obj_current_region, new_obj_existing_region)
+
+        # the boxes overlap if the iou exceeds the threshold
+        return iou > overlap
+    
+    def find_group(self, new_obj, groups):
+        for index, group in enumerate(groups):
+            for obj in group:
+                if self.has_overlap(new_obj, obj):
+                    return index
+        return None
+
+class ObjectTracker(threading.Thread):
+    def __init__(self, camera, max_disappeared):
+        threading.Thread.__init__(self)
+        self.camera = camera
+        self.tracked_objects = {}
+        self.disappeared = {}
+        self.max_disappeared = max_disappeared
+    
+    def run(self):
+        prctl.set_name(self.__class__.__name__)
+        while True:
+            # TODO: track objects
+            frame_time = self.camera.refined_frame_queue.get()
+            f = open(f"/debug/{str(frame_time)}.jpg", 'wb')
+            f.write(self.camera.frame_with_objects(frame_time))
+            f.close()
+
+
+    def register(self, index, obj):
+        id = f"{str(obj['frame_time'])}-{index}"
+        self.tracked_objects[id] = obj
+        self.disappeared[id] = 0
+
+    def deregister(self, id):
+        del self.disappeared[id]
+        del self.tracked_objects[id]
+    
+    def update(self, id, new_obj):
+        new_obj.detections = self.tracked_objects[id].detections
+        new_obj.detections.append({
+
+        })
+
+    def match_and_update(self, new_objects):
+        # check to see if the list of input bounding box rectangles
+        # is empty
+        if len(new_objects) == 0:
+            # loop over any existing tracked objects and mark them
+            # as disappeared
+            for objectID in list(self.disappeared.keys()):
+                self.disappeared[objectID] += 1
+
+                # if we have reached a maximum number of consecutive
+                # frames where a given object has been marked as
+                # missing, deregister it
+                if self.disappeared[objectID] > self.max_disappeared:
+                    self.deregister(objectID)
+
+            # return early as there are no centroids or tracking info
+            # to update
+            return
+
+        # compute centroids
+        for obj in new_objects:
+            centroid_x = int((obj['box']['xmin']+obj['box']['xmax']) / 2.0)
+            centroid_y = int((obj['box']['ymin']+obj['box']['ymax']) / 2.0)
+            obj['centroid'] = (centroid_x, centroid_y)
+
+        if len(self.tracked_objects) == 0:
+            for index, obj in enumerate(new_objects):
+                self.register(index, obj)
+            return
+        
+        new_centroids = np.array([o['centroid'] for o in new_objects])
+        current_ids = list(self.tracked_objects.keys())
+        current_centroids = np.array([o['centroid'] for o in self.tracked_objects.values()])
+
+        # compute the distance between each pair of tracked
+        # centroids and new centroids, respectively -- our
+        # goal will be to match each new centroid to an existing
+        # object centroid
+        D = dist.cdist(current_centroids, new_centroids)
+
+        # in order to perform this matching we must (1) find the
+        # smallest value in each row and then (2) sort the row
+        # indexes based on their minimum values so that the row
+        # with the smallest value is at the *front* of the index
+        # list
+        rows = D.min(axis=1).argsort()
+
+        # next, we perform a similar process on the columns by
+        # finding the smallest value in each column and then
+        # sorting using the previously computed row index list
+        cols = D.argmin(axis=1)[rows]
+
+        # in order to determine if we need to update, register,
+        # or deregister an object we need to keep track of which
+        # of the rows and column indexes we have already examined
+        usedRows = set()
+        usedCols = set()
+
+        # loop over the combination of the (row, column) index
+        # tuples
+        for (row, col) in zip(rows, cols):
+            # if we have already examined either the row or
+            # column value before, ignore it
+            if row in usedRows or col in usedCols:
+                continue
+
+            # otherwise, grab the object ID for the current row,
+            # set its new centroid, and reset the disappeared
+            # counter
+            objectID = current_ids[row]
+            self.update(objectID, new_objects[col])
+            self.disappeared[objectID] = 0
+
+            # indicate that we have examined each of the row and
+            # column indexes, respectively
+            usedRows.add(row)
+            usedCols.add(col)
+
+        # compute both the row and column index we have NOT yet
+        # examined
+        unusedRows = set(range(0, D.shape[0])).difference(usedRows)
+        unusedCols = set(range(0, D.shape[1])).difference(usedCols)
+
+        # in the event that the number of object centroids is
+        # equal or greater than the number of input centroids
+        # we need to check and see if some of these objects have
+        # potentially disappeared
+        if D.shape[0] >= D.shape[1]:
+            # loop over the unused row indexes
+            for row in unusedRows:
+                # grab the object ID for the corresponding row
+                # index and increment the disappeared counter
+                objectID = current_ids[row]
+                self.disappeared[objectID] += 1
+
+                # check to see if the number of consecutive
+                # frames the object has been marked "disappeared"
+                # for warrants deregistering the object
+                if self.disappeared[objectID] > self.max_disappeared:
+                    self.deregister(objectID)
+
+        # otherwise, if the number of input centroids is greater
+        # than the number of existing object centroids we need to
+        # register each new input centroid as a trackable object
+        else:
+            for col in unusedCols:
+                self.register(col, new_objects[col])
+
+
+        # -------------
+
+        # # initialize an array of input centroids for the current frame
+        # inputCentroids = np.zeros((len(rects), 2), dtype="int")
+
+        # # loop over the bounding box rectangles
+        # for (i, (startX, startY, endX, endY)) in enumerate(rects):
+        #     # use the bounding box coordinates to derive the centroid
+        #     cX = int((startX + endX) / 2.0)
+        #     cY = int((startY + endY) / 2.0)
+        #     inputCentroids[i] = (cX, cY)
+
+        # # if we are currently not tracking any objects take the input
+        # # centroids and register each of them
+        # if len(self.objects) == 0:
+        #     for i in range(0, len(inputCentroids)):
+        #         self.register(inputCentroids[i])
+        # # otherwise, are are currently tracking objects so we need to
+        # # try to match the input centroids to existing object
+        # # centroids
+        # else:
+        #     # grab the set of object IDs and corresponding centroids
+        #     objectIDs = list(self.objects.keys())
+        #     objectCentroids = list(self.objects.values())
+
+        #     # compute the distance between each pair of object
+        #     # centroids and input centroids, respectively -- our
+        #     # goal will be to match an input centroid to an existing
+        #     # object centroid
+        #     D = dist.cdist(np.array(objectCentroids), inputCentroids)
+
+        #     # in order to perform this matching we must (1) find the
+        #     # smallest value in each row and then (2) sort the row
+        #     # indexes based on their minimum values so that the row
+        #     # with the smallest value is at the *front* of the index
+        #     # list
+        #     rows = D.min(axis=1).argsort()
+
+        #     # next, we perform a similar process on the columns by
+        #     # finding the smallest value in each column and then
+        #     # sorting using the previously computed row index list
+        #     cols = D.argmin(axis=1)[rows]
+
+        #     # in order to determine if we need to update, register,
+        #     # or deregister an object we need to keep track of which
+        #     # of the rows and column indexes we have already examined
+        #     usedRows = set()
+        #     usedCols = set()
+
+        #     # loop over the combination of the (row, column) index
+        #     # tuples
+        #     for (row, col) in zip(rows, cols):
+        #         # if we have already examined either the row or
+        #         # column value before, ignore it
+        #         if row in usedRows or col in usedCols:
+        #             continue
+
+        #         # otherwise, grab the object ID for the current row,
+        #         # set its new centroid, and reset the disappeared
+        #         # counter
+        #         objectID = objectIDs[row]
+        #         self.objects[objectID] = inputCentroids[col]
+        #         self.disappeared[objectID] = 0
+
+        #         # indicate that we have examined each of the row and
+        #         # column indexes, respectively
+        #         usedRows.add(row)
+        #         usedCols.add(col)
+
+        #     # compute both the row and column index we have NOT yet
+        #     # examined
+        #     unusedRows = set(range(0, D.shape[0])).difference(usedRows)
+        #     unusedCols = set(range(0, D.shape[1])).difference(usedCols)
+
+        #     # in the event that the number of object centroids is
+        #     # equal or greater than the number of input centroids
+        #     # we need to check and see if some of these objects have
+        #     # potentially disappeared
+        #     if D.shape[0] >= D.shape[1]:
+        #         # loop over the unused row indexes
+        #         for row in unusedRows:
+        #             # grab the object ID for the corresponding row
+        #             # index and increment the disappeared counter
+        #             objectID = objectIDs[row]
+        #             self.disappeared[objectID] += 1
+
+        #             # check to see if the number of consecutive
+        #             # frames the object has been marked "disappeared"
+        #             # for warrants deregistering the object
+        #             if self.disappeared[objectID] > self.maxDisappeared:
+        #                 self.deregister(objectID)
+
+        #     # otherwise, if the number of input centroids is greater
+        #     # than the number of existing object centroids we need to
+        #     # register each new input centroid as a trackable object
+        #     else:
+        #         for col in unusedCols:
+        #             self.register(inputCentroids[col])
+
+        # # return the set of trackable objects
+        # return self.objects
 
 # Maintains the frame and object with the highest score
 class BestFrames(threading.Thread):
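
Aside: the duplicate check in DetectedObjectsProcessor above is easy to misread. It does not compare the two boxes directly; each box is first clipped to the other detection's region, so the same object seen from two overlapping regions still matches even when one copy is cut off. A standalone sketch with made-up boxes (the helpers mirror the ones added to frigate/util.py below):

    def compute_intersection_rectangle(box_a, box_b):
        # the tightest box contained in both inputs
        return {
            'xmin': max(box_a['xmin'], box_b['xmin']),
            'ymin': max(box_a['ymin'], box_b['ymin']),
            'xmax': min(box_a['xmax'], box_b['xmax']),
            'ymax': min(box_a['ymax'], box_b['ymax'])
        }

    def compute_intersection_over_union(box_a, box_b):
        intersect = compute_intersection_rectangle(box_a, box_b)
        inter_area = max(0, intersect['xmax'] - intersect['xmin'] + 1) * max(0, intersect['ymax'] - intersect['ymin'] + 1)
        if inter_area == 0:
            return 0.0
        box_a_area = (box_a['xmax'] - box_a['xmin'] + 1) * (box_a['ymax'] - box_a['ymin'] + 1)
        box_b_area = (box_b['xmax'] - box_b['xmin'] + 1) * (box_b['ymax'] - box_b['ymin'] + 1)
        return inter_area / float(box_a_area + box_b_area - inter_area)

    # the same object detected from two overlapping regions
    existing = {'box': {'xmin': 100, 'ymin': 100, 'xmax': 150, 'ymax': 150},
                'region': {'xmin': 0, 'ymin': 0, 'xmax': 300, 'ymax': 300}}
    new = {'box': {'xmin': 102, 'ymin': 101, 'xmax': 151, 'ymax': 152},
           'region': {'xmin': 50, 'ymin': 50, 'xmax': 350, 'ymax': 350}}

    # clip each box to the other detection's region, then compare the clipped boxes
    a = compute_intersection_rectangle(existing['box'], new['region'])
    b = compute_intersection_rectangle(new['box'], existing['region'])
    print(compute_intersection_over_union(a, b) > 0.7)  # True -> flagged as duplicate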
@@ -153,7 +525,7 @@ class BestFrames(threading.Thread):
             # make a copy of detected objects
             detected_objects = self.detected_objects.copy()
 
-            for obj in detected_objects:
+            for obj in itertools.chain.from_iterable(detected_objects.values()):
                 if obj['name'] in self.best_objects:
                     now = datetime.datetime.now().timestamp()
                     # if the object is a higher score than the current best score 
@@ -170,8 +542,8 @@ class BestFrames(threading.Thread):
                 if obj['frame_time'] in recent_frames:
                     best_frame = recent_frames[obj['frame_time']] #, np.zeros((720,1280,3), np.uint8))
 
-                    draw_box_with_label(best_frame, obj['xmin'], obj['ymin'], 
-                        obj['xmax'], obj['ymax'], obj['name'], obj['score'], obj['area'])
+                    draw_box_with_label(best_frame, obj['box']['xmin'], obj['box']['ymin'], 
+                        obj['box']['xmax'], obj['box']['ymax'], obj['name'], f"{int(obj['score']*100)}% {obj['area']}")
                     
                     # print a timestamp
                     time_to_show = datetime.datetime.fromtimestamp(obj['frame_time']).strftime("%m/%d/%Y %H:%M:%S")
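
Aside: match_and_update follows the centroid tracker it appears to be adapted from (the pyimagesearch tutorial kept in comments above): pair tracked centroids with new centroids greedily, closest pairs first, then treat leftovers as disappeared or newly registered objects. A standalone sketch of just the matching step (centroids are made up):

    import numpy as np
    from scipy.spatial import distance as dist

    current_centroids = np.array([[100, 120], [400, 300]])         # tracked
    new_centroids = np.array([[404, 298], [102, 121], [700, 50]])  # this frame

    D = dist.cdist(current_centroids, new_centroids)  # pairwise distances
    rows = D.min(axis=1).argsort()  # tracked objects, best match first
    cols = D.argmin(axis=1)[rows]   # nearest new centroid for each row

    used_rows, used_cols, matches = set(), set(), []
    for row, col in zip(rows, cols):
        # each tracked object and each new centroid can only match once
        if row in used_rows or col in used_cols:
            continue
        matches.append((int(row), int(col)))
        used_rows.add(row)
        used_cols.add(col)

    print(matches)                             # [(0, 1), (1, 0)]
    print(set(range(D.shape[1])) - used_cols)  # {2} -> register as a new object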

+ 38 - 7
frigate/util.py

@@ -16,22 +16,22 @@ def ReadLabelFile(file_path):
     return ret
 
 def calculate_region(frame_shape, xmin, ymin, xmax, ymax):    
-    # size is 50% larger than longest edge
-    size = max(xmax-xmin, ymax-ymin)
+    # size is larger than longest edge
+    size = int(max(xmax-xmin, ymax-ymin)*1.5)
     # if the size is too big to fit in the frame
     if size > min(frame_shape[0], frame_shape[1]):
         size = min(frame_shape[0], frame_shape[1])
 
     # x_offset is midpoint of bounding box minus half the size
-    x_offset = int(((xmax-xmin)/2+xmin)-size/2)
+    x_offset = int((xmax-xmin)/2.0+xmin-size/2.0)
     # if outside the image
     if x_offset < 0:
         x_offset = 0
     elif x_offset > (frame_shape[1]-size):
         x_offset = (frame_shape[1]-size)
 
-    # x_offset is midpoint of bounding box minus half the size
-    y_offset = int(((ymax-ymin)/2+ymin)-size/2)
+    # y_offset is midpoint of bounding box minus half the size
+    y_offset = int((ymax-ymin)/2.0+ymin-size/2.0)
     # if outside the image
     if y_offset < 0:
         y_offset = 0
@@ -40,13 +40,44 @@ def calculate_region(frame_shape, xmin, ymin, xmax, ymax):
 
     return (size, x_offset, y_offset)
 
+def compute_intersection_rectangle(box_a, box_b):
+    return {
+        'xmin': max(box_a['xmin'], box_b['xmin']),
+        'ymin': max(box_a['ymin'], box_b['ymin']),
+        'xmax': min(box_a['xmax'], box_b['xmax']),
+        'ymax': min(box_a['ymax'], box_b['ymax'])
+    }
+    
+def compute_intersection_over_union(box_a, box_b):
+    # determine the (x, y)-coordinates of the intersection rectangle
+    intersect = compute_intersection_rectangle(box_a, box_b)
+
+    # compute the area of intersection rectangle
+    inter_area = max(0, intersect['xmax'] - intersect['xmin'] + 1) * max(0, intersect['ymax'] - intersect['ymin'] + 1)
+
+    if inter_area == 0:
+        return 0.0
+    
+    # compute the area of both the prediction and ground-truth
+    # rectangles
+    box_a_area = (box_a['xmax'] - box_a['xmin'] + 1) * (box_a['ymax'] - box_a['ymin'] + 1)
+    box_b_area = (box_b['xmax'] - box_b['xmin'] + 1) * (box_b['ymax'] - box_b['ymin'] + 1)
+
+    # compute the intersection over union by taking the intersection
+    # area and dividing it by the sum of prediction + ground-truth
+    # areas - the intersection area
+    iou = inter_area / float(box_a_area + box_b_area - inter_area)
+
+    # return the intersection over union value
+    return iou
+
 # convert shared memory array into numpy array
 def tonumpyarray(mp_arr):
     return np.frombuffer(mp_arr.get_obj(), dtype=np.uint8)
 
-def draw_box_with_label(frame, x_min, y_min, x_max, y_max, label, score, area):
+def draw_box_with_label(frame, x_min, y_min, x_max, y_max, label, info):
     color = COLOR_MAP[label]
-    display_text = "{}: {}% {}".format(label,int(score*100),int(area))
+    display_text = "{}: {}".format(label, info)
     cv2.rectangle(frame, (x_min, y_min), 
         (x_max, y_max), 
         color, 2)
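
Aside: a quick standalone check of the updated calculate_region (condensed here; the min/max clamping is equivalent to the if/elif chains above, and frame_shape is (height, width)). The region is now 1.5x the longest box edge, centered on the box and clamped inside the frame:

    def calculate_region(frame_shape, xmin, ymin, xmax, ymax):
        # region is 1.5x the longest edge, but no larger than the frame
        size = min(int(max(xmax - xmin, ymax - ymin) * 1.5), frame_shape[0], frame_shape[1])
        # center on the box, clamped inside the frame
        x_offset = min(max(int((xmax - xmin) / 2.0 + xmin - size / 2.0), 0), frame_shape[1] - size)
        y_offset = min(max(int((ymax - ymin) / 2.0 + ymin - size / 2.0), 0), frame_shape[0] - size)
        return (size, x_offset, y_offset)

    # a 100x60 box near the left edge of a 720x1280 frame
    print(calculate_region((720, 1280), 10, 300, 110, 360))  # (150, 0, 255)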

+ 73 - 10
frigate/video.py

@@ -9,10 +9,11 @@ import multiprocessing as mp
 import subprocess as sp
 import numpy as np
 import prctl
+import itertools
 from collections import defaultdict
 from . util import tonumpyarray, LABELS, draw_box_with_label, calculate_region, EventsPerSecond
 from . object_detection import RegionPrepper, RegionRequester
-from . objects import ObjectCleaner, BestFrames, DetectedObjectsProcessor
+from . objects import ObjectCleaner, BestFrames, DetectedObjectsProcessor, RegionRefiner, ObjectTracker
 from . mqtt import MqttObjectPublisher
 
 # Stores 2 seconds worth of frames so they can be used for other threads
@@ -24,7 +25,7 @@ class FrameTracker(threading.Thread):
         self.frame_ready = frame_ready
         self.frame_lock = frame_lock
         self.recent_frames = recent_frames
-
+    
     def run(self):
         prctl.set_name("FrameTracker")
         while True:
@@ -36,7 +37,7 @@ class FrameTracker(threading.Thread):
             # delete any old frames
             stored_frame_times = list(self.recent_frames.keys())
             for k in stored_frame_times:
-                if (now - k) > 2:
+                if (now - k) > 10:
                     del self.recent_frames[k]
 
 def get_frame_shape(source):
@@ -101,6 +102,7 @@ class CameraCapture(threading.Thread):
                     .reshape(self.camera.frame_shape)
                 )
                 self.camera.frame_cache[self.camera.frame_time.value] = self.camera.current_frame.copy()
+                self.camera.frame_queue.put(self.camera.frame_time.value)
             # Notify with the condition that a new frame is ready
             with self.camera.frame_ready:
                 self.camera.frame_ready.notify_all()
@@ -111,8 +113,17 @@ class Camera:
     def __init__(self, name, ffmpeg_config, global_objects_config, config, prepped_frame_queue, mqtt_client, mqtt_prefix):
         self.name = name
         self.config = config
-        self.detected_objects = []
+        self.detected_objects = defaultdict(lambda: [])
+        self.tracked_objects = []
         self.frame_cache = {}
+        # queue for re-assembling frames in order
+        self.frame_queue = queue.Queue()
+        # track how many regions have been requested for a frame so we know when a frame is complete
+        self.regions_in_process = {}
+        # lock to control access to regions_in_process
+        self.regions_in_process_lock = mp.Lock()
+        self.finished_frame_queue = queue.Queue()
+        self.refined_frame_queue = queue.Queue()
 
         self.ffmpeg = config.get('ffmpeg', {})
         self.ffmpeg_input = get_ffmpeg_input(self.ffmpeg['input'])
@@ -149,7 +160,7 @@ class Camera:
         self.detected_objects_queue = queue.Queue()
         self.detected_objects_processor = DetectedObjectsProcessor(self)
         self.detected_objects_processor.start()
-        
+
         # initialize the frame cache
         self.cached_frame_with_objects = {
             'frame_bytes': [],
@@ -193,6 +204,16 @@ class Camera:
         self.object_cleaner = ObjectCleaner(self.objects_parsed, self.detected_objects)
         self.object_cleaner.start()
 
+        # start a thread to refine regions when objects are clipped
+        self.dynamic_region_fps = EventsPerSecond()
+        self.region_refiner = RegionRefiner(self)
+        self.region_refiner.start()
+        self.dynamic_region_fps.start()
+
+        # start a thread to track objects
+        self.object_tracker = ObjectTracker(self, 10)
+        self.object_tracker.start()
+
         # start a thread to publish object scores
         mqtt_publisher = MqttObjectPublisher(self.mqtt_client, self.mqtt_topic_prefix, self.objects_parsed, self.detected_objects, self.best_frames)
         mqtt_publisher.start()
@@ -270,12 +291,47 @@ class Camera:
     def stats(self):
         return {
             'camera_fps': self.fps.eps(60),
-            'resize_queue': self.resize_queue.qsize()
+            'resize_queue': self.resize_queue.qsize(),
+            'frame_queue': self.frame_queue.qsize(),
+            'finished_frame_queue': self.finished_frame_queue.qsize(),
+            'refined_frame_queue': self.refined_frame_queue.qsize(),
+            'regions_in_process': self.regions_in_process,
+            'dynamic_regions_per_sec': self.dynamic_region_fps.eps()
         }
     
+    def frame_with_objects(self, frame_time):
+        frame = self.frame_cache[frame_time].copy()
+
+        for region in self.regions:
+            color = (255,255,255)
+            cv2.rectangle(frame, (region['x_offset'], region['y_offset']), 
+                (region['x_offset']+region['size'], region['y_offset']+region['size']), 
+                color, 2)
+
+        # draw the bounding boxes on the screen
+        for obj in self.detected_objects[frame_time]:
+        # for obj in detected_objects[frame_time]:
+            cv2.rectangle(frame, (obj['region']['xmin'], obj['region']['ymin']), 
+                (obj['region']['xmax'], obj['region']['ymax']), 
+                (0,255,0), 1)
+            draw_box_with_label(frame, obj['box']['xmin'], obj['box']['ymin'], obj['box']['xmax'], obj['box']['ymax'], obj['name'], f"{int(obj['score']*100)}% {obj['area']} {obj['clipped']}")
+            
+        # print a timestamp
+        time_to_show = datetime.datetime.fromtimestamp(frame_time).strftime("%m/%d/%Y %H:%M:%S")
+        cv2.putText(frame, time_to_show, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, fontScale=.8, color=(255, 255, 255), thickness=2)
+        
+        # print fps
+        cv2.putText(frame, str(self.fps.eps())+'FPS', (10, 60), cv2.FONT_HERSHEY_SIMPLEX, fontScale=.8, color=(255, 255, 255), thickness=2)
+
+        # convert to BGR
+        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+
+        # encode the image into a jpg
+        ret, jpg = cv2.imencode('.jpg', frame)
+
+        return jpg.tobytes()
+
     def get_current_frame_with_objects(self):
-        # make a copy of the current detected objects
-        detected_objects = self.detected_objects.copy()
         # lock and make a copy of the current frame
         with self.frame_lock:
             frame = self.current_frame.copy()
@@ -284,9 +340,16 @@ class Camera:
         if frame_time == self.cached_frame_with_objects['frame_time']:
             return self.cached_frame_with_objects['frame_bytes']
 
+        # make a copy of the current detected objects
+        detected_objects = self.detected_objects.copy()
+
         # draw the bounding boxes on the screen
-        for obj in detected_objects:
-            draw_box_with_label(frame, obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], obj['name'], obj['score'], obj['area'])
+        for obj in [obj for frame_list in detected_objects.values() for obj in frame_list]:
+        # for obj in detected_objects[frame_time]:
+            draw_box_with_label(frame, obj['box']['xmin'], obj['box']['ymin'], obj['box']['xmax'], obj['box']['ymax'], obj['name'], f"{int(obj['score']*100)}% {obj['area']} {obj['clipped']}")
+            cv2.rectangle(frame, (obj['region']['xmin'], obj['region']['ymin']), 
+                (obj['region']['xmax'], obj['region']['ymax']), 
+                (0,255,0), 2)
 
         for region in self.regions:
             color = (255,255,255)
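
Aside: this is where the ordering guarantee comes from. CameraCapture pushes every frame_time onto frame_queue in capture order, and RegionRefiner only forwards the head of that queue to refined_frame_queue once it no longer appears in regions_in_process, so frames reach the tracker in order even when a late dynamic region delays one of them. A standalone sketch of the release step (the queues and lock are stand-ins for the camera's attributes):

    import queue
    import threading

    frame_queue = queue.Queue()          # frame times in capture order
    refined_frame_queue = queue.Queue()  # frames ready for tracking
    regions_in_process = {2.0: 1}        # frame 2.0 still has a region pending
    regions_in_process_lock = threading.Lock()

    for t in (1.0, 2.0, 3.0):
        frame_queue.put(t)

    # release finished frames from the head of the queue, preserving order
    with regions_in_process_lock:
        while frame_queue.qsize() > 0 and frame_queue.queue[0] not in regions_in_process:
            refined_frame_queue.put(frame_queue.get())

    print(list(refined_frame_queue.queue))  # [1.0] -- 2.0 blocks 3.0 until it finishes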