AnnasBlackHat committed
Commit 25a2e4b · 1 Parent(s): 738af6e
Files changed (3)
  1. app.py +184 -0
  2. requirements.txt +70 -0
  3. util/sort.py +248 -0
app.py ADDED
@@ -0,0 +1,184 @@
+ import gradio as gr
+ import numpy as np
+ import cv2
+ from ultralytics import YOLO
+ from util.sort import Sort
+ import time
+ import psutil
+ import tempfile
+ import os
+ from pathlib import Path
+
+ def get_yolo_models():
+     models = {
+         'YOLOv8': ['n', 'm', 'x'],
+         'YOLOv9': ['t', 'm', 'e'],  # as of 2024
+         'YOLOv10': ['n', 'm', 'x'],
+         'YOLO11': ['n', 'm', 'x']
+     }
+
+     choices = []
+     for version, sizes in models.items():
+         # ultralytics weight files use lowercase stems, e.g. yolov8n.pt
+         choices.extend([f"{version}{size}.pt".lower() for size in sizes])
+     return choices
+
+ def process_video(video_path, model_choice):
+     # Create temporary directory for outputs
+     temp_dir = tempfile.mkdtemp()
+     output_video_path = os.path.join(temp_dir, "output.mp4")
+     faces_dir = os.path.join(temp_dir, "faces")
+     os.makedirs(faces_dir, exist_ok=True)
+
+     # Initialize model and tracker
+     model = YOLO(model_choice)
+     tracker = Sort()
+     all_tracked_ids = set()
+     face_images = []
+
+     # Start timing and resource monitoring
+     start_time = time.time()
+     proc = psutil.Process()
+     initial_memory = proc.memory_info().rss / 1024 / 1024  # MB
+     proc.cpu_percent()  # prime the counter; the next call reports usage since this one
+
+     # Video processing setup
+     cap = cv2.VideoCapture(video_path)
+     fps = int(cap.get(cv2.CAP_PROP_FPS))
+     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+     # Create video writer
+     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+     out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
+
+     frame_count = 0
+     face_count = 0
+
+     while cap.isOpened():
+         status, frame = cap.read()
+         if not status:
+             break
+
+         # Hard cap on processing time so long uploads cannot run indefinitely
+         elapsed_time = time.time() - start_time
+         print(f'time elapsed: {elapsed_time}')
+         if elapsed_time >= 60:
+             break
+
+         frame_count += 1
+
+         # Create dark overlay for text
+         overlay_height = 80
+         overlay = frame.copy()
+         overlay[:overlay_height] = (0, 0, 0)
+         cv2.addWeighted(overlay, 0.5, frame, 0.5, 0, frame)
+
+         results = model(frame, stream=True)
+
+         for res in results:
+             detections = res.boxes.cpu().numpy()
+             person_indices = np.where((detections.cls == 0) & (detections.conf > 0.3))[0]
+
+             if len(person_indices) > 0:
+                 person_boxes = detections.xyxy[person_indices].astype(int)
+                 tracks = tracker.update(person_boxes)
+                 tracks = tracks.astype(int)
+
+                 current_ids = set(tracks[:, 4])
+                 all_tracked_ids.update(current_ids)
+
+                 # Save face crops (simplified - using upper portion of bounding box)
+                 for xmin, ymin, xmax, ymax, track_id in tracks:
+                     face_height = int((ymax - ymin) * 0.3)  # Take top 30% as face
+                     face_crop = frame[ymin:ymin + face_height, xmin:xmax]
+                     if face_crop.size > 0:  # Check if crop is valid
+                         face_path = os.path.join(faces_dir, f"face_{track_id}.jpg")
+                         if not os.path.exists(face_path):  # Save only first occurrence
+                             cv2.imwrite(face_path, face_crop)
+                             face_images.append(face_path)
+                             face_count += 1
+
+                 # Draw tracking info
+                 cv2.putText(frame, f"Current People: {len(tracks)}", (20, 35),
+                             cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
+                 cv2.putText(frame, f"Total People: {len(all_tracked_ids)}", (20, 70),
+                             cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
+
+                 for xmin, ymin, xmax, ymax, track_id in tracks:
+                     cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
+                     cv2.putText(frame, f"Person #{track_id}", (xmin, ymin - 10),
+                                 cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
+
+             else:
+                 cv2.putText(frame, "Current People: 0", (20, 35),
+                             cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
+                 cv2.putText(frame, f"Total People: {len(all_tracked_ids)}", (20, 70),
+                             cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
+
+         out.write(frame)
+
+     # Cleanup
+     cap.release()
+     out.release()
+
+     # Calculate statistics
+     end_time = time.time()
+     process_time = end_time - start_time
+     final_memory = proc.memory_info().rss / 1024 / 1024
+     memory_used = final_memory - initial_memory
+     cpu_percent = proc.cpu_percent()
+
+     # Prepare statistics text
+     stats = f"""
+     Processing Statistics:
+     ---------------------
+     Total People Detected: {len(all_tracked_ids)}
+     Total Frames Processed: {frame_count}
+     Processing Time: {process_time:.2f} seconds
+     FPS: {frame_count/process_time:.2f}
+     CPU Usage: {cpu_percent:.1f}%
+     Memory Usage: {memory_used:.1f} MB
+     Faces Captured: {face_count}
+     """
+
+     return stats, output_video_path, face_images
+
+ # Create Gradio interface
+ with gr.Blocks(title="Person Tracking System") as demo:
+     gr.Markdown("# Person Tracking and Analysis System")
+
+     with gr.Row():
+         with gr.Column():
+             video_input = gr.Video(label="Upload Video (Max. 30 seconds)")
+             model_choice = gr.Dropdown(
+                 choices=get_yolo_models(),
+                 value=get_yolo_models()[0],  # default must be a choice, not an index
+                 label="Select YOLO Model"
+             )
+             submit_btn = gr.Button("Process Video")
+
+         with gr.Column():
+             stats_output = gr.Textbox(
+                 label="Processing Statistics",
+                 lines=10,
+                 interactive=False
+             )
+
+     with gr.Row():
+         video_output = gr.Video(label="Processed Video")
+         gallery_output = gr.Gallery(
+             label="Detected Faces",
+             show_label=True,
+             elem_id="gallery",
+             columns=5,
+             rows=2
+         )
+
+     submit_btn.click(
+         fn=process_video,
+         inputs=[video_input, model_choice],
+         outputs=[stats_output, video_output, gallery_output]
+     )
+
+ # Launch the interface
+ if __name__ == "__main__":
+     demo.launch()
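
For quick local testing without the Gradio UI, process_video can also be called directly. This is an illustrative sketch, not part of the commit: "sample.mp4" is a hypothetical input path, and ultralytics downloads the chosen weights on first use.

    from app import process_video

    # "sample.mp4" is a placeholder; any short clip works
    stats, annotated_path, face_paths = process_video("sample.mp4", "yolov8n.pt")
    print(stats)            # formatted statistics block
    print(annotated_path)   # annotated .mp4 written to a temp dir
    print(len(face_paths), "face crops saved")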
requirements.txt ADDED
@@ -0,0 +1,70 @@
+ aiofiles==23.2.1
+ annotated-types==0.7.0
+ anyio==4.6.2.post1
+ certifi==2024.8.30
+ charset-normalizer==3.4.0
+ click==8.1.7
+ contourpy==1.3.1
+ cycler==0.12.1
+ fastapi==0.115.5
+ ffmpy==0.4.0
+ filelock==3.16.1
+ filterpy==1.4.5
+ fonttools==4.55.0
+ fsspec==2024.10.0
+ gradio==5.6.0
+ gradio_client==1.4.3
+ h11==0.14.0
+ httpcore==1.0.7
+ httpx==0.27.2
+ huggingface-hub==0.26.2
+ idna==3.10
+ Jinja2==3.1.4
+ kiwisolver==1.4.7
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ matplotlib==3.9.2
+ mdurl==0.1.2
+ mpmath==1.3.0
+ networkx==3.4.2
+ numpy==1.26.4
+ opencv-python==4.10.0.84
+ orjson==3.10.11
+ packaging==24.2
+ pandas==2.2.3
+ pillow==11.0.0
+ psutil==6.1.0
+ py-cpuinfo==9.0.0
+ pydantic==2.9.2
+ pydantic_core==2.23.4
+ pydub==0.25.1
+ Pygments==2.18.0
+ pyparsing==3.2.0
+ python-dateutil==2.9.0.post0
+ python-multipart==0.0.12
+ pytz==2024.2
+ PyYAML==6.0.2
+ requests==2.32.3
+ rich==13.9.4
+ ruff==0.7.4
+ safehttpx==0.1.1
+ scipy==1.14.1
+ seaborn==0.13.2
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.16.0
+ sniffio==1.3.1
+ starlette==0.41.3
+ sympy==1.13.3
+ tomlkit==0.12.0
+ torch==2.2.2
+ torchvision==0.17.2
+ tqdm==4.67.0
+ typer==0.13.1
+ typing_extensions==4.12.2
+ tzdata==2024.2
+ ultralytics==8.3.33
+ ultralytics-thop==2.0.11
+ urllib3==2.2.3
+ uvicorn==0.32.0
+ websockets==12.0
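
These pins can be installed with pip install -r requirements.txt before launching the app.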
util/sort.py ADDED
@@ -0,0 +1,248 @@
+ """
+ SORT: A Simple, Online and Realtime Tracker
+ Copyright (C) 2016-2020 Alex Bewley alex@bewley.ai
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ """
+ from __future__ import print_function
+
+ import numpy as np
+ from filterpy.kalman import KalmanFilter
+
+ np.random.seed(0)
+
+
+ def linear_assignment(cost_matrix):
+     try:
+         import lap
+         _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
+         return np.array([[y[i], i] for i in x if i >= 0])
+     except ImportError:
+         from scipy.optimize import linear_sum_assignment
+         x, y = linear_sum_assignment(cost_matrix)
+         return np.array(list(zip(x, y)))
+
+
+ def iou_batch(bb_test, bb_gt):
+     """
+     From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2]
+     """
+     bb_gt = np.expand_dims(bb_gt, 0)
+     bb_test = np.expand_dims(bb_test, 1)
+
+     xx1 = np.maximum(bb_test[..., 0], bb_gt[..., 0])
+     yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1])
+     xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2])
+     yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3])
+     w = np.maximum(0., xx2 - xx1)
+     h = np.maximum(0., yy2 - yy1)
+     wh = w * h
+     o = wh / ((bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1])
+               + (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1]) - wh)
+     return o
+
+
+ def convert_bbox_to_z(bbox):
+     """
+     Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
+     [x,y,s,r] where x,y is the centre of the box, s is the scale/area and r is
+     the aspect ratio
+     """
+     w = bbox[2] - bbox[0]
+     h = bbox[3] - bbox[1]
+     x = bbox[0] + w / 2.
+     y = bbox[1] + h / 2.
+     s = w * h  # scale is just area
+     r = w / float(h)
+     return np.array([x, y, s, r]).reshape((4, 1))
+
+
+ def convert_x_to_bbox(x, score=None):
+     """
+     Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
+     [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
+     """
+     w = np.sqrt(x[2] * x[3])
+     h = x[2] / w
+     if score is None:
+         return np.array([x[0] - w / 2., x[1] - h / 2., x[0] + w / 2., x[1] + h / 2.]).reshape((1, 4))
+     else:
+         return np.array([x[0] - w / 2., x[1] - h / 2., x[0] + w / 2., x[1] + h / 2., score]).reshape((1, 5))
+
+
+ class KalmanBoxTracker(object):
+     """
+     This class represents the internal state of individual tracked objects observed as bbox.
+     """
+     count = 0
+
+     def __init__(self, bbox):
+         """
+         Initialises a tracker using an initial bounding box.
+         """
+         # define constant velocity model
+         self.kf = KalmanFilter(dim_x=7, dim_z=4)
+         self.kf.F = np.array(
+             [[1, 0, 0, 0, 1, 0, 0], [0, 1, 0, 0, 0, 1, 0], [0, 0, 1, 0, 0, 0, 1], [0, 0, 0, 1, 0, 0, 0],
+              [0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 1]])
+         self.kf.H = np.array(
+             [[1, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0]])
+
+         self.kf.R[2:, 2:] *= 10.
+         self.kf.P[4:, 4:] *= 1000.  # give high uncertainty to the unobservable initial velocities
+         self.kf.P *= 10.
+         self.kf.Q[-1, -1] *= 0.01
+         self.kf.Q[4:, 4:] *= 0.01
+
+         self.kf.x[:4] = convert_bbox_to_z(bbox)
+         self.time_since_update = 0
+         self.id = KalmanBoxTracker.count
+         KalmanBoxTracker.count += 1
+         self.history = []
+         self.hits = 0
+         self.hit_streak = 0
+         self.age = 0
+
+     def update(self, bbox):
+         """
+         Updates the state vector with an observed bbox.
+         """
+         self.time_since_update = 0
+         self.history = []
+         self.hits += 1
+         self.hit_streak += 1
+         self.kf.update(convert_bbox_to_z(bbox))
+
+     def predict(self):
+         """
+         Advances the state vector and returns the predicted bounding box estimate.
+         """
+         if (self.kf.x[6] + self.kf.x[2]) <= 0:
+             self.kf.x[6] *= 0.0
+         self.kf.predict()
+         self.age += 1
+         if self.time_since_update > 0:
+             self.hit_streak = 0
+         self.time_since_update += 1
+         self.history.append(convert_x_to_bbox(self.kf.x))
+         return self.history[-1]
+
+     def get_state(self):
+         """
+         Returns the current bounding box estimate.
+         """
+         return convert_x_to_bbox(self.kf.x)
+
+
+ def associate_detections_to_trackers(detections, trackers, iou_threshold=0.3):
+     """
+     Assigns detections to tracked objects (both represented as bounding boxes)
+
+     Returns 3 lists: matches, unmatched_detections and unmatched_trackers
+     """
+     if len(trackers) == 0:
+         return np.empty((0, 2), dtype=int), np.arange(len(detections)), np.empty((0, 5), dtype=int)
+
+     iou_matrix = iou_batch(detections, trackers)
+
+     if min(iou_matrix.shape) > 0:
+         a = (iou_matrix > iou_threshold).astype(np.int32)
+         if a.sum(1).max() == 1 and a.sum(0).max() == 1:
+             matched_indices = np.stack(np.where(a), axis=1)
+         else:
+             matched_indices = linear_assignment(-iou_matrix)
+     else:
+         matched_indices = np.empty(shape=(0, 2))
+
+     unmatched_detections = []
+     for d, det in enumerate(detections):
+         if d not in matched_indices[:, 0]:
+             unmatched_detections.append(d)
+     unmatched_trackers = []
+     for t, trk in enumerate(trackers):
+         if t not in matched_indices[:, 1]:
+             unmatched_trackers.append(t)
+
+     # filter out matches with low IOU
+     matches = []
+     for m in matched_indices:
+         if iou_matrix[m[0], m[1]] < iou_threshold:
+             unmatched_detections.append(m[0])
+             unmatched_trackers.append(m[1])
+         else:
+             matches.append(m.reshape(1, 2))
+     if len(matches) == 0:
+         matches = np.empty((0, 2), dtype=int)
+     else:
+         matches = np.concatenate(matches, axis=0)
+
+     return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
+
+
+ class Sort(object):
+     def __init__(self, max_age=1, min_hits=3, iou_threshold=0.3):
+         """
+         Sets key parameters for SORT
+         """
+         self.max_age = max_age
+         self.min_hits = min_hits
+         self.iou_threshold = iou_threshold
+         self.trackers = []
+         self.frame_count = 0
+
+     def update(self, dets=np.empty((0, 5))):
+         """
+         Params:
+           dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
+         Requires: this method must be called once for each frame even with empty detections (use np.empty((0, 5)) for frames without detections).
+         Returns a similar array, where the last column is the object ID.
+
+         NOTE: The number of objects returned may differ from the number of detections provided.
+         """
+         self.frame_count += 1
+         # get predicted locations from existing trackers.
+         trks = np.zeros((len(self.trackers), 5))
+         to_del = []
+         ret = []
+         for t, trk in enumerate(trks):
+             pos = self.trackers[t].predict()[0]
+             trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
+             if np.any(np.isnan(pos)):
+                 to_del.append(t)
+         trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
+         for t in reversed(to_del):
+             self.trackers.pop(t)
+         matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets, trks, self.iou_threshold)
+
+         # update matched trackers with assigned detections
+         for m in matched:
+             self.trackers[m[1]].update(dets[m[0], :])
+
+         # create and initialise new trackers for unmatched detections
+         for i in unmatched_dets:
+             trk = KalmanBoxTracker(dets[i, :])
+             self.trackers.append(trk)
+         i = len(self.trackers)
+         for trk in reversed(self.trackers):
+             d = trk.get_state()[0]
+             if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
+                 ret.append(np.concatenate((d, [trk.id + 1])).reshape(1, -1))  # +1 as MOT benchmark requires positive IDs
+             i -= 1
+             # remove dead tracklet
+             if trk.time_since_update > self.max_age:
+                 self.trackers.pop(i)
+         if len(ret) > 0:
+             return np.concatenate(ret)
+         return np.empty((0, 5))
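
As a quick sanity check of the vendored tracker, a minimal sketch (synthetic boxes, illustrative only) that feeds two frames of detections in the expected [x1,y1,x2,y2,score] format and prints the assigned IDs:

    import numpy as np
    from util.sort import Sort

    # min_hits=1 so tracks are reported from the first frame
    tracker = Sort(max_age=1, min_hits=1, iou_threshold=0.3)

    # one synthetic detection drifting slightly right across two frames
    for dets in (np.array([[100., 100., 200., 300., 0.9]]),
                 np.array([[105., 100., 205., 300., 0.9]])):
        tracks = tracker.update(dets)
        print(tracks)  # rows of [x1, y1, x2, y2, track_id]; IDs are 1-based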