arnavam commited on
Commit
514a298
·
1 Parent(s): 6e7969f

made seld into fastapi

Browse files
.python-version DELETED
@@ -1 +0,0 @@
1
- 3.13
 
 
API_DOCS.md → documents/API_DOCS.md RENAMED
File without changes
REFACTORING_SUMMARY.md → documents/REFACTORING_SUMMARY.md RENAMED
File without changes
pyproject.toml DELETED
@@ -1,23 +0,0 @@
1
- [project]
2
- name = "backend"
3
- version = "0.1.0"
4
- description = "Add your description here"
5
- readme = "README.md"
6
- requires-python = ">=3.13"
7
- dependencies = [
8
- "deepface>=0.0.99",
9
- "fastapi>=0.135.3",
10
- "pymongo>=4.9.0",
11
- "numpy>=2.4.4",
12
- "opencv-python>=4.13.0.92",
13
- "passlib[bcrypt]>=1.7.4",
14
- "python-jose[cryptography]>=3.5.0",
15
- "tf-keras>=2.21.0",
16
- "ultralytics>=8.4.33",
17
- "uvicorn>=0.43.0",
18
- "websockets>=16.0",
19
- "lap>=0.5.13",
20
- "bcrypt>=5.0.0",
21
- "python-dotenv>=1.2.2",
22
- "python-multipart>=0.0.22",
23
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,16 +1,366 @@
1
- # Backend dependencies
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  fastapi==0.135.3
3
- uvicorn==0.43.0
4
- websockets==16.0
5
- opencv-python==4.13.0.92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  numpy==2.4.4
7
- scipy==1.17.1
8
- ultralytics==8.4.33
9
- deepface==0.0.99
10
- tf-keras
11
- python-multipart
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  pymongo==4.16.0
13
- passlib[bcrypt]==1.7.4
14
- bcrypt==5.0.0
15
- python-jose[cryptography]==3.5.0
 
 
 
 
 
 
16
  python-dotenv==1.2.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile pyproject.toml -o backend/requirements.txt
3
+ absl-py==2.4.0
4
+ # via
5
+ # keras
6
+ # tensorflow
7
+ annotated-doc==0.0.4
8
+ # via
9
+ # fastapi
10
+ # typer
11
+ annotated-types==0.7.0
12
+ # via pydantic
13
+ anyio==4.13.0
14
+ # via
15
+ # httpx
16
+ # starlette
17
+ astunparse==1.6.3
18
+ # via tensorflow
19
+ bcrypt==5.0.0
20
+ # via
21
+ # backend (pyproject.toml)
22
+ # passlib
23
+ beautifulsoup4==4.14.3
24
+ # via gdown
25
+ blinker==1.9.0
26
+ # via flask
27
+ certifi==2026.4.22
28
+ # via
29
+ # httpcore
30
+ # httpx
31
+ # requests
32
+ cffi==2.0.0
33
+ # via cryptography
34
+ charset-normalizer==3.4.7
35
+ # via requests
36
+ click==8.3.3
37
+ # via
38
+ # flask
39
+ # typer
40
+ # uvicorn
41
+ contourpy==1.3.3
42
+ # via matplotlib
43
+ cryptography==48.0.0
44
+ # via python-jose
45
+ cycler==0.12.1
46
+ # via matplotlib
47
+ deepface==0.0.99
48
+ # via backend (pyproject.toml)
49
+ dnspython==2.8.0
50
+ # via pymongo
51
+ ecdsa==0.19.2
52
+ # via python-jose
53
  fastapi==0.135.3
54
+ # via backend (pyproject.toml)
55
+ filelock==3.29.0
56
+ # via
57
+ # gdown
58
+ # huggingface-hub
59
+ # torch
60
+ fire==0.7.1
61
+ # via deepface
62
+ flask==3.1.3
63
+ # via
64
+ # deepface
65
+ # flask-cors
66
+ flask-cors==6.0.2
67
+ # via deepface
68
+ flatbuffers==25.12.19
69
+ # via tensorflow
70
+ fonttools==4.62.1
71
+ # via matplotlib
72
+ fsspec==2026.4.0
73
+ # via
74
+ # huggingface-hub
75
+ # torch
76
+ gast==0.7.0
77
+ # via tensorflow
78
+ gdown==6.0.0
79
+ # via
80
+ # deepface
81
+ # retina-face
82
+ google-pasta==0.2.0
83
+ # via tensorflow
84
+ grpcio==1.80.0
85
+ # via tensorflow
86
+ gunicorn==26.0.0
87
+ # via deepface
88
+ h11==0.16.0
89
+ # via
90
+ # httpcore
91
+ # uvicorn
92
+ h5py==3.14.0
93
+ # via
94
+ # keras
95
+ # tensorflow
96
+ hf-xet==1.4.3
97
+ # via huggingface-hub
98
+ httpcore==1.0.9
99
+ # via httpx
100
+ httpx==0.28.1
101
+ # via huggingface-hub
102
+ huggingface-hub==1.13.0
103
+ # via
104
+ # tokenizers
105
+ # transformers
106
+ idna==3.13
107
+ # via
108
+ # anyio
109
+ # httpx
110
+ # requests
111
+ itsdangerous==2.2.0
112
+ # via flask
113
+ jinja2==3.1.6
114
+ # via
115
+ # flask
116
+ # torch
117
+ joblib==1.5.3
118
+ # via mtcnn
119
+ keras==3.14.0
120
+ # via
121
+ # deepface
122
+ # tensorflow
123
+ kiwisolver==1.5.0
124
+ # via matplotlib
125
+ lap==0.5.13
126
+ # via backend (pyproject.toml)
127
+ libclang==18.1.1
128
+ # via tensorflow
129
+ lightdsa==0.0.3
130
+ # via deepface
131
+ lightecc==0.0.6
132
+ # via
133
+ # lightdsa
134
+ # lightphe
135
+ lightphe==0.0.24
136
+ # via deepface
137
+ lz4==4.4.5
138
+ # via mtcnn
139
+ markdown-it-py==4.0.0
140
+ # via rich
141
+ markupsafe==3.0.3
142
+ # via
143
+ # flask
144
+ # jinja2
145
+ # werkzeug
146
+ matplotlib==3.10.9
147
+ # via ultralytics
148
+ mdurl==0.1.2
149
+ # via markdown-it-py
150
+ ml-dtypes==0.5.4
151
+ # via
152
+ # keras
153
+ # tensorflow
154
+ mpmath==1.3.0
155
+ # via sympy
156
+ mtcnn==1.0.0
157
+ # via deepface
158
+ namex==0.1.0
159
+ # via keras
160
+ networkx==3.6.1
161
+ # via torch
162
  numpy==2.4.4
163
+ # via
164
+ # backend (pyproject.toml)
165
+ # contourpy
166
+ # deepface
167
+ # h5py
168
+ # keras
169
+ # lap
170
+ # matplotlib
171
+ # ml-dtypes
172
+ # opencv-python
173
+ # pandas
174
+ # retina-face
175
+ # scipy
176
+ # tensorflow
177
+ # torchvision
178
+ # transformers
179
+ # ultralytics
180
+ # ultralytics-thop
181
+ opencv-python==4.13.0.92
182
+ # via
183
+ # backend (pyproject.toml)
184
+ # deepface
185
+ # retina-face
186
+ # ultralytics
187
+ opt-einsum==3.4.0
188
+ # via tensorflow
189
+ optree==0.19.0
190
+ # via keras
191
+ packaging==26.2
192
+ # via
193
+ # gunicorn
194
+ # huggingface-hub
195
+ # keras
196
+ # matplotlib
197
+ # tensorflow
198
+ # transformers
199
+ # wheel
200
+ pandas==3.0.2
201
+ # via deepface
202
+ passlib==1.7.4
203
+ # via backend (pyproject.toml)
204
+ pillow==12.2.0
205
+ # via
206
+ # deepface
207
+ # matplotlib
208
+ # retina-face
209
+ # torchvision
210
+ # ultralytics
211
+ polars==1.40.1
212
+ # via ultralytics
213
+ polars-runtime-32==1.40.1
214
+ # via polars
215
+ protobuf==7.34.1
216
+ # via tensorflow
217
+ psutil==7.2.2
218
+ # via ultralytics
219
+ pyasn1==0.6.3
220
+ # via
221
+ # python-jose
222
+ # rsa
223
+ pycparser==3.0
224
+ # via cffi
225
+ pydantic==2.13.3
226
+ # via fastapi
227
+ pydantic-core==2.46.3
228
+ # via pydantic
229
+ pygments==2.20.0
230
+ # via rich
231
  pymongo==4.16.0
232
+ # via backend (pyproject.toml)
233
+ pyparsing==3.3.2
234
+ # via matplotlib
235
+ pysocks==1.7.1
236
+ # via requests
237
+ python-dateutil==2.9.0.post0
238
+ # via
239
+ # matplotlib
240
+ # pandas
241
  python-dotenv==1.2.2
242
+ # via
243
+ # backend (pyproject.toml)
244
+ # deepface
245
+ python-jose==3.5.0
246
+ # via backend (pyproject.toml)
247
+ python-multipart==0.0.27
248
+ # via backend (pyproject.toml)
249
+ pyyaml==6.0.3
250
+ # via
251
+ # huggingface-hub
252
+ # transformers
253
+ # ultralytics
254
+ regex==2026.4.4
255
+ # via transformers
256
+ requests==2.33.1
257
+ # via
258
+ # deepface
259
+ # gdown
260
+ # tensorflow
261
+ # ultralytics
262
+ retina-face==0.0.17
263
+ # via deepface
264
+ rich==15.0.0
265
+ # via
266
+ # keras
267
+ # typer
268
+ rsa==4.9.1
269
+ # via python-jose
270
+ safetensors==0.7.0
271
+ # via transformers
272
+ scipy==1.17.1
273
+ # via ultralytics
274
+ setuptools==81.0.0
275
+ # via
276
+ # tensorflow
277
+ # torch
278
+ shellingham==1.5.4
279
+ # via typer
280
+ six==1.17.0
281
+ # via
282
+ # astunparse
283
+ # ecdsa
284
+ # google-pasta
285
+ # python-dateutil
286
+ # tensorflow
287
+ soupsieve==2.8.3
288
+ # via beautifulsoup4
289
+ starlette==1.0.0
290
+ # via fastapi
291
+ sympy==1.14.0
292
+ # via
293
+ # lightdsa
294
+ # lightphe
295
+ # torch
296
+ tensorflow==2.21.0
297
+ # via
298
+ # deepface
299
+ # retina-face
300
+ # tf-keras
301
+ termcolor==3.3.0
302
+ # via
303
+ # fire
304
+ # tensorflow
305
+ tf-keras==2.21.0
306
+ # via backend (pyproject.toml)
307
+ tokenizers==0.22.2
308
+ # via transformers
309
+ torch==2.11.0
310
+ # via
311
+ # torchvision
312
+ # ultralytics
313
+ # ultralytics-thop
314
+ torchaudio==2.11.0
315
+ # via backend (pyproject.toml)
316
+ torchvision==0.26.0
317
+ # via ultralytics
318
+ tqdm==4.67.3
319
+ # via
320
+ # deepface
321
+ # gdown
322
+ # huggingface-hub
323
+ # lightdsa
324
+ # lightecc
325
+ # lightphe
326
+ # transformers
327
+ transformers==5.7.0
328
+ # via backend (pyproject.toml)
329
+ typer==0.25.1
330
+ # via
331
+ # huggingface-hub
332
+ # transformers
333
+ typing-extensions==4.15.0
334
+ # via
335
+ # beautifulsoup4
336
+ # fastapi
337
+ # grpcio
338
+ # huggingface-hub
339
+ # optree
340
+ # pydantic
341
+ # pydantic-core
342
+ # tensorflow
343
+ # torch
344
+ # typing-inspection
345
+ typing-inspection==0.4.2
346
+ # via
347
+ # fastapi
348
+ # pydantic
349
+ ultralytics==8.4.33
350
+ # via backend (pyproject.toml)
351
+ ultralytics-thop==2.0.19
352
+ # via ultralytics
353
+ urllib3==2.6.3
354
+ # via requests
355
+ uvicorn==0.43.0
356
+ # via backend (pyproject.toml)
357
+ websockets==16.0
358
+ # via backend (pyproject.toml)
359
+ werkzeug==3.1.8
360
+ # via
361
+ # flask
362
+ # flask-cors
363
+ wheel==0.47.0
364
+ # via astunparse
365
+ wrapt==2.1.2
366
+ # via tensorflow
server.py CHANGED
@@ -57,8 +57,9 @@ current_target_angle = None
57
  current_target_distance = None
58
 
59
  # Configurable parameters for smooth panning
60
- SMOOTHING_FACTOR = 0.1 # Lower is smoother but slower (similar to Dart's TweenAnimation)
61
- TARGET_ASPECT_RATIO = 16.0 / 9.0 # Assuming output is meant to be 16:9
 
62
 
63
  app = FastAPI(title="AFS Tracking Backend")
64
 
@@ -87,7 +88,8 @@ audio_angles_collection = None
87
  pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
88
 
89
  # JWT Configuration
90
- SECRET_KEY = os.getenv("JWT_SECRET_KEY", "your-secret-key-change-in-production")
 
91
  ALGORITHM = "HS256"
92
  ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7 # 7 days
93
 
@@ -157,7 +159,7 @@ def create_access_token(data: dict, expires_delta: timedelta | None = None):
157
  async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
158
  collection = require_users_collection()
159
  token = credentials.credentials
160
-
161
  try:
162
  payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
163
  user_id: str = payload.get("sub")
@@ -171,7 +173,7 @@ async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(s
171
  status_code=status.HTTP_401_UNAUTHORIZED,
172
  detail="Invalid or expired token",
173
  )
174
-
175
  from bson import ObjectId
176
  try:
177
  user_doc = await collection.find_one({"_id": ObjectId(user_id)})
@@ -180,19 +182,20 @@ async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(s
180
  status_code=status.HTTP_401_UNAUTHORIZED,
181
  detail="User not found",
182
  )
183
-
184
  if user_doc is None:
185
  raise HTTPException(
186
  status_code=status.HTTP_401_UNAUTHORIZED,
187
  detail="User not found",
188
  )
189
-
190
  return UserPublic(
191
  id=str(user_doc["_id"]),
192
  full_name=user_doc["full_name"],
193
  email=user_doc["email"],
194
  )
195
 
 
196
  def decode_binary_image(img_data: bytes):
197
  """Decodes raw JPEG bytes into an OpenCV numpy array."""
198
  try:
@@ -203,6 +206,7 @@ def decode_binary_image(img_data: bytes):
203
  logger.error(f"Failed to decode image: {e}")
204
  return None
205
 
 
206
  def apply_center_stage_crop(frame, tracking_data):
207
  """
208
  Applies an exponential moving average (EMA) to smoothly pan and zoom
@@ -210,16 +214,16 @@ def apply_center_stage_crop(frame, tracking_data):
210
  Returns the cropped frame.
211
  """
212
  global current_cx, current_cy, current_scale, current_target_angle, current_target_distance
213
-
214
  h, w = frame.shape[:2]
215
-
216
  # Defaults
217
  target_cx = 0.5
218
  target_cy = 0.5
219
  target_scale = 1.0
220
-
221
  target_found = False
222
-
223
  # Calculate target state based on tracking data
224
  boxes = tracking_data.get("boxes", [])
225
  if tracking_data.get("mode") == "multi":
@@ -229,44 +233,45 @@ def apply_center_stage_crop(frame, tracking_data):
229
  box_cy = (ab["y1"] + ab["y2"]) / 2.0
230
  box_w = ab["x2"] - ab["x1"]
231
  box_h = ab["y2"] - ab["y1"]
232
-
233
  target_cx = box_cx / w
234
  target_cy = box_cy / h
235
  target_found = True
236
-
237
  # Target scale logic (from Dart): max dimension proportion * 1.5 margin
238
  max_dim = max(box_w / w, box_h / h)
239
  target_scale = 1.0 / (max_dim * 1.5)
240
  # Clamp scale
241
  target_scale = max(1.0, min(target_scale, 3.0))
242
- else: # single
243
  target_box = None
244
  for b in boxes:
245
  if b.get("is_target"):
246
  target_box = b
247
  break
248
-
249
  if target_box:
250
  box_cx = (target_box["x1"] + target_box["x2"]) / 2.0
251
  box_cy = (target_box["y1"] + target_box["y2"]) / 2.0
252
  box_w = target_box["x2"] - target_box["x1"]
253
  box_h = target_box["y2"] - target_box["y1"]
254
-
255
  target_cx = box_cx / w
256
  target_cy = box_cy / h
257
  target_found = True
258
-
259
  max_dim = max(box_w / w, box_h / h)
260
- target_scale = 1.0 / (max_dim * 2.0) # slightly tighter for single person
 
261
  target_scale = max(1.0, min(target_scale, 3.0))
262
 
263
  if target_found:
264
  # Calculate distance and angle from the frame center (w/2, h/2) to the target bounding box center (box_cx, box_cy)
265
  center_x, center_y = w / 2.0, h / 2.0
266
-
267
  dx = box_cx - center_x
268
  dy = box_cy - center_y
269
-
270
  current_target_distance = math.hypot(dx, dy)
271
  # Convert atan2 result to 0-360 degrees
272
  angle = math.degrees(math.atan2(dy, dx))
@@ -284,7 +289,7 @@ def apply_center_stage_crop(frame, tracking_data):
284
  # When scale is S, the crop width is w / S
285
  crop_w = int(w / current_scale)
286
  crop_h = int(h / current_scale)
287
-
288
  # Enforce aspect ratio
289
  # If crop_w / crop_h is not 16:9, adjust one to match
290
  current_ar = crop_w / max(1, crop_h)
@@ -298,10 +303,10 @@ def apply_center_stage_crop(frame, tracking_data):
298
  # Calculate top-left point of crop, clamping to frame boundaries
299
  center_px_x = int(current_cx * w)
300
  center_px_y = int(current_cy * h)
301
-
302
  start_x = max(0, center_px_x - crop_w // 2)
303
  start_y = max(0, center_px_y - crop_h // 2)
304
-
305
  # Adjust if crop box goes out of bounds
306
  if start_x + crop_w > w:
307
  start_x = w - crop_w
@@ -312,6 +317,7 @@ def apply_center_stage_crop(frame, tracking_data):
312
  cropped = frame[start_y:start_y+crop_h, start_x:start_x+crop_w]
313
  return cropped
314
 
 
315
  async def generate_obs_stream():
316
  """Generator for the MJPEG stream used by OBS."""
317
  global latest_obs_frame
@@ -325,13 +331,15 @@ async def generate_obs_stream():
325
  await asyncio.sleep(0.1)
326
  continue
327
  # Use asyncio sleep to prevent blocking the event loop
328
- await asyncio.sleep(0.033) # roughly 30 fps
 
329
 
330
  @app.get("/obs_feed")
331
  async def obs_feed():
332
  """Endpoint for OBS Media Source to connect to."""
333
  return StreamingResponse(generate_obs_stream(), media_type="multipart/x-mixed-replace; boundary=frame")
334
 
 
335
  async def vcam_generator_loop():
336
  """Background task to push frames to the virtual camera at 30fps."""
337
  global is_obs_active, vcam, latest_vcam_frame
@@ -343,6 +351,7 @@ async def vcam_generator_loop():
343
  logger.error(f"vcam loop error: {e}")
344
  await asyncio.sleep(1/30)
345
 
 
346
  @app.get("/")
347
  async def health_check():
348
  """Health check endpoint."""
@@ -353,6 +362,7 @@ async def health_check():
353
  "mongodb": status_db
354
  }
355
 
 
356
  async def mongodb_reconnect_loop():
357
  """Background task to attempt MongoDB reconnection if disconnected."""
358
  global mongo_client, users_collection, audio_recordings_collection, audio_settings_collection
@@ -362,10 +372,11 @@ async def mongodb_reconnect_loop():
362
  mongo_db_name = os.getenv("MONGODB_DB", "afs")
363
  try:
364
  logger.info("Attempting to reconnect to MongoDB...")
365
- client = AsyncMongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
 
366
  # Ping to force connection verification
367
  await client.admin.command('ping')
368
-
369
  # Re-initialize
370
  mongo_client = client
371
  db = mongo_client[mongo_db_name]
@@ -373,7 +384,7 @@ async def mongodb_reconnect_loop():
373
  audio_recordings_collection = db["audio_recordings"]
374
  audio_settings_collection = db["audio_settings"]
375
  audio_angles_collection = db["audio_angles"]
376
-
377
  await users_collection.create_index("email", unique=True)
378
  logger.info("Successfully reconnected to MongoDB.")
379
  except Exception as e:
@@ -383,10 +394,11 @@ async def mongodb_reconnect_loop():
383
  audio_recordings_collection = None
384
  audio_settings_collection = None
385
  audio_angles_collection = None
386
-
387
  # Wait before next check (e.g., 10 seconds)
388
  await asyncio.sleep(10)
389
 
 
390
  @app.on_event("startup")
391
  async def startup_event():
392
  global mongo_client, users_collection, audio_recordings_collection, audio_settings_collection, audio_angles_collection
@@ -394,20 +406,22 @@ async def startup_event():
394
  mongo_db_name = os.getenv("MONGODB_DB", "afs")
395
 
396
  try:
397
- mongo_client = AsyncMongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
 
398
  # Ping to force connection verification
399
  await mongo_client.admin.command('ping')
400
-
401
  db = mongo_client[mongo_db_name]
402
  users_collection = db["users"]
403
  audio_recordings_collection = db["audio_recordings"]
404
  audio_settings_collection = db["audio_settings"]
405
  audio_angles_collection = db["audio_angles"]
406
-
407
  await users_collection.create_index("email", unique=True)
408
  logger.info("Connected to MongoDB and initialized collections.")
409
  except Exception as e:
410
- logger.warning(f"MongoDB connection failed on startup: {e}. Starting reconnection loop.")
 
411
  mongo_client = None
412
  users_collection = None
413
  audio_recordings_collection = None
@@ -447,7 +461,7 @@ async def register(payload: RegisterRequest):
447
  "updated_at": now,
448
  }
449
  insert_result = await collection.insert_one(user_doc)
450
-
451
  user_id = str(insert_result.inserted_id)
452
  access_token = create_access_token(data={"sub": user_id})
453
 
@@ -474,7 +488,7 @@ async def login(payload: LoginRequest):
474
  status_code=status.HTTP_401_UNAUTHORIZED,
475
  detail="Invalid email or password.",
476
  )
477
-
478
  user_id = str(user_doc["_id"])
479
  access_token = create_access_token(data={"sub": user_id})
480
 
@@ -495,14 +509,15 @@ async def verify_token(current_user: UserPublic = Depends(get_current_user)):
495
  """Verify JWT token and return user info"""
496
  return current_user
497
 
 
498
  @app.websocket("/ws")
499
  async def websocket_endpoint(websocket: WebSocket):
500
  global is_recording, video_writer, recording_filename, latest_obs_frame, is_obs_active
501
-
502
  await websocket.accept()
503
  logger.info("New WebSocket connection established.")
504
-
505
- current_mode = "single" # Default mode
506
 
507
  try:
508
  while True:
@@ -513,7 +528,8 @@ async def websocket_endpoint(websocket: WebSocket):
513
  try:
514
  payload = json.loads(message["text"])
515
  if "mode" in payload and payload["mode"] != current_mode:
516
- logger.info(f"Switching mode from {current_mode} to {payload['mode']}")
 
517
  current_mode = payload["mode"]
518
  await websocket.send_json({"type": "mode_ack", "mode": current_mode})
519
  elif "command" in payload:
@@ -522,8 +538,10 @@ async def websocket_endpoint(websocket: WebSocket):
522
  if command == "start_recording":
523
  if not is_recording:
524
  is_recording = True
525
- recording_filename = f"capture_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
526
- logger.info(f"Started recording to {recording_filename}")
 
 
527
  await websocket.send_json({"type": "recording_ack", "status": "started"})
528
  elif command == "stop_recording":
529
  if is_recording:
@@ -531,7 +549,8 @@ async def websocket_endpoint(websocket: WebSocket):
531
  if video_writer is not None:
532
  video_writer.release()
533
  video_writer = None
534
- logger.info(f"Stopped recording. File saved as {recording_filename}")
 
535
  elif command == "start_obs":
536
  if not is_obs_active:
537
  is_obs_active = True
@@ -549,7 +568,7 @@ async def websocket_endpoint(websocket: WebSocket):
549
  elif "bytes" in message:
550
  frame_data = message["bytes"]
551
  frame = decode_binary_image(frame_data)
552
-
553
  if frame is None:
554
  await websocket.send_json({"error": "Failed to decode binary frame"})
555
  continue
@@ -570,38 +589,44 @@ async def websocket_endpoint(websocket: WebSocket):
570
  executor, run_inference, frame, current_mode
571
  )
572
  except Exception as e:
573
- logger.error(f"Error processing frame in {current_mode} mode: {e}")
 
574
  response_data = {"error": str(e)}
575
 
576
  # Send results back to client
577
  response_data["mode"] = current_mode
578
  await websocket.send_json(response_data)
579
-
580
  # Apply Crop and Handle OBS / Recording
581
  try:
582
- cropped_frame = apply_center_stage_crop(frame, response_data)
583
-
 
584
  # 1. Update OBS Feed
585
  if is_obs_active:
586
  ret, buffer = cv2.imencode('.jpg', cropped_frame)
587
  if ret:
588
  with obs_frame_lock:
589
  latest_obs_frame = buffer.tobytes()
590
-
591
  # 2. Update Recording Output
592
  if is_recording:
593
  h, w = cropped_frame.shape[:2]
594
  if video_writer is None:
595
  # Initialize writer with the exact dimensions of the FIRST cropped frame
596
  fourcc = cv2.VideoWriter_fourcc(*'avc1')
597
- video_writer = cv2.VideoWriter(recording_filename, fourcc, 5.0, (w, h))
598
-
 
599
  # Ensure we try to resize cleanly if aspect ratio forces slight off-by-one errors over time
600
  if video_writer is not None:
601
- target_w = int(video_writer.get(cv2.CAP_PROP_FRAME_WIDTH))
602
- target_h = int(video_writer.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
 
603
  if (w, h) != (target_w, target_h):
604
- cropped_frame = cv2.resize(cropped_frame, (target_w, target_h))
 
605
  video_writer.write(cropped_frame)
606
  except Exception as e:
607
  logger.error(f"Error handling post-process crops: {e}")
@@ -621,6 +646,7 @@ async def websocket_endpoint(websocket: WebSocket):
621
 
622
  # === FACE RECOGNITION ENDPOINTS ===
623
 
 
624
  @app.post("/api/face/upload-video")
625
  async def upload_reference_video(
626
  file: UploadFile = File(...),
@@ -628,20 +654,23 @@ async def upload_reference_video(
628
  ):
629
  """Upload a 360-degree reference video for face recognition training."""
630
  if not file.filename.endswith(('.mp4', '.avi', '.mov', '.mkv')):
631
- raise HTTPException(status_code=400, detail="Invalid video format. Use mp4, avi, mov, or mkv")
632
-
 
633
  video_path = MODEL_DIR / "my_scan.mp4"
634
-
635
  try:
636
  with open(video_path, 'wb') as f:
637
  shutil.copyfileobj(file.file, f)
638
-
639
  embeddings, num_frames = await asyncio.get_event_loop().run_in_executor(
640
- executor, face_service.extract_embeddings_from_video, str(video_path)
 
641
  )
642
-
643
- face_service.save_embeddings_cache(embeddings, str(video_path), num_frames)
644
-
 
645
  return {
646
  "ok": True,
647
  "message": "Video processed successfully",
@@ -652,6 +681,7 @@ async def upload_reference_video(
652
  logger.error(f"Error processing video: {e}")
653
  raise HTTPException(status_code=500, detail=str(e))
654
 
 
655
  @app.post("/api/face/upload-image")
656
  async def upload_reference_image(
657
  file: UploadFile = File(...),
@@ -659,18 +689,20 @@ async def upload_reference_image(
659
  ):
660
  """Upload a reference image for face recognition."""
661
  if not file.filename.endswith(('.jpg', '.jpeg', '.png')):
662
- raise HTTPException(status_code=400, detail="Invalid image format. Use jpg, jpeg, or png")
663
-
 
664
  image_path = MODEL_DIR / f"ref_{file.filename}"
665
-
666
  try:
667
  with open(image_path, 'wb') as f:
668
  shutil.copyfileobj(file.file, f)
669
-
670
  embeddings = await asyncio.get_event_loop().run_in_executor(
671
- executor, face_service.extract_embeddings_from_image, str(image_path)
 
672
  )
673
-
674
  return {
675
  "ok": True,
676
  "message": "Image processed successfully",
@@ -681,11 +713,12 @@ async def upload_reference_image(
681
  logger.error(f"Error processing image: {e}")
682
  raise HTTPException(status_code=500, detail=str(e))
683
 
 
684
  @app.get("/api/face/cache-status")
685
  async def get_cache_status(current_user: UserPublic = Depends(get_current_user)):
686
  """Get the current face recognition cache status."""
687
  cache_data = face_service.load_embeddings_cache()
688
-
689
  if cache_data:
690
  return {
691
  "ok": True,
@@ -704,6 +737,7 @@ async def get_cache_status(current_user: UserPublic = Depends(get_current_user))
704
 
705
  # === AUDIO STREAMING ENDPOINTS ===
706
 
 
707
  @app.post("/api/audio/start-stream")
708
  async def start_audio_stream(
709
  sample_rate: int = Form(16000),
@@ -712,9 +746,10 @@ async def start_audio_stream(
712
  ):
713
  """Start a new audio recording stream."""
714
  session_id = str(uuid.uuid4())
715
-
716
  try:
717
- filename = audio_processor.create_audio_stream(session_id, sample_rate, channels)
 
718
  return {
719
  "ok": True,
720
  "session_id": session_id,
@@ -726,46 +761,50 @@ async def start_audio_stream(
726
  logger.error(f"Error starting audio stream: {e}")
727
  raise HTTPException(status_code=500, detail=str(e))
728
 
 
729
  @app.websocket("/ws/audio/{session_id}")
730
  async def websocket_audio_stream(websocket: WebSocket, session_id: str):
731
  """WebSocket endpoint for streaming audio with angle data."""
732
  await websocket.accept()
733
- logger.info(f"Audio WebSocket connection established for session {session_id}")
734
-
 
735
  # Auto-create stream if not exists
736
  if session_id not in audio_processor.active_streams:
737
  audio_processor.create_audio_stream(session_id)
738
  logger.info(f"Auto-created audio stream for session {session_id}")
739
-
740
  try:
741
  while True:
742
  message = await websocket.receive()
743
-
744
  if "bytes" in message:
745
  audio_data = message["bytes"]
746
  audio_processor.write_audio_chunk(session_id, audio_data)
747
  await websocket.send_json({"status": "received", "bytes": len(audio_data)})
748
-
749
  elif "text" in message:
750
  try:
751
  payload = json.loads(message["text"])
752
-
753
  if "audio_data" in payload and "angle" in payload:
754
  audio_bytes = base64.b64decode(payload["audio_data"])
755
  angle = float(payload["angle"])
756
- audio_processor.write_audio_chunk(session_id, audio_bytes, angle)
 
757
  await websocket.send_json({"status": "received", "angle": angle})
758
-
759
  elif payload.get("command") == "stop":
760
  audio_processor.close_audio_stream(session_id)
761
  await websocket.send_json({"status": "stopped", "message": "Stream closed"})
762
  break
763
-
764
  except json.JSONDecodeError:
765
  logger.error("Invalid JSON in audio stream")
766
-
767
  except WebSocketDisconnect:
768
- logger.info(f"Audio WebSocket client disconnected for session {session_id}")
 
769
  if session_id in audio_processor.active_streams:
770
  audio_processor.close_audio_stream(session_id)
771
  except Exception as e:
@@ -773,6 +812,7 @@ async def websocket_audio_stream(websocket: WebSocket, session_id: str):
773
  if session_id in audio_processor.active_streams:
774
  audio_processor.close_audio_stream(session_id)
775
 
 
776
  @app.post("/api/audio/stop-stream/{session_id}")
777
  async def stop_audio_stream(
778
  session_id: str,
@@ -789,6 +829,7 @@ async def stop_audio_stream(
789
  logger.error(f"Error stopping audio stream: {e}")
790
  raise HTTPException(status_code=500, detail=str(e))
791
 
 
792
  @app.get("/api/audio/recordings")
793
  async def list_audio_recordings(current_user: UserPublic = Depends(get_current_user)):
794
  """List all audio recordings."""
@@ -803,6 +844,7 @@ async def list_audio_recordings(current_user: UserPublic = Depends(get_current_u
803
  logger.error(f"Error listing recordings: {e}")
804
  raise HTTPException(status_code=500, detail=str(e))
805
 
 
806
  @app.get("/api/audio/active-sessions")
807
  async def get_active_sessions():
808
  """Get currently active audio recording sessions."""
@@ -817,23 +859,24 @@ async def get_active_sessions():
817
  logger.error(f"Error getting active sessions: {e}")
818
  raise HTTPException(status_code=500, detail=str(e))
819
 
 
820
  @app.get("/api/audio/angles")
821
  async def get_audio_angles():
822
  """Get angle metadata for the latest audio session."""
823
  try:
824
  audio_dir = MODEL_DIR / "audio_recordings"
825
  metadata_files = list(audio_dir.glob("*_metadata.txt"))
826
-
827
  if not metadata_files:
828
  raise HTTPException(
829
  status_code=404,
830
  detail="No metadata found"
831
  )
832
-
833
  # Get the most recently modified metadata file
834
  import os
835
  metadata_file = max(metadata_files, key=os.path.getmtime)
836
-
837
  angles_data = []
838
  with open(metadata_file, 'r') as f:
839
  lines = f.readlines()
@@ -846,10 +889,11 @@ async def get_audio_angles():
846
  try:
847
  timestamp = float(parts[0])
848
  angle = float(parts[1])
849
- angles_data.append({"timestamp": timestamp, "angle": angle})
 
850
  except ValueError:
851
  continue
852
-
853
  return {
854
  "ok": True,
855
  "file": metadata_file.name,
@@ -862,6 +906,7 @@ async def get_audio_angles():
862
  logger.error(f"Error retrieving angles: {e}")
863
  raise HTTPException(status_code=500, detail=str(e))
864
 
 
865
  @app.post("/api/audio/upload")
866
  async def upload_audio_file(
867
  file: UploadFile = File(...)
@@ -870,15 +915,15 @@ async def upload_audio_file(
870
  try:
871
  # Read file content for DB persistence
872
  file_content = await file.read()
873
-
874
  if audio_recordings_collection is not None:
875
  await audio_recordings_collection.insert_one({
876
  "filename": file.filename,
877
- "content": file_content, # Saved as binary in MongoDB
878
  "content_type": file.content_type,
879
  "timestamp": datetime.utcnow()
880
  })
881
-
882
  return {
883
  "ok": True,
884
  "message": "Audio file saved to database successfully",
@@ -889,6 +934,7 @@ async def upload_audio_file(
889
  logger.error(f"Error saving audio to DB: {e}")
890
  raise HTTPException(status_code=500, detail=str(e))
891
 
 
892
  @app.post("/api/audio/set-angle")
893
  async def set_desired_angle(
894
  angle: float = Form(...)
@@ -900,16 +946,16 @@ async def set_desired_angle(
900
  status_code=400,
901
  detail="Angle must be between 0 and 360 degrees"
902
  )
903
-
904
  if audio_angles_collection is not None:
905
  await audio_angles_collection.update_one(
906
  {"key": "latest_angle"},
907
  {"$set": {"value": angle, "updated_at": datetime.utcnow()}},
908
  upsert=True
909
  )
910
-
911
  logger.info(f"Set and persisted desired angle {angle}° to DB")
912
-
913
  return {
914
  "ok": True,
915
  "message": f"Desired angle set to {angle}° and saved to DB",
@@ -921,6 +967,7 @@ async def set_desired_angle(
921
  logger.error(f"Error setting angle in DB: {e}")
922
  raise HTTPException(status_code=500, detail=str(e))
923
 
 
924
  @app.get("/api/audio/get-angle")
925
  async def get_current_angle():
926
  """
@@ -929,7 +976,8 @@ async def get_current_angle():
929
  """
930
  try:
931
  global current_target_angle, current_target_distance
932
-
 
933
  # If a person is actively being tracked, return their real-time angle
934
  if current_target_angle is not None:
935
  return {
@@ -938,7 +986,7 @@ async def get_current_angle():
938
  "angle": round(current_target_angle, 2),
939
  "distance": round(current_target_distance, 2)
940
  }
941
-
942
  # Fallback to the saved angle if no target is actively tracked
943
  if audio_angles_collection is not None:
944
  saved_angle_doc = await audio_angles_collection.find_one({"key": "latest_angle"})
@@ -949,7 +997,7 @@ async def get_current_angle():
949
  "angle": float(saved_angle_doc["value"]),
950
  "distance": None
951
  }
952
-
953
  return {
954
  "ok": False,
955
  "message": "No active tracking and no saved angle found",
@@ -960,19 +1008,21 @@ async def get_current_angle():
960
  logger.error(f"Error retrieving angle: {e}")
961
  raise HTTPException(status_code=500, detail=str(e))
962
 
 
963
  @app.get("/api/audio/settings")
964
  async def get_audio_settings():
965
  """Retrieve all audio settings from MongoDB."""
966
  try:
967
  if audio_settings_collection is None:
968
  return {"ok": False, "message": "Database not connected"}
969
-
970
  cursor = audio_settings_collection.find({}, {"_id": 0})
971
  settings_list = await cursor.to_list(length=100)
972
-
973
  # Convert list to dictionary
974
- settings_dict = {s["key"]: s["value"] for s in settings_list if "key" in s}
975
-
 
976
  return {
977
  "ok": True,
978
  "settings": settings_dict
@@ -981,6 +1031,7 @@ async def get_audio_settings():
981
  logger.error(f"Error retrieving audio settings: {e}")
982
  raise HTTPException(status_code=500, detail=str(e))
983
 
 
984
  @app.post("/api/audio/settings")
985
  async def update_audio_settings(
986
  settings: dict = Body(...)
@@ -988,15 +1039,16 @@ async def update_audio_settings(
988
  """Update general audio settings in MongoDB."""
989
  try:
990
  if audio_settings_collection is None:
991
- raise HTTPException(status_code=503, detail="Database not connected")
992
-
 
993
  for key, value in settings.items():
994
  await audio_settings_collection.update_one(
995
  {"key": key},
996
  {"$set": {"value": value, "updated_at": datetime.utcnow()}},
997
  upsert=True
998
  )
999
-
1000
  return {
1001
  "ok": True,
1002
  "message": "Audio settings updated successfully",
 
57
  current_target_distance = None
58
 
59
  # Configurable parameters for smooth panning
60
+ # Lower is smoother but slower (similar to Dart's TweenAnimation)
61
+ SMOOTHING_FACTOR = 0.1
62
+ TARGET_ASPECT_RATIO = 16.0 / 9.0 # Assuming output is meant to be 16:9
63
 
64
  app = FastAPI(title="AFS Tracking Backend")
65
 
 
88
  pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
89
 
90
  # JWT Configuration
91
+ SECRET_KEY = os.getenv(
92
+ "JWT_SECRET_KEY", "your-secret-key-change-in-production")
93
  ALGORITHM = "HS256"
94
  ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 * 7 # 7 days
95
 
 
159
  async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
160
  collection = require_users_collection()
161
  token = credentials.credentials
162
+
163
  try:
164
  payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
165
  user_id: str = payload.get("sub")
 
173
  status_code=status.HTTP_401_UNAUTHORIZED,
174
  detail="Invalid or expired token",
175
  )
176
+
177
  from bson import ObjectId
178
  try:
179
  user_doc = await collection.find_one({"_id": ObjectId(user_id)})
 
182
  status_code=status.HTTP_401_UNAUTHORIZED,
183
  detail="User not found",
184
  )
185
+
186
  if user_doc is None:
187
  raise HTTPException(
188
  status_code=status.HTTP_401_UNAUTHORIZED,
189
  detail="User not found",
190
  )
191
+
192
  return UserPublic(
193
  id=str(user_doc["_id"]),
194
  full_name=user_doc["full_name"],
195
  email=user_doc["email"],
196
  )
197
 
198
+
199
  def decode_binary_image(img_data: bytes):
200
  """Decodes raw JPEG bytes into an OpenCV numpy array."""
201
  try:
 
206
  logger.error(f"Failed to decode image: {e}")
207
  return None
208
 
209
+
210
  def apply_center_stage_crop(frame, tracking_data):
211
  """
212
  Applies an exponential moving average (EMA) to smoothly pan and zoom
 
214
  Returns the cropped frame.
215
  """
216
  global current_cx, current_cy, current_scale, current_target_angle, current_target_distance
217
+
218
  h, w = frame.shape[:2]
219
+
220
  # Defaults
221
  target_cx = 0.5
222
  target_cy = 0.5
223
  target_scale = 1.0
224
+
225
  target_found = False
226
+
227
  # Calculate target state based on tracking data
228
  boxes = tracking_data.get("boxes", [])
229
  if tracking_data.get("mode") == "multi":
 
233
  box_cy = (ab["y1"] + ab["y2"]) / 2.0
234
  box_w = ab["x2"] - ab["x1"]
235
  box_h = ab["y2"] - ab["y1"]
236
+
237
  target_cx = box_cx / w
238
  target_cy = box_cy / h
239
  target_found = True
240
+
241
  # Target scale logic (from Dart): max dimension proportion * 1.5 margin
242
  max_dim = max(box_w / w, box_h / h)
243
  target_scale = 1.0 / (max_dim * 1.5)
244
  # Clamp scale
245
  target_scale = max(1.0, min(target_scale, 3.0))
246
+ else: # single
247
  target_box = None
248
  for b in boxes:
249
  if b.get("is_target"):
250
  target_box = b
251
  break
252
+
253
  if target_box:
254
  box_cx = (target_box["x1"] + target_box["x2"]) / 2.0
255
  box_cy = (target_box["y1"] + target_box["y2"]) / 2.0
256
  box_w = target_box["x2"] - target_box["x1"]
257
  box_h = target_box["y2"] - target_box["y1"]
258
+
259
  target_cx = box_cx / w
260
  target_cy = box_cy / h
261
  target_found = True
262
+
263
  max_dim = max(box_w / w, box_h / h)
264
+ # slightly tighter for single person
265
+ target_scale = 1.0 / (max_dim * 2.0)
266
  target_scale = max(1.0, min(target_scale, 3.0))
267
 
268
  if target_found:
269
  # Calculate distance and angle from the frame center (w/2, h/2) to the target bounding box center (box_cx, box_cy)
270
  center_x, center_y = w / 2.0, h / 2.0
271
+
272
  dx = box_cx - center_x
273
  dy = box_cy - center_y
274
+
275
  current_target_distance = math.hypot(dx, dy)
276
  # Convert atan2 result to 0-360 degrees
277
  angle = math.degrees(math.atan2(dy, dx))
 
289
  # When scale is S, the crop width is w / S
290
  crop_w = int(w / current_scale)
291
  crop_h = int(h / current_scale)
292
+
293
  # Enforce aspect ratio
294
  # If crop_w / crop_h is not 16:9, adjust one to match
295
  current_ar = crop_w / max(1, crop_h)
 
303
  # Calculate top-left point of crop, clamping to frame boundaries
304
  center_px_x = int(current_cx * w)
305
  center_px_y = int(current_cy * h)
306
+
307
  start_x = max(0, center_px_x - crop_w // 2)
308
  start_y = max(0, center_px_y - crop_h // 2)
309
+
310
  # Adjust if crop box goes out of bounds
311
  if start_x + crop_w > w:
312
  start_x = w - crop_w
 
317
  cropped = frame[start_y:start_y+crop_h, start_x:start_x+crop_w]
318
  return cropped
319
 
320
+
321
  async def generate_obs_stream():
322
  """Generator for the MJPEG stream used by OBS."""
323
  global latest_obs_frame
 
331
  await asyncio.sleep(0.1)
332
  continue
333
  # Use asyncio sleep to prevent blocking the event loop
334
+ await asyncio.sleep(0.033) # roughly 30 fps
335
+
336
 
337
  @app.get("/obs_feed")
338
  async def obs_feed():
339
  """Endpoint for OBS Media Source to connect to."""
340
  return StreamingResponse(generate_obs_stream(), media_type="multipart/x-mixed-replace; boundary=frame")
341
 
342
+
343
  async def vcam_generator_loop():
344
  """Background task to push frames to the virtual camera at 30fps."""
345
  global is_obs_active, vcam, latest_vcam_frame
 
351
  logger.error(f"vcam loop error: {e}")
352
  await asyncio.sleep(1/30)
353
 
354
+
355
  @app.get("/")
356
  async def health_check():
357
  """Health check endpoint."""
 
362
  "mongodb": status_db
363
  }
364
 
365
+
366
  async def mongodb_reconnect_loop():
367
  """Background task to attempt MongoDB reconnection if disconnected."""
368
  global mongo_client, users_collection, audio_recordings_collection, audio_settings_collection
 
372
  mongo_db_name = os.getenv("MONGODB_DB", "afs")
373
  try:
374
  logger.info("Attempting to reconnect to MongoDB...")
375
+ client = AsyncMongoClient(
376
+ mongo_uri, serverSelectionTimeoutMS=5000)
377
  # Ping to force connection verification
378
  await client.admin.command('ping')
379
+
380
  # Re-initialize
381
  mongo_client = client
382
  db = mongo_client[mongo_db_name]
 
384
  audio_recordings_collection = db["audio_recordings"]
385
  audio_settings_collection = db["audio_settings"]
386
  audio_angles_collection = db["audio_angles"]
387
+
388
  await users_collection.create_index("email", unique=True)
389
  logger.info("Successfully reconnected to MongoDB.")
390
  except Exception as e:
 
394
  audio_recordings_collection = None
395
  audio_settings_collection = None
396
  audio_angles_collection = None
397
+
398
  # Wait before next check (e.g., 10 seconds)
399
  await asyncio.sleep(10)
400
 
401
+
402
  @app.on_event("startup")
403
  async def startup_event():
404
  global mongo_client, users_collection, audio_recordings_collection, audio_settings_collection, audio_angles_collection
 
406
  mongo_db_name = os.getenv("MONGODB_DB", "afs")
407
 
408
  try:
409
+ mongo_client = AsyncMongoClient(
410
+ mongo_uri, serverSelectionTimeoutMS=5000)
411
  # Ping to force connection verification
412
  await mongo_client.admin.command('ping')
413
+
414
  db = mongo_client[mongo_db_name]
415
  users_collection = db["users"]
416
  audio_recordings_collection = db["audio_recordings"]
417
  audio_settings_collection = db["audio_settings"]
418
  audio_angles_collection = db["audio_angles"]
419
+
420
  await users_collection.create_index("email", unique=True)
421
  logger.info("Connected to MongoDB and initialized collections.")
422
  except Exception as e:
423
+ logger.warning(f"MongoDB connection failed on startup: {
424
+ e}. Starting reconnection loop.")
425
  mongo_client = None
426
  users_collection = None
427
  audio_recordings_collection = None
 
461
  "updated_at": now,
462
  }
463
  insert_result = await collection.insert_one(user_doc)
464
+
465
  user_id = str(insert_result.inserted_id)
466
  access_token = create_access_token(data={"sub": user_id})
467
 
 
488
  status_code=status.HTTP_401_UNAUTHORIZED,
489
  detail="Invalid email or password.",
490
  )
491
+
492
  user_id = str(user_doc["_id"])
493
  access_token = create_access_token(data={"sub": user_id})
494
 
 
509
  """Verify JWT token and return user info"""
510
  return current_user
511
 
512
+
513
  @app.websocket("/ws")
514
  async def websocket_endpoint(websocket: WebSocket):
515
  global is_recording, video_writer, recording_filename, latest_obs_frame, is_obs_active
516
+
517
  await websocket.accept()
518
  logger.info("New WebSocket connection established.")
519
+
520
+ current_mode = "single" # Default mode
521
 
522
  try:
523
  while True:
 
528
  try:
529
  payload = json.loads(message["text"])
530
  if "mode" in payload and payload["mode"] != current_mode:
531
+ logger.info(f"Switching mode from {
532
+ current_mode} to {payload['mode']}")
533
  current_mode = payload["mode"]
534
  await websocket.send_json({"type": "mode_ack", "mode": current_mode})
535
  elif "command" in payload:
 
538
  if command == "start_recording":
539
  if not is_recording:
540
  is_recording = True
541
+ recording_filename = f"capture_{
542
+ datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
543
+ logger.info(f"Started recording to {
544
+ recording_filename}")
545
  await websocket.send_json({"type": "recording_ack", "status": "started"})
546
  elif command == "stop_recording":
547
  if is_recording:
 
549
  if video_writer is not None:
550
  video_writer.release()
551
  video_writer = None
552
+ logger.info(f'''Stopped recording. File saved as {
553
+ recording_filename}''')
554
  elif command == "start_obs":
555
  if not is_obs_active:
556
  is_obs_active = True
 
568
  elif "bytes" in message:
569
  frame_data = message["bytes"]
570
  frame = decode_binary_image(frame_data)
571
+
572
  if frame is None:
573
  await websocket.send_json({"error": "Failed to decode binary frame"})
574
  continue
 
589
  executor, run_inference, frame, current_mode
590
  )
591
  except Exception as e:
592
+ logger.error(f"Error processing frame in {
593
+ current_mode} mode: {e}")
594
  response_data = {"error": str(e)}
595
 
596
  # Send results back to client
597
  response_data["mode"] = current_mode
598
  await websocket.send_json(response_data)
599
+
600
  # Apply Crop and Handle OBS / Recording
601
  try:
602
+ cropped_frame = apply_center_stage_crop(
603
+ frame, response_data)
604
+
605
  # 1. Update OBS Feed
606
  if is_obs_active:
607
  ret, buffer = cv2.imencode('.jpg', cropped_frame)
608
  if ret:
609
  with obs_frame_lock:
610
  latest_obs_frame = buffer.tobytes()
611
+
612
  # 2. Update Recording Output
613
  if is_recording:
614
  h, w = cropped_frame.shape[:2]
615
  if video_writer is None:
616
  # Initialize writer with the exact dimensions of the FIRST cropped frame
617
  fourcc = cv2.VideoWriter_fourcc(*'avc1')
618
+ video_writer = cv2.VideoWriter(
619
+ recording_filename, fourcc, 5.0, (w, h))
620
+
621
  # Ensure we try to resize cleanly if aspect ratio forces slight off-by-one errors over time
622
  if video_writer is not None:
623
+ target_w = int(video_writer.get(
624
+ cv2.CAP_PROP_FRAME_WIDTH))
625
+ target_h = int(video_writer.get(
626
+ cv2.CAP_PROP_FRAME_HEIGHT))
627
  if (w, h) != (target_w, target_h):
628
+ cropped_frame = cv2.resize(
629
+ cropped_frame, (target_w, target_h))
630
  video_writer.write(cropped_frame)
631
  except Exception as e:
632
  logger.error(f"Error handling post-process crops: {e}")
 
646
 
647
  # === FACE RECOGNITION ENDPOINTS ===
648
 
649
+
650
  @app.post("/api/face/upload-video")
651
  async def upload_reference_video(
652
  file: UploadFile = File(...),
 
654
  ):
655
  """Upload a 360-degree reference video for face recognition training."""
656
  if not file.filename.endswith(('.mp4', '.avi', '.mov', '.mkv')):
657
+ raise HTTPException(
658
+ status_code=400, detail="Invalid video format. Use mp4, avi, mov, or mkv")
659
+
660
  video_path = MODEL_DIR / "my_scan.mp4"
661
+
662
  try:
663
  with open(video_path, 'wb') as f:
664
  shutil.copyfileobj(file.file, f)
665
+
666
  embeddings, num_frames = await asyncio.get_event_loop().run_in_executor(
667
+ executor, face_service.extract_embeddings_from_video, str(
668
+ video_path)
669
  )
670
+
671
+ face_service.save_embeddings_cache(
672
+ embeddings, str(video_path), num_frames)
673
+
674
  return {
675
  "ok": True,
676
  "message": "Video processed successfully",
 
681
  logger.error(f"Error processing video: {e}")
682
  raise HTTPException(status_code=500, detail=str(e))
683
 
684
+
685
  @app.post("/api/face/upload-image")
686
  async def upload_reference_image(
687
  file: UploadFile = File(...),
 
689
  ):
690
  """Upload a reference image for face recognition."""
691
  if not file.filename.endswith(('.jpg', '.jpeg', '.png')):
692
+ raise HTTPException(
693
+ status_code=400, detail="Invalid image format. Use jpg, jpeg, or png")
694
+
695
  image_path = MODEL_DIR / f"ref_{file.filename}"
696
+
697
  try:
698
  with open(image_path, 'wb') as f:
699
  shutil.copyfileobj(file.file, f)
700
+
701
  embeddings = await asyncio.get_event_loop().run_in_executor(
702
+ executor, face_service.extract_embeddings_from_image, str(
703
+ image_path)
704
  )
705
+
706
  return {
707
  "ok": True,
708
  "message": "Image processed successfully",
 
713
  logger.error(f"Error processing image: {e}")
714
  raise HTTPException(status_code=500, detail=str(e))
715
 
716
+
717
  @app.get("/api/face/cache-status")
718
  async def get_cache_status(current_user: UserPublic = Depends(get_current_user)):
719
  """Get the current face recognition cache status."""
720
  cache_data = face_service.load_embeddings_cache()
721
+
722
  if cache_data:
723
  return {
724
  "ok": True,
 
737
 
738
  # === AUDIO STREAMING ENDPOINTS ===
739
 
740
+
741
  @app.post("/api/audio/start-stream")
742
  async def start_audio_stream(
743
  sample_rate: int = Form(16000),
 
746
  ):
747
  """Start a new audio recording stream."""
748
  session_id = str(uuid.uuid4())
749
+
750
  try:
751
+ filename = audio_processor.create_audio_stream(
752
+ session_id, sample_rate, channels)
753
  return {
754
  "ok": True,
755
  "session_id": session_id,
 
761
  logger.error(f"Error starting audio stream: {e}")
762
  raise HTTPException(status_code=500, detail=str(e))
763
 
764
+
765
  @app.websocket("/ws/audio/{session_id}")
766
  async def websocket_audio_stream(websocket: WebSocket, session_id: str):
767
  """WebSocket endpoint for streaming audio with angle data."""
768
  await websocket.accept()
769
+ logger.info(
770
+ f"Audio WebSocket connection established for session {session_id}")
771
+
772
  # Auto-create stream if not exists
773
  if session_id not in audio_processor.active_streams:
774
  audio_processor.create_audio_stream(session_id)
775
  logger.info(f"Auto-created audio stream for session {session_id}")
776
+
777
  try:
778
  while True:
779
  message = await websocket.receive()
780
+
781
  if "bytes" in message:
782
  audio_data = message["bytes"]
783
  audio_processor.write_audio_chunk(session_id, audio_data)
784
  await websocket.send_json({"status": "received", "bytes": len(audio_data)})
785
+
786
  elif "text" in message:
787
  try:
788
  payload = json.loads(message["text"])
789
+
790
  if "audio_data" in payload and "angle" in payload:
791
  audio_bytes = base64.b64decode(payload["audio_data"])
792
  angle = float(payload["angle"])
793
+ audio_processor.write_audio_chunk(
794
+ session_id, audio_bytes, angle)
795
  await websocket.send_json({"status": "received", "angle": angle})
796
+
797
  elif payload.get("command") == "stop":
798
  audio_processor.close_audio_stream(session_id)
799
  await websocket.send_json({"status": "stopped", "message": "Stream closed"})
800
  break
801
+
802
  except json.JSONDecodeError:
803
  logger.error("Invalid JSON in audio stream")
804
+
805
  except WebSocketDisconnect:
806
+ logger.info(
807
+ f"Audio WebSocket client disconnected for session {session_id}")
808
  if session_id in audio_processor.active_streams:
809
  audio_processor.close_audio_stream(session_id)
810
  except Exception as e:
 
812
  if session_id in audio_processor.active_streams:
813
  audio_processor.close_audio_stream(session_id)
814
 
815
+
816
  @app.post("/api/audio/stop-stream/{session_id}")
817
  async def stop_audio_stream(
818
  session_id: str,
 
829
  logger.error(f"Error stopping audio stream: {e}")
830
  raise HTTPException(status_code=500, detail=str(e))
831
 
832
+
833
  @app.get("/api/audio/recordings")
834
  async def list_audio_recordings(current_user: UserPublic = Depends(get_current_user)):
835
  """List all audio recordings."""
 
844
  logger.error(f"Error listing recordings: {e}")
845
  raise HTTPException(status_code=500, detail=str(e))
846
 
847
+
848
  @app.get("/api/audio/active-sessions")
849
  async def get_active_sessions():
850
  """Get currently active audio recording sessions."""
 
859
  logger.error(f"Error getting active sessions: {e}")
860
  raise HTTPException(status_code=500, detail=str(e))
861
 
862
+
863
  @app.get("/api/audio/angles")
864
  async def get_audio_angles():
865
  """Get angle metadata for the latest audio session."""
866
  try:
867
  audio_dir = MODEL_DIR / "audio_recordings"
868
  metadata_files = list(audio_dir.glob("*_metadata.txt"))
869
+
870
  if not metadata_files:
871
  raise HTTPException(
872
  status_code=404,
873
  detail="No metadata found"
874
  )
875
+
876
  # Get the most recently modified metadata file
877
  import os
878
  metadata_file = max(metadata_files, key=os.path.getmtime)
879
+
880
  angles_data = []
881
  with open(metadata_file, 'r') as f:
882
  lines = f.readlines()
 
889
  try:
890
  timestamp = float(parts[0])
891
  angle = float(parts[1])
892
+ angles_data.append(
893
+ {"timestamp": timestamp, "angle": angle})
894
  except ValueError:
895
  continue
896
+
897
  return {
898
  "ok": True,
899
  "file": metadata_file.name,
 
906
  logger.error(f"Error retrieving angles: {e}")
907
  raise HTTPException(status_code=500, detail=str(e))
908
 
909
+
910
  @app.post("/api/audio/upload")
911
  async def upload_audio_file(
912
  file: UploadFile = File(...)
 
915
  try:
916
  # Read file content for DB persistence
917
  file_content = await file.read()
918
+
919
  if audio_recordings_collection is not None:
920
  await audio_recordings_collection.insert_one({
921
  "filename": file.filename,
922
+ "content": file_content, # Saved as binary in MongoDB
923
  "content_type": file.content_type,
924
  "timestamp": datetime.utcnow()
925
  })
926
+
927
  return {
928
  "ok": True,
929
  "message": "Audio file saved to database successfully",
 
934
  logger.error(f"Error saving audio to DB: {e}")
935
  raise HTTPException(status_code=500, detail=str(e))
936
 
937
+
938
  @app.post("/api/audio/set-angle")
939
  async def set_desired_angle(
940
  angle: float = Form(...)
 
946
  status_code=400,
947
  detail="Angle must be between 0 and 360 degrees"
948
  )
949
+
950
  if audio_angles_collection is not None:
951
  await audio_angles_collection.update_one(
952
  {"key": "latest_angle"},
953
  {"$set": {"value": angle, "updated_at": datetime.utcnow()}},
954
  upsert=True
955
  )
956
+
957
  logger.info(f"Set and persisted desired angle {angle}° to DB")
958
+
959
  return {
960
  "ok": True,
961
  "message": f"Desired angle set to {angle}° and saved to DB",
 
967
  logger.error(f"Error setting angle in DB: {e}")
968
  raise HTTPException(status_code=500, detail=str(e))
969
 
970
+
971
  @app.get("/api/audio/get-angle")
972
  async def get_current_angle():
973
  """
 
976
  """
977
  try:
978
  global current_target_angle, current_target_distance
979
+
980
+ logger.info(current_target_angle, current_target_distance)
981
  # If a person is actively being tracked, return their real-time angle
982
  if current_target_angle is not None:
983
  return {
 
986
  "angle": round(current_target_angle, 2),
987
  "distance": round(current_target_distance, 2)
988
  }
989
+
990
  # Fallback to the saved angle if no target is actively tracked
991
  if audio_angles_collection is not None:
992
  saved_angle_doc = await audio_angles_collection.find_one({"key": "latest_angle"})
 
997
  "angle": float(saved_angle_doc["value"]),
998
  "distance": None
999
  }
1000
+
1001
  return {
1002
  "ok": False,
1003
  "message": "No active tracking and no saved angle found",
 
1008
  logger.error(f"Error retrieving angle: {e}")
1009
  raise HTTPException(status_code=500, detail=str(e))
1010
 
1011
+
1012
  @app.get("/api/audio/settings")
1013
  async def get_audio_settings():
1014
  """Retrieve all audio settings from MongoDB."""
1015
  try:
1016
  if audio_settings_collection is None:
1017
  return {"ok": False, "message": "Database not connected"}
1018
+
1019
  cursor = audio_settings_collection.find({}, {"_id": 0})
1020
  settings_list = await cursor.to_list(length=100)
1021
+
1022
  # Convert list to dictionary
1023
+ settings_dict = {s["key"]: s["value"]
1024
+ for s in settings_list if "key" in s}
1025
+
1026
  return {
1027
  "ok": True,
1028
  "settings": settings_dict
 
1031
  logger.error(f"Error retrieving audio settings: {e}")
1032
  raise HTTPException(status_code=500, detail=str(e))
1033
 
1034
+
1035
  @app.post("/api/audio/settings")
1036
  async def update_audio_settings(
1037
  settings: dict = Body(...)
 
1039
  """Update general audio settings in MongoDB."""
1040
  try:
1041
  if audio_settings_collection is None:
1042
+ raise HTTPException(
1043
+ status_code=503, detail="Database not connected")
1044
+
1045
  for key, value in settings.items():
1046
  await audio_settings_collection.update_one(
1047
  {"key": key},
1048
  {"$set": {"value": value, "updated_at": datetime.utcnow()}},
1049
  upsert=True
1050
  )
1051
+
1052
  return {
1053
  "ok": True,
1054
  "message": "Audio settings updated successfully",