""" VisionAI — Object Detection & Human Pose Estimation using YOLO Semester Project Key features: • weapon_detection.pt — custom weapon model (bundled) • Pose Threat Analysis — classifies each detected person's pose as: 🟢 NORMAL — relaxed / standing / walking 🟡 SUSPICIOUS — crouching / leaning / unusual angle 🔴 THREATENING — raised arms / aggressive / weapon + person together • FPS-based video scanning (choose how many frames/sec to analyse) • Works on HuggingFace free tier (CPU-safe) """ import cv2 import json import math import tempfile import numpy as np from PIL import Image, ImageDraw, ImageFont import gradio as gr from ultralytics import YOLO try: import spaces except ImportError: class spaces: @staticmethod def GPU(fn): return fn # ══════════════════════════════════════════════════════════════════ # MODEL LOADING # ══════════════════════════════════════════════════════════════════ print("=" * 60) print("[VisionAI] Loading models ...") def _load(path, label): try: m = YOLO(path) print(f" ✅ {label} ({path})") return m except Exception as e: print(f" ⚠️ {label} skipped — {e}") return None MODEL_OD = _load("yolo11m.pt", "Object Detection") MODEL_POSE = _load("yolo11m-pose.pt", "Pose Estimation") MODEL_SEG = _load("yolo11m-seg.pt", "Segmentation") MODEL_CLS = _load("yolo11m-cls.pt", "Classification") MODEL_OBB = _load("yolo11m-obb.pt", "OBB Detection") MODEL_WEAPON = _load("weapon_detection.pt", "Weapon Detection ★") # Ordered task registry (always includes weapon if loaded) MODELS = {} if MODEL_OD: MODELS["object_detection"] = MODEL_OD if MODEL_POSE: MODELS["pose"] = MODEL_POSE if MODEL_SEG: MODELS["segmentation"] = MODEL_SEG if MODEL_CLS: MODELS["classification"] = MODEL_CLS if MODEL_OBB: MODELS["obb"] = MODEL_OBB if MODEL_WEAPON: MODELS["weapon"] = MODEL_WEAPON TASK_DISPLAY = { "object_detection": "🔍 Object Detection", "pose": "🦴 Pose Estimation", "segmentation": "🎭 Segmentation", "classification": "🏷️ Classification", "obb": "📦 OBB Detection", "weapon": "🔫 
Weapon Detection", } OVERLAY_TASKS = [t for t in ["object_detection","pose","segmentation","obb","weapon"] if t in MODELS] ALL_TASKS = list(MODELS.keys()) print(f"[VisionAI] ✅ {len(MODELS)} models loaded: {ALL_TASKS}") print("=" * 60) # ══════════════════════════════════════════════════════════════════ # POSE THREAT ANALYSER # COCO 17 keypoints: # 0-nose 1-left_eye 2-right_eye 3-left_ear 4-right_ear # 5-left_shoulder 6-right_shoulder # 7-left_elbow 8-right_elbow # 9-left_wrist 10-right_wrist # 11-left_hip 12-right_hip # 13-left_knee 14-right_knee # 15-left_ankle 16-right_ankle # ══════════════════════════════════════════════════════════════════ THREAT_NORMAL = "NORMAL" THREAT_SUSPICIOUS = "SUSPICIOUS" THREAT_THREATENING = "THREATENING" THREAT_COLOR = { THREAT_NORMAL: (34, 197, 94), # green THREAT_SUSPICIOUS: (234, 179, 8), # yellow THREAT_THREATENING: (239, 68, 68), # red } THREAT_EMOJI = { THREAT_NORMAL: "🟢", THREAT_SUSPICIOUS: "🟡", THREAT_THREATENING: "🔴", } def _kp(kps, idx): """Return (x, y, visible) for keypoint index. visible=True if coords > 0.""" if idx >= len(kps): return 0, 0, False x, y = float(kps[idx][0]), float(kps[idx][1]) return x, y, (x > 1 and y > 1) def _angle(a, b, c): """Angle at point b formed by a-b-c (degrees).""" ax, ay = a[0]-b[0], a[1]-b[1] cx, cy = c[0]-b[0], c[1]-b[1] dot = ax*cx + ay*cy mag = (math.hypot(ax,ay) * math.hypot(cx,cy)) + 1e-6 return math.degrees(math.acos(max(-1, min(1, dot/mag)))) def analyse_pose_threat(kps, weapon_in_frame=False): """ Returns (threat_level, reason_string) for a single person's keypoints. kps: list of [x, y] for 17 COCO keypoints. 
""" # ── Extract key points ── nose_x, nose_y, nose_v = _kp(kps, 0) ls_x, ls_y, ls_v = _kp(kps, 5) # left shoulder rs_x, rs_y, rs_v = _kp(kps, 6) # right shoulder le_x, le_y, le_v = _kp(kps, 7) # left elbow re_x, re_y, re_v = _kp(kps, 8) # right elbow lw_x, lw_y, lw_v = _kp(kps, 9) # left wrist rw_x, rw_y, rw_v = _kp(kps, 10) # right wrist lh_x, lh_y, lh_v = _kp(kps, 11) # left hip rh_x, rh_y, rh_v = _kp(kps, 12) # right hip lk_x, lk_y, lk_v = _kp(kps, 13) # left knee rk_x, rk_y, rk_v = _kp(kps, 14) # right knee la_x, la_y, la_v = _kp(kps, 15) # left ankle ra_x, ra_y, ra_v = _kp(kps, 16) # right ankle reasons = [] score = 0 # accumulate threat score # ── 1. ARMS RAISED (wrists above shoulders) ── arms_raised = 0 if lw_v and ls_v and lw_y < ls_y - 20: # y decreases upward in image coords arms_raised += 1 if rw_v and rs_v and rw_y < rs_y - 20: arms_raised += 1 if arms_raised == 2: score += 3 reasons.append("both arms raised") elif arms_raised == 1: score += 1 reasons.append("one arm raised") # ── 2. ARMS EXTENDED FORWARD / POINTING ── # Wrists far from body centre horizontally = reaching/pointing body_cx = 0 if ls_v and rs_v: body_cx = (ls_x + rs_x) / 2 if body_cx > 0: if lw_v and abs(lw_x - body_cx) > 120: score += 1 reasons.append("left arm extended") if rw_v and abs(rw_x - body_cx) > 120: score += 1 reasons.append("right arm extended") # ── 3. ELBOW ANGLE (acute = punching / striking pose) ── if lw_v and le_v and ls_v: ang = _angle((ls_x,ls_y),(le_x,le_y),(lw_x,lw_y)) if ang < 70: score += 2 reasons.append(f"left arm bent aggressively ({ang:.0f}°)") if rw_v and re_v and rs_v: ang = _angle((rs_x,rs_y),(re_x,re_y),(rw_x,rw_y)) if ang < 70: score += 2 reasons.append(f"right arm bent aggressively ({ang:.0f}°)") # ── 4. 
CROUCHING (knees higher than hips relative to ankles) ── if lk_v and lh_v and la_v: torso_h = abs(lh_y - la_y) + 1e-6 crouch_ratio = (lk_y - lh_y) / torso_h if crouch_ratio < 0.15: # knee close to hip → crouching score += 1 reasons.append("crouching posture") # ── 5. LEANING / TILTED BODY ── if ls_v and rs_v: shoulder_tilt = abs(ls_y - rs_y) / (abs(ls_x - rs_x) + 1e-6) if shoulder_tilt > 0.45: score += 1 reasons.append(f"body tilted ({shoulder_tilt:.2f})") # ── 6. WEAPON IN SAME FRAME ── if weapon_in_frame: score += 4 reasons.append("weapon detected nearby") # ── 7. WIDE STANCE (feet far apart) ── if la_v and ra_v and ls_v and rs_v: shoulder_w = abs(ls_x - rs_x) + 1e-6 stance_w = abs(la_x - ra_x) if stance_w / shoulder_w > 1.8: score += 1 reasons.append("wide aggressive stance") # ── Map score → threat level ── if score >= 6: level = THREAT_THREATENING elif score >= 2: level = THREAT_SUSPICIOUS else: level = THREAT_NORMAL reason_str = ", ".join(reasons) if reasons else "relaxed posture" return level, reason_str, score # ══════════════════════════════════════════════════════════════════ # OVERLAY DRAWING # ══════════════════════════════════════════════════════════════════ def draw_threat_overlay(frame_bgr, persons): """ Draw a threat status badge per person on the frame. persons: list of dicts with keys: bbox, threat, reason, score Returns annotated BGR frame. 
""" out = frame_bgr.copy() for p in persons: x1, y1, x2, y2 = [int(v) for v in p["bbox"]] threat = p["threat"] color = THREAT_COLOR[threat] # (R,G,B) bgr = (color[2], color[1], color[0]) # cv2 BGR emoji = THREAT_EMOJI[threat] # Bounding box border cv2.rectangle(out, (x1,y1), (x2,y2), bgr, 2) # Label background label = f"{emoji} {threat}" (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 0.6, 1) cv2.rectangle(out, (x1, y1-th-8), (x1+tw+8, y1), bgr, -1) cv2.putText(out, label, (x1+4, y1-4), cv2.FONT_HERSHEY_DUPLEX, 0.6, (255,255,255), 1, cv2.LINE_AA) # Reason sub-label (smaller, below box) reason_short = p["reason"][:50] cv2.putText(out, reason_short, (x1+2, y2+16), cv2.FONT_HERSHEY_SIMPLEX, 0.42, bgr, 1, cv2.LINE_AA) # ── Overall frame status banner (top of frame) ── if persons: worst = max(persons, key=lambda p: p["score"]) w_threat = worst["threat"] w_color = THREAT_COLOR[w_threat] w_bgr = (w_color[2], w_color[1], w_color[0]) banner = f" {THREAT_EMOJI[w_threat]} OVERALL: {w_threat} ({len(persons)} person(s) detected)" (bw, bh), _ = cv2.getTextSize(banner, cv2.FONT_HERSHEY_DUPLEX, 0.7, 1) cv2.rectangle(out, (0,0), (bw+16, bh+12), w_bgr, -1) cv2.putText(out, banner, (8, bh+4), cv2.FONT_HERSHEY_DUPLEX, 0.7, (255,255,255), 1, cv2.LINE_AA) return out def run_combined_analysis(frame_np, conf, iou, img_size): """ Run Object Detection + Pose + Weapon on one frame. Returns annotated PIL image + analysis dict. 
""" # ── Step 1: Weapon detection ── weapon_in_frame = False weapon_dets = [] if MODEL_WEAPON: w_res = MODEL_WEAPON.predict(source=frame_np, conf=conf, iou=iou, imgsz=img_size, verbose=False) for r in w_res: if r.boxes is not None and len(r.boxes): weapon_in_frame = True for box in r.boxes: weapon_dets.append({ "label": MODEL_WEAPON.names[int(box.cls)], "confidence": round(float(box.conf), 3), "bbox": [round(v,1) for v in box.xyxy[0].tolist()], }) # ── Step 2: Pose estimation ── persons = [] pose_anno = frame_np.copy() if MODEL_POSE: p_res = MODEL_POSE.predict(source=frame_np, conf=conf, iou=iou, imgsz=img_size, verbose=False) for r in p_res: pose_anno = r.plot() # skeleton overlay if r.boxes is None or r.keypoints is None: continue for i, box in enumerate(r.boxes): if MODEL_POSE.names[int(box.cls)] != "person": continue kps = r.keypoints.xy[i].tolist() threat, reason, score = analyse_pose_threat(kps, weapon_in_frame) persons.append({ "id": i, "bbox": [round(v,1) for v in box.xyxy[0].tolist()], "threat": threat, "reason": reason, "score": score, "keypoints_count": sum(1 for k in kps if k[0]>1 and k[1]>1), }) # Convert pose_anno (may be BGR from r.plot()) to BGR numpy if isinstance(pose_anno, np.ndarray) and pose_anno.shape[2] == 3: anno_bgr = pose_anno if pose_anno.dtype == np.uint8 else (pose_anno*255).astype(np.uint8) # r.plot() returns RGB; convert to BGR for cv2 anno_bgr = cv2.cvtColor(anno_bgr, cv2.COLOR_RGB2BGR) else: anno_bgr = cv2.cvtColor(frame_np, cv2.COLOR_RGB2BGR) # ── Step 3: Draw weapon boxes on top ── for wd in weapon_dets: x1,y1,x2,y2 = [int(v) for v in wd["bbox"]] cv2.rectangle(anno_bgr, (x1,y1), (x2,y2), (0,0,220), 3) lbl = f"🔫 {wd['label']} {wd['confidence']:.0%}" cv2.putText(anno_bgr, lbl, (x1, y1-6), cv2.FONT_HERSHEY_DUPLEX, 0.6, (0,0,220), 1) # ── Step 4: Draw threat overlays ── anno_bgr = draw_threat_overlay(anno_bgr, persons) # Back to RGB PIL out_pil = Image.fromarray(cv2.cvtColor(anno_bgr, cv2.COLOR_BGR2RGB)) analysis = { 
"persons_detected": len(persons), "weapon_detected": weapon_in_frame, "weapons": weapon_dets, "persons": persons, "overall_threat": max((p["threat"] for p in persons), key=lambda t: [THREAT_NORMAL,THREAT_SUSPICIOUS,THREAT_THREATENING].index(t)) if persons else THREAT_NORMAL, } return out_pil, analysis # ══════════════════════════════════════════════════════════════════ # CORE HELPERS (single-model path) # ══════════════════════════════════════════════════════════════════ def predict(model, frame_np, conf, iou, img_size): return model.predict(source=frame_np, conf=conf, iou=iou, imgsz=img_size, verbose=False, show_labels=True, show_conf=True) def extract_dets(results, task, model): dets = [] for r in results: if task == "classification": if r.probs is not None: for idx, c in zip(r.probs.top5, r.probs.top5conf.tolist()): dets.append({"label": model.names[idx], "confidence": round(float(c),3)}) else: if r.boxes is not None: for i, box in enumerate(r.boxes): d = {"id": i, "label": model.names[int(box.cls)], "confidence": round(float(box.conf),3), "bbox": [round(v,1) for v in box.xyxy[0].tolist()]} if task == "pose" and r.keypoints is not None: kps = r.keypoints.xy[i].tolist() d["keypoints"] = [[round(x,1),round(y,1)] for x,y in kps] dets.append(d) return dets def to_pil(results): for r in results: return Image.fromarray(r.plot()[..., ::-1]) return None def resize_frame(frame, src_w, src_h, max_side=640): scale = min(max_side / max(src_w, src_h), 1.0) if scale < 1.0: ow = int(src_w*scale)&~1; oh = int(src_h*scale)&~1 if frame is None: return None, ow, oh, scale return cv2.resize(frame,(ow,oh)), ow, oh, scale if frame is None: return None, src_w&~1, src_h&~1, 1.0 return frame, src_w&~1, src_h&~1, 1.0 def _frame_interval(src_fps, scan_fps): return max(1, round(src_fps / min(scan_fps, src_fps))) # ══════════════════════════════════════════════════════════════════ # INFERENCE FUNCTIONS # ══════════════════════════════════════════════════════════════════ # ── COMBINED IMAGE 
# (Pose + OD + Weapon + Threat) ──────────────────
@spaces.GPU
def infer_combined_image(image, conf, iou, img_size):
    """Run the combined pose + weapon analysis on one uploaded PIL image.

    Returns:
        (annotated PIL image, analysis JSON string), or (None, error JSON)
        when no image was supplied.
    """
    if image is None:
        return None, '{"error":"No image"}'
    img_np = np.array(image.convert("RGB"))
    out_pil, analysis = run_combined_analysis(img_np, conf, iou, img_size)
    # ensure_ascii=False keeps characters such as "°" readable in the UI.
    return out_pil, json.dumps(analysis, indent=2, ensure_ascii=False)


# ── SINGLE MODEL IMAGE ────────────────────────────────────────────
@spaces.GPU
def infer_image(image, task, conf, iou, img_size):
    """Run the model registered for *task* on one uploaded PIL image.

    Returns:
        (annotated PIL image, detections JSON string), or (None, error JSON)
        when the image is missing or the task's model is not loaded.
    """
    if image is None:
        return None, '{"error":"No image"}'
    model = MODELS.get(task)
    if model is None:
        # The model failed to load at startup; report it instead of raising.
        return None, json.dumps({"error": f"Model not loaded: {task}"})

    # Ultralytics treats numpy input as BGR and to_pil() reverses the plotted
    # channels, so the model must be fed BGR for the output colours to be right.
    img_bgr = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)
    results = predict(model, img_bgr, conf, iou, img_size)
    dets = extract_dets(results, task, model)
    out_img = to_pil(results)
    payload = {"task": TASK_DISPLAY[task], "count": len(dets), "detections": dets}
    return out_img, json.dumps(payload, indent=2, ensure_ascii=False)


# ── COMBINED VIDEO (Pose Threat per frame) ────────────────────────
@spaces.GPU
def infer_combined_video(video_path, conf, iou, img_size,
                         scan_fps=1, max_frames=300, progress=gr.Progress()):
    """Analyse a video at *scan_fps* and write only the analysed frames.

    Args:
        video_path: path of the uploaded video, or None.
        conf, iou, img_size: forwarded to the combined analysis.
        scan_fps: frames per second of source video to analyse.
        max_frames: upper bound on the number of analysed frames.
        progress: Gradio progress callback.

    Returns:
        (path of the annotated .mp4, summary JSON string), or
        (None, error JSON) when the input cannot be processed.
    """
    if video_path is None:
        return None, '{"error":"No video"}'

    cap = cv2.VideoCapture(video_path)
    vw = None
    try:
        if not cap.isOpened():
            return None, '{"error":"Cannot open video"}'

        src_fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
        src_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 640
        src_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 480
        total_src = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 1)

        # Analyse (and write) one frame every `interval` source frames.
        scan_fps = float(scan_fps) if scan_fps else 1.0
        interval = _frame_interval(src_fps, scan_fps)
        out_fps = max(src_fps / interval, 1.0)
        _, out_w, out_h, _ = resize_frame(None, src_w, src_h)

        # Close our handle straight away; only the path is needed.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as handle:
            tmp = handle.name

        # Try H.264 first (smaller + browser-compatible), fall back to mp4v
        size = (out_w, out_h)
        vw = cv2.VideoWriter(tmp, cv2.VideoWriter_fourcc(*"avc1"), out_fps, size)
        if not vw.isOpened():
            vw = cv2.VideoWriter(tmp, cv2.VideoWriter_fourcc(*"mp4v"), out_fps, size)
        if not vw.isOpened():
            return None, '{"error":"Cannot create output video"}'

        frame_idx = 0
        proc_count = 0
        total_weapons = 0
        threat_counts = {THREAT_NORMAL: 0, THREAT_SUSPICIOUS: 0, THREAT_THREATENING: 0}
        frame_cap = int(max_frames)

        progress(0, desc="Starting …")
        while proc_count < frame_cap:
            ret, frame = cap.read()
            if not ret:
                break

            # Only process and write frames at the target scan rate
            if frame_idx % interval == 0:
                # VideoWriter silently drops frames of the wrong size, so match
                # the writer size exactly (this also covers odd source sizes).
                if frame.shape[1] != out_w or frame.shape[0] != out_h:
                    frame = cv2.resize(frame, size)

                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # The combined analysis already returns the fully annotated
                # frame; reuse it rather than running pose inference again.
                out_pil, analysis = run_combined_analysis(frame_rgb, conf, iou, img_size)
                vw.write(cv2.cvtColor(np.array(out_pil), cv2.COLOR_RGB2BGR))

                for p in analysis["persons"]:
                    threat_counts[p["threat"]] += 1
                total_weapons += len(analysis["weapons"])
                proc_count += 1

                ot = analysis["overall_threat"]
                progress(min(frame_idx / total_src, 0.99),
                         desc=f"Frame {frame_idx}/{total_src} | {THREAT_EMOJI[ot]} {ot}")
            frame_idx += 1
    finally:
        # Release native handles even when analysis raises part-way through.
        cap.release()
        if vw is not None:
            vw.release()

    progress(1.0, desc="✓ Done!")

    payload = {
        "source_fps": round(src_fps, 2),
        "scan_fps": round(scan_fps, 2),
        "frame_interval": interval,
        "frames_scanned": proc_count,
        "total_frames": frame_idx,
        "resolution": f"{out_w}x{out_h}",
        "weapon_detections": total_weapons,
        "pose_threat_summary": {
            f"{THREAT_EMOJI[THREAT_NORMAL]} NORMAL": threat_counts[THREAT_NORMAL],
            f"{THREAT_EMOJI[THREAT_SUSPICIOUS]} SUSPICIOUS": threat_counts[THREAT_SUSPICIOUS],
            f"{THREAT_EMOJI[THREAT_THREATENING]} THREATENING": threat_counts[THREAT_THREATENING],
        },
    }
    return tmp, json.dumps(payload, indent=2, ensure_ascii=False)


# ── SINGLE MODEL VIDEO ────────────────────────────────────────────
@spaces.GPU
def infer_video(video_path, task, conf, iou, img_size,
                scan_fps=1, max_frames=300, progress=gr.Progress()):
    """Run one model over a video at *scan_fps* and write the analysed frames.

    Args:
        video_path: path of the uploaded video, or None.
        task: key into MODELS selecting the model to run.
        conf, iou, img_size: forwarded to predict().
        scan_fps: frames per second of source video to analyse.
        max_frames: upper bound on the number of analysed frames.
        progress: Gradio progress callback.

    Returns:
        (path of the annotated .mp4, summary JSON string), or
        (None, error JSON) when the input cannot be processed.
    """
    if video_path is None:
        return None, '{"error":"No video"}'
    model = MODELS.get(task)
    if model is None:
        # The model failed to load at startup; report it instead of raising.
        return None, json.dumps({"error": f"Model not loaded: {task}"})

    cap = cv2.VideoCapture(video_path)
    vw = None
    try:
        if not cap.isOpened():
            return None, '{"error":"Cannot open video"}'

        src_fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
        src_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 640
        src_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 480
        total_src = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 1)

        scan_fps = float(scan_fps) if scan_fps else 1.0
        interval = _frame_interval(src_fps, scan_fps)
        out_fps = max(src_fps / interval, 1.0)
        _, out_w, out_h, _ = resize_frame(None, src_w, src_h)

        # Close our handle straight away; only the path is needed.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as handle:
            tmp = handle.name

        # Try H.264 first (smaller + browser-compatible), fall back to mp4v
        size = (out_w, out_h)
        vw = cv2.VideoWriter(tmp, cv2.VideoWriter_fourcc(*"avc1"), out_fps, size)
        if not vw.isOpened():
            vw = cv2.VideoWriter(tmp, cv2.VideoWriter_fourcc(*"mp4v"), out_fps, size)
        if not vw.isOpened():
            return None, '{"error":"Cannot create output video"}'

        frame_idx = 0
        proc_count = 0
        total_dets = 0
        frame_cap = int(max_frames)

        progress(0, desc="Starting …")
        while proc_count < frame_cap:
            ret, frame = cap.read()
            if not ret:
                break

            # Only process and write frames at the target scan rate
            if frame_idx % interval == 0:
                # VideoWriter silently drops frames of the wrong size, so match
                # the writer size exactly (this also covers odd source sizes).
                if frame.shape[1] != out_w or frame.shape[0] != out_h:
                    frame = cv2.resize(frame, size)

                # Start from the raw frame so a frame without results is never
                # replaced by a stale annotation from an earlier iteration.
                annotated = frame
                for r in predict(model, frame, conf, iou, img_size):
                    # The input is BGR and plot() keeps that order, so the
                    # result can go to the writer without conversion.
                    annotated = r.plot()
                    # Oriented-box models report through `obb`, not `boxes`.
                    found = r.boxes if r.boxes is not None else getattr(r, "obb", None)
                    if found is not None:
                        total_dets += len(found)
                vw.write(annotated)
                proc_count += 1
                progress(min(frame_idx / total_src, 0.99),
                         desc=f"Frame {frame_idx}/{total_src} | {total_dets} dets")
            frame_idx += 1
    finally:
        # Release native handles even when inference raises part-way through.
        cap.release()
        if vw is not None:
            vw.release()

    progress(1.0, desc="✓ Done!")

    payload = {
        "task": TASK_DISPLAY[task],
        "source_fps": round(src_fps, 2),
        "scan_fps": round(scan_fps, 2),
        "frame_interval": interval,
        "frames_scanned": proc_count,
        "resolution": f"{out_w}x{out_h}",
        "total_detections": total_dets,
        "avg_detections_per_scanned_frame": round(total_dets / max(proc_count, 1), 2),
    }
    return tmp, json.dumps(payload, indent=2, ensure_ascii=False)


# ── WEBCAM — COMBINED (Pose Threat + Weapon live) ─────────────────
@spaces.GPU
def stream_webcam_combined(frame, conf, iou, img_size):
    """Annotate one webcam frame with the combined pose + weapon analysis.

    Returns the annotated frame as a numpy array, or None without a frame.
    """
    if frame is None:
        return None
    out_pil, _ = run_combined_analysis(frame, conf, iou, img_size)
    return np.array(out_pil)


# ── WEBCAM — SINGLE MODEL ─────────────────────────────────────────
@spaces.GPU
def stream_webcam(frame, task, conf, iou, img_size):
    """Annotate one RGB webcam frame with the model registered for *task*.

    Returns the annotated RGB frame; the input frame is passed through
    unchanged when the model is not loaded or produced no result.
    """
    if frame is None:
        return None
    model = MODELS.get(task)
    if model is None:
        return frame

    # Ultralytics treats numpy input as BGR and plot() returns the same
    # order, so convert on the way in and again on the way out.
    frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    for r in predict(model, frame_bgr, conf, iou, img_size):
        return cv2.cvtColor(r.plot(), cv2.COLOR_BGR2RGB)
    return frame


# ══════════════════════════════════════════════════════════════════
# UI HELPERS
# ══════════════════════════════════════════════════════════════════
def shared_controls(default_conf=0.25):
    """Create the confidence / IoU / image-size sliders in one row and return them."""
    with gr.Row():
        conf = gr.Slider(0.05, 0.95, value=default_conf, step=0.05, label="Confidence")
        iou = gr.Slider(0.05, 0.95, value=0.45, step=0.05, label="IoU Threshold")
        isize = gr.Slider(320, 1280, value=640, step=32, label="Image Size")
    return conf, iou, isize


def video_controls():
    """Create the scan-FPS selector and max-frames slider in one row and return them."""
    with gr.Row():
        scan_fps = gr.Radio(
            choices=[1, 2, 3, 5, 8, 10, 15, 24],
            value=5,
            type="value",
            label="Scan FPS · frames per second to analyse · higher = thorough but slower",
        )
        max_frames = gr.Slider(50, 600, value=200, step=50, label="Max Frames Cap")
    return scan_fps, max_frames


def _single_model_tabs(task, *, image_button, image_result_label, image_json_label,
                       video_button, live_label, default_conf=0.25):
    """Build the Image / Video / Webcam sub-tabs that drive one model.

    Must be called inside an open gr.Blocks / gr.Tab context. *task* is the
    MODELS key passed to infer_image, infer_video and stream_webcam; the
    remaining arguments are the labels that differ between model tabs.
    """
    with gr.Tabs():
        with gr.Tab("📷 Image"):
            with gr.Row():
                with gr.Column():
                    img_in = gr.Image(type="pil", label="Upload Image")
                    conf_i, iou_i, sz_i = shared_controls(default_conf=default_conf)
                    btn_i = gr.Button(image_button, variant="primary")
                with gr.Column():
                    img_out = gr.Image(type="pil", label=image_result_label)
                    img_json = gr.Code(label=image_json_label, language="json")
            btn_i.click(infer_image,
                        [img_in, gr.State(task), conf_i, iou_i, sz_i],
                        [img_out, img_json])

        with gr.Tab("🎬 Video"):
            with gr.Row():
                with gr.Column():
                    vid_in = gr.Video(label="Upload Video")
                    conf_v, iou_v, sz_v = shared_controls(default_conf=default_conf)
                    fs_v, mf_v = video_controls()
                    btn_v = gr.Button(video_button, variant="primary")
                with gr.Column():
                    vid_out = gr.Video(label="Annotated Video")
                    vid_json = gr.Code(label="Summary JSON", language="json")
            btn_v.click(infer_video,
                        [vid_in, gr.State(task), conf_v, iou_v, sz_v, fs_v, mf_v],
                        [vid_out, vid_json])

        with gr.Tab("📡 Webcam"):
            with gr.Row():
                with gr.Column(scale=1):
                    conf_c, iou_c, sz_c = shared_controls(default_conf=default_conf)
                with gr.Column(scale=2):
                    cam_in = gr.Image(sources=["webcam"], streaming=True,
                                      type="numpy", label="Webcam")
                    cam_out = gr.Image(streaming=True, label=live_label)
            cam_in.stream(lambda f, c, i, s: stream_webcam(f, task, c, i, s),
                          [cam_in, conf_c, iou_c, sz_c], [cam_out])


_order = ["object_detection", "pose", "segmentation", "classification", "obb", "weapon"]
TASK_CHOICES = [(TASK_DISPLAY[t], t) for t in _order if t in MODELS]


# ══════════════════════════════════════════════════════════════════
# CSS
# ══════════════════════════════════════════════════════════════════
CSS = """
body,.gradio-container{
  background:#060c1a!important;color:#e2e8f0!important;
  font-family:'Segoe UI',system-ui,sans-serif
}
.hero{
  background:linear-gradient(135deg,#0d1b2a,#1a2744,#0f3460);
  border-radius:16px;padding:2rem;margin-bottom:1rem;
  border:1px solid #1e3a5f;text-align:center
}
.hero h1{
  font-size:2rem;font-weight:800;
  background:linear-gradient(90deg,#38bdf8,#818cf8,#34d399);
  -webkit-background-clip:text;-webkit-text-fill-color:transparent;margin:0
}
.hero p{color:#94a3b8;margin:.4rem 0 0}
.threat-banner{
  background:linear-gradient(135deg,rgba(99,102,241,.12),rgba(34,211,238,.08));
  border:1px solid rgba(99,102,241,.4);border-radius:12px;
  padding:.85rem 1.25rem;margin-bottom:.75rem;font-size:.9rem
}
.threat-legend{
  display:flex;gap:1rem;flex-wrap:wrap;margin-top:.5rem;font-size:.82rem
}
.tl-normal{color:#22c55e}
.tl-sus{color:#eab308}
.tl-threat{color:#ef4444}
.tip{
  background:rgba(52,211,153,.08);border:1px solid rgba(52,211,153,.3);
  border-radius:8px;padding:.5rem 1rem;color:#6ee7b7;font-size:.84rem;margin-bottom:.5rem
}
.weapon-note{
  background:rgba(239,68,68,.08);border:1px solid rgba(239,68,68,.25);
  border-radius:8px;padding:.5rem 1rem;color:#fca5a5;font-size:.84rem;margin-bottom:.5rem
}
"""


# ══════════════════════════════════════════════════════════════════
# GRADIO UI
# ══════════════════════════════════════════════════════════════════
# NOTE(review): the HTML wrapper tags in this section were missing from the
# source this was restored from. They have been rebuilt from the class names
# defined in CSS above — confirm the markup against the deployed app.
THREAT_LEGEND_HTML = """
<div class="threat-legend">
<span class="tl-normal">🟢 NORMAL — relaxed / standing / walking</span>
<span class="tl-sus">🟡 SUSPICIOUS — crouching / leaning / unusual posture</span>
<span class="tl-threat">🔴 THREATENING — raised arms / aggressive / weapon present</span>
</div>
"""

with gr.Blocks(css=CSS, title="VisionAI — Object Detection & Pose Estimation") as app:
    gr.HTML("""
<div class="hero">
<h1>🤖 VisionAI — Object Detection & Human Pose Estimation</h1>
<p>YOLO11 · Pose Threat Analysis · Weapon Detection (weapon_detection.pt) · FPS-based Video Scanning</p>
<p>Semester Project — all models pre-loaded at startup</p>
</div>
""")

    with gr.Tabs():
        # ════════════════════════════════════════════════════════
        # TAB 1 — POSE THREAT ANALYSIS (primary feature)
        # ════════════════════════════════════════════════════════
        with gr.Tab("🎯 Pose Threat Analysis"):
            gr.HTML(f"""
<div class="threat-banner">Pose Threat Analysis — Runs Pose Estimation + Weapon Detection together. Each detected person is classified by posture: {THREAT_LEGEND_HTML}</div>
""")
            with gr.Tabs():
                # IMAGE
                with gr.Tab("📷 Image"):
                    with gr.Row():
                        with gr.Column():
                            ta_img_in = gr.Image(type="pil", label="Upload Image")
                            conf_tai, iou_tai, sz_tai = shared_controls()
                            btn_tai = gr.Button("🎯 Analyse Threat", variant="primary")
                        with gr.Column():
                            ta_img_out = gr.Image(type="pil", label="Annotated Result")
                            ta_img_json = gr.Code(label="Threat Analysis JSON", language="json")
                    btn_tai.click(infer_combined_image,
                                  [ta_img_in, conf_tai, iou_tai, sz_tai],
                                  [ta_img_out, ta_img_json])

                # VIDEO
                with gr.Tab("🎬 Video"):
                    gr.HTML('<div class="tip">⚡ Pose threat is evaluated on every scanned frame. Use Scan FPS 3–5 on free tier.</div>')
                    with gr.Row():
                        with gr.Column():
                            ta_vid_in = gr.Video(label="Upload Video")
                            conf_tav, iou_tav, sz_tav = shared_controls()
                            fs_tav, mf_tav = video_controls()
                            btn_tav = gr.Button("🎯 Analyse Video Threats", variant="primary")
                        with gr.Column():
                            ta_vid_out = gr.Video(label="Annotated Output")
                            ta_vid_json = gr.Code(label="Threat Summary JSON", language="json")
                    btn_tav.click(infer_combined_video,
                                  [ta_vid_in, conf_tav, iou_tav, sz_tav, fs_tav, mf_tav],
                                  [ta_vid_out, ta_vid_json])

                # WEBCAM
                with gr.Tab("📡 Live Webcam"):
                    gr.HTML(f"""
<div class="threat-banner">📡 Live Pose Threat Detection — real-time per-person threat classification. {THREAT_LEGEND_HTML}</div>
""")
                    with gr.Row():
                        with gr.Column(scale=1):
                            conf_taw, iou_taw, sz_taw = shared_controls(default_conf=0.30)
                            gr.Markdown(
                                "**Tips for live accuracy:**\n"
                                "- Stand in full view of camera\n"
                                "- Ensure good lighting\n"
                                "- Image Size 320 = faster on CPU\n"
                                "- Raise both arms to test 🔴 THREATENING\n"
                            )
                        with gr.Column(scale=2):
                            ta_cam_in = gr.Image(sources=["webcam"], streaming=True,
                                                 type="numpy", label="Webcam Feed")
                            ta_cam_out = gr.Image(streaming=True, label="🎯 Live Threat Analysis")
                    ta_cam_in.stream(stream_webcam_combined,
                                     [ta_cam_in, conf_taw, iou_taw, sz_taw],
                                     [ta_cam_out])

        # ════════════════════════════════════════════════════════
        # TAB 2 — WEAPON DETECTION
        # ════════════════════════════════════════════════════════
        with gr.Tab("🔫 Weapon Detection"):
            gr.HTML("""
<div class="weapon-note">🔫 Custom Weapon Detection Model (weapon_detection.pt) — detects firearms and other weapons. Combined with pose analysis for full threat assessment.</div>
""")
            _single_model_tabs(
                "weapon",
                default_conf=0.20,
                image_button="🔫 Detect Weapons",
                image_result_label="Result",
                image_json_label="Detection JSON",
                video_button="🔫 Detect Weapons in Video",
                live_label="🔫 Weapon Detection Live",
            )

        # ════════════════════════════════════════════════════════
        # TAB 3 — OBJECT DETECTION
        # ════════════════════════════════════════════════════════
        with gr.Tab("🔍 Object Detection"):
            _single_model_tabs(
                "object_detection",
                image_button="▶ Run Detection",
                image_result_label="Result",
                image_json_label="JSON",
                video_button="▶ Process Video",
                live_label="Live Detection",
            )

        # ════════════════════════════════════════════════════════
        # TAB 4 — POSE ESTIMATION (standalone)
        # ════════════════════════════════════════════════════════
        with gr.Tab("🦴 Pose Estimation"):
            _single_model_tabs(
                "pose",
                image_button="▶ Estimate Pose",
                image_result_label="Skeleton Result",
                image_json_label="Keypoints JSON",
                video_button="▶ Process Video",
                live_label="Live Skeleton",
            )

        # ════════════════════════════════════════════════════════
        # TAB 5 — OTHER MODELS
        # ════════════════════════════════════════════════════════
        with gr.Tab("🧩 More Models"):
            with gr.Tabs():
                with gr.Tab("📷 Image"):
                    other_choices = [(TASK_DISPLAY[t], t)
                                     for t in ["segmentation", "classification", "obb"]
                                     if t in MODELS]
                    # The controls are only built when at least one model loaded.
                    if other_choices:
                        task_om = gr.Radio(choices=other_choices,
                                           value=other_choices[0][1],
                                           label="Select Model")
                        with gr.Row():
                            with gr.Column():
                                om_in = gr.Image(type="pil", label="Upload Image")
                                conf_om, iou_om, sz_om = shared_controls()
                                btn_om = gr.Button("▶ Run", variant="primary")
                            with gr.Column():
                                om_out = gr.Image(type="pil", label="Result")
                                om_json = gr.Code(label="JSON", language="json")
                        btn_om.click(infer_image,
                                     [om_in, task_om, conf_om, iou_om, sz_om],
                                     [om_out, om_json])

                with gr.Tab("🎬 Video"):
                    other_choices_v = [(TASK_DISPLAY[t], t)
                                       for t in ["segmentation", "classification", "obb"]
                                       if t in MODELS]
                    if other_choices_v:
                        task_omv = gr.Radio(choices=other_choices_v,
                                            value=other_choices_v[0][1],
                                            label="Select Model")
                        with gr.Row():
                            with gr.Column():
                                omv_in = gr.Video(label="Upload Video")
                                conf_omv, iou_omv, sz_omv = shared_controls()
                                fs_omv, mf_omv = video_controls()
                                btn_omv = gr.Button("▶ Process Video", variant="primary")
                            with gr.Column():
                                omv_out = gr.Video(label="Annotated Video")
                                omv_json = gr.Code(label="Summary JSON", language="json")
                        btn_omv.click(infer_video,
                                      [omv_in, task_omv, conf_omv, iou_omv, sz_omv,
                                       fs_omv, mf_omv],
                                      [omv_out, omv_json])

    gr.HTML("""
VisionAI · Object Detection & Human Pose Estimation · YOLO11 · weapon_detection.pt · Semester Project
""")


if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=7860, show_error=True)