import tempfile

import cv2
import gradio as gr
import mediapipe as mp
import numpy as np

# Path to hand landmark model file (make sure it's in your repo!)
MODEL_PATH = "hand_landmarker.task"

# MediaPipe tasks-API aliases.
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode
mp_image = mp.Image
mp_format = mp.ImageFormat

# Landmark-index pairs forming the hand skeleton of MediaPipe's 21-point model.
HAND_CONNECTIONS = [
    (0, 1), (1, 2), (2, 3), (3, 4),         # thumb
    (0, 5), (5, 6), (6, 7), (7, 8),         # index
    (0, 9), (9, 10), (10, 11), (11, 12),    # middle
    (0, 13), (13, 14), (14, 15), (15, 16),  # ring
    (0, 17), (17, 18), (18, 19), (19, 20),  # pinky
]

# BGR colors used when drawing each finger's connections.
FINGER_COLORS = {
    'thumb': (245, 245, 245),
    'index': (128, 0, 128),
    'middle': (0, 255, 0),
    'ring': (0, 165, 255),
    'pinky': (255, 0, 0),
    'palm': (100, 100, 100),
}


def get_finger_color(start_idx):
    """Return the BGR color for a skeleton connection starting at landmark *start_idx*.

    Landmarks 1-4 are the thumb, 5-8 the index finger, 9-12 the middle,
    13-16 the ring, and 17-20 the pinky; landmark 0 (the wrist) is grouped
    with the thumb so wrist-to-finger-base connections get a visible color.
    Anything outside 0-20 falls back to the palm color.
    """
    if 0 <= start_idx < 5:
        return FINGER_COLORS['thumb']
    if 5 <= start_idx < 9:
        return FINGER_COLORS['index']
    if 9 <= start_idx < 13:
        return FINGER_COLORS['middle']
    if 13 <= start_idx < 17:
        return FINGER_COLORS['ring']
    if 17 <= start_idx < 21:
        return FINGER_COLORS['pinky']
    return FINGER_COLORS['palm']


def process_video(video_file):
    """Annotate every frame of a video with detected hand landmarks.

    Args:
        video_file: Either a filesystem path (str) or a Gradio file dict
            with a ``"name"`` key, depending on the Gradio version in use.

    Returns:
        Path to a temporary .mp4 file containing the annotated video.

    Raises:
        ValueError: If the input video cannot be opened.
    """
    # Gradio may send a dict or path string depending on how input is passed.
    if isinstance(video_file, dict):
        video_path = video_file["name"]
    else:
        video_path = video_file

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError("Could not open video.")

    # Fall back to 24 fps when the container reports 0 (common for webcam clips).
    fps = cap.get(cv2.CAP_PROP_FPS) or 24
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Prepare the output path; close the handle immediately so cv2.VideoWriter
    # can open the same file (the open handle would otherwise leak, and on
    # Windows it would block the writer entirely).
    tmp_out = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    out_path = tmp_out.name
    tmp_out.close()

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(out_path, fourcc, fps, (w, h))

    # Load the hand detection model (IMAGE mode: each frame detected independently).
    options = HandLandmarkerOptions(
        base_options=BaseOptions(model_asset_path=MODEL_PATH),
        running_mode=VisionRunningMode.IMAGE,
        num_hands=2,
        min_hand_detection_confidence=0.5,
        min_hand_presence_confidence=0.5,
        min_tracking_confidence=0.5,
    )

    try:
        with HandLandmarker.create_from_options(options) as landmarker:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # MediaPipe expects RGB; OpenCV delivers BGR.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                mp_img = mp_image(image_format=mp_format.SRGB, data=rgb_frame)
                results = landmarker.detect(mp_img)

                if results.hand_landmarks:
                    for hand_landmarks in results.hand_landmarks:
                        # Landmarks are normalized [0, 1]; scale to pixel coords.
                        points = [
                            (int(lm.x * w), int(lm.y * h)) for lm in hand_landmarks
                        ]
                        for start, end in HAND_CONNECTIONS:
                            color = get_finger_color(start)
                            cv2.line(frame, points[start], points[end], color, 2)
                        for x, y in points:
                            cv2.circle(frame, (x, y), 4, (0, 255, 255), -1)

                out.write(frame)
    finally:
        # Release capture/writer even if detection raises mid-stream.
        cap.release()
        out.release()

    return out_path


# Gradio app interface
demo = gr.Interface(
    fn=process_video,
    inputs=gr.Video(label="Upload Video or Record via Webcam"),
    outputs=gr.Video(label="Hand Landmark Annotated Video"),
    title="🖐️ Hand Detection using MediaPipe",
    description="Upload a video or record from webcam. The system will detect hands and annotate keypoints using MediaPipe HandLandmarker.",
)

if __name__ == "__main__":
    demo.launch()