import tempfile

import cv2
import gradio as gr
import mediapipe as mp
import numpy as np

# Path to hand landmark model file (make sure it's in your repo!)
MODEL_PATH = "hand_landmarker.task"

# MediaPipe tasks-API aliases.
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode
mp_image = mp.Image
mp_format = mp.ImageFormat

# Landmark-index pairs forming the hand skeleton of MediaPipe's 21-point model.
HAND_CONNECTIONS = [
    (0, 1), (1, 2), (2, 3), (3, 4),         # thumb
    (0, 5), (5, 6), (6, 7), (7, 8),         # index
    (0, 9), (9, 10), (10, 11), (11, 12),    # middle
    (0, 13), (13, 14), (14, 15), (15, 16),  # ring
    (0, 17), (17, 18), (18, 19), (19, 20),  # pinky
]

# BGR colors used when drawing each finger's connections.
FINGER_COLORS = {
    'thumb': (245, 245, 245),
    'index': (128, 0, 128),
    'middle': (0, 255, 0),
    'ring': (0, 165, 255),
    'pinky': (255, 0, 0),
    'palm': (100, 100, 100),
}


def get_finger_color(start_idx):
    """Return the BGR color for a skeleton connection starting at landmark *start_idx*.

    Landmarks 1-4 are the thumb, 5-8 the index finger, 9-12 the middle,
    13-16 the ring, and 17-20 the pinky; landmark 0 (the wrist) is grouped
    with the thumb so wrist-to-finger-base connections get a visible color.
    Anything outside 0-20 falls back to the palm color.
    """
    if 0 <= start_idx < 5:
        return FINGER_COLORS['thumb']
    if 5 <= start_idx < 9:
        return FINGER_COLORS['index']
    if 9 <= start_idx < 13:
        return FINGER_COLORS['middle']
    if 13 <= start_idx < 17:
        return FINGER_COLORS['ring']
    if 17 <= start_idx < 21:
        return FINGER_COLORS['pinky']
    return FINGER_COLORS['palm']


def process_video(video_file):
    """Annotate every frame of a video with detected hand landmarks.

    Args:
        video_file: Either a filesystem path (str) or a Gradio file dict
            with a ``"name"`` key, depending on the Gradio version in use.

    Returns:
        Path to a temporary .mp4 file containing the annotated video.

    Raises:
        ValueError: If the input video cannot be opened.
    """
    # Gradio may send a dict or path string depending on how input is passed.
    if isinstance(video_file, dict):
        video_path = video_file["name"]
    else:
        video_path = video_file

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError("Could not open video.")

    # Fall back to 24 fps when the container reports 0 (common for webcam clips).
    fps = cap.get(cv2.CAP_PROP_FPS) or 24
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Prepare the output path; close the handle immediately so cv2.VideoWriter
    # can open the same file (the open handle would otherwise leak, and on
    # Windows it would block the writer entirely).
    tmp_out = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    out_path = tmp_out.name
    tmp_out.close()

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(out_path, fourcc, fps, (w, h))

    # Load the hand detection model (IMAGE mode: each frame detected independently).
    options = HandLandmarkerOptions(
        base_options=BaseOptions(model_asset_path=MODEL_PATH),
        running_mode=VisionRunningMode.IMAGE,
        num_hands=2,
        min_hand_detection_confidence=0.5,
        min_hand_presence_confidence=0.5,
        min_tracking_confidence=0.5,
    )

    try:
        with HandLandmarker.create_from_options(options) as landmarker:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # MediaPipe expects RGB; OpenCV delivers BGR.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                mp_img = mp_image(image_format=mp_format.SRGB, data=rgb_frame)
                results = landmarker.detect(mp_img)

                if results.hand_landmarks:
                    for hand_landmarks in results.hand_landmarks:
                        # Landmarks are normalized [0, 1]; scale to pixel coords.
                        points = [
                            (int(lm.x * w), int(lm.y * h)) for lm in hand_landmarks
                        ]
                        for start, end in HAND_CONNECTIONS:
                            color = get_finger_color(start)
                            cv2.line(frame, points[start], points[end], color, 2)
                        for x, y in points:
                            cv2.circle(frame, (x, y), 4, (0, 255, 255), -1)

                out.write(frame)
    finally:
        # Release capture/writer even if detection raises mid-stream.
        cap.release()
        out.release()

    return out_path


# Gradio app interface
demo = gr.Interface(
    fn=process_video,
    inputs=gr.Video(label="Upload Video or Record via Webcam"),
    outputs=gr.Video(label="Hand Landmark Annotated Video"),
    title="🖐️ Hand Detection using MediaPipe",
    description="Upload a video or record from webcam. The system will detect hands and annotate keypoints using MediaPipe HandLandmarker.",
)

if __name__ == "__main__":
    demo.launch()