from typing import Optional

import cv2
import gradio as gr
import numpy as np
import torch
from facenet_pytorch import MTCNN
from transformers import pipeline
from transformers.image_transforms import to_pil_image

# 1) Initialize device, face detector, and HF pipelines
# Prefer CUDA when torch reports it as available; otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mtcnn = MTCNN(device=device, keep_all=True)

# Both classifiers are given the same integer device index:
# 0 when running on CUDA, -1 when running on the CPU.
_pipeline_device = 0 if device.type == "cuda" else -1

age_pipe = pipeline(
    task="image-classification",
    model="nateraw/vit-age-classifier",
    device=_pipeline_device,
)
gender_pipe = pipeline(
    task="image-classification",
    model="prithivMLmods/Gender-Classifier-Mini",
    device=_pipeline_device,
)

# 2) Frame annotation function
def annotate(frame: np.ndarray) -> Optional[np.ndarray]:
    """Draw a box and a gender/age label around every face found in ``frame``.

    Args:
        frame: H×W×3 RGB frame as a NumPy array. ``None`` is tolerated so a
            streaming input that fires before a frame exists does not crash.

    Returns:
        An annotated copy of ``frame`` (the input itself is never modified),
        or ``None`` when ``frame`` is ``None``.
    """
    if frame is None:
        return None

    img = frame.copy()
    height, width = img.shape[:2]

    boxes, _ = mtcnn.detect(img)
    if boxes is None:
        return img

    for box in boxes:
        # Detector boxes are floats and are not guaranteed to lie inside the
        # image. Clamp them so the slice below can neither wrap around on a
        # negative index nor come out empty.
        x1, y1, x2, y2 = map(int, box)
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, width), min(y2, height)
        if x2 <= x1 or y2 <= y1:
            continue

        # Crop from the untouched input rather than from `img`, so boxes and
        # text already drawn for earlier faces never leak into a later crop.
        # The image-classification pipeline is documented to take a path/URL
        # string or a PIL image, so convert the ndarray crop before calling it.
        face = to_pil_image(frame[y1:y2, x1:x2])
        age_pred = age_pipe(face)[0]
        gender_pred = gender_pipe(face)[0]
        label = (
            f"{gender_pred['label']} {gender_pred['score']:.2f}, "
            f"{age_pred['label']} {age_pred['score']:.2f}"
        )

        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Keep the text baseline on-canvas for faces touching the top edge.
        label_y = max(y1 - 10, 15)
        cv2.putText(
            img, label, (x1, label_y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2
        )
    return img

# 3) Build Gradio interface
# NOTE(review): `source=` and `tool=` are Gradio 3.x keyword arguments; Gradio 4
# replaced `source` with `sources=[...]` and removed `tool` — confirm the
# Gradio version this app is pinned to.
_webcam_input = gr.Image(source="webcam", tool="editor", streaming=True)
_annotated_output = gr.Image(type="numpy")

# live=True re-runs `annotate` whenever the input changes, with no submit button.
iface = gr.Interface(
    fn=annotate,
    inputs=_webcam_input,
    outputs=_annotated_output,
    live=True,
    title="Real-Time Age & Gender Prediction",
    description="Allow webcam access in your browser; faces will be boxed with age & gender labels.",
)

# 4) Launch for HF Spaces (no share=True needed)
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 so the server is reachable from
    # outside the container, not only from localhost.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    iface.launch(**launch_options)