"""Gradio demo: box faces in webcam frames and label each with age and gender."""

from typing import Optional

import cv2
import gradio as gr
import numpy as np
import torch
from facenet_pytorch import MTCNN
from transformers import pipeline
from transformers.image_transforms import to_pil_image

# 1) Initialize device, face detector, and HF pipelines
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The pipelines take a device index (0 = first GPU, -1 = CPU); compute it once
# so both classifiers are guaranteed to agree with `device`.
_PIPE_DEVICE = 0 if device.type == "cuda" else -1

# keep_all=True makes the detector report every face, not just the best one.
mtcnn = MTCNN(keep_all=True, device=device)
age_pipe = pipeline(
    "image-classification",
    model="nateraw/vit-age-classifier",
    device=_PIPE_DEVICE,
)
gender_pipe = pipeline(
    "image-classification",
    model="prithivMLmods/Gender-Classifier-Mini",
    device=_PIPE_DEVICE,
)

# Drawing parameters shared by the box and its label.
_BOX_COLOR = (0, 255, 0)
_BOX_THICKNESS = 2
_FONT_SCALE = 0.6
_LABEL_OFFSET = 10  # pixels between the top of the box and the text baseline
_MIN_LABEL_Y = 15  # keeps the text inside the frame for faces at the top edge


# 2) Frame annotation function
def annotate(frame: np.ndarray) -> Optional[np.ndarray]:
    """Draw a box and an age/gender label around every detected face.

    Args:
        frame: H×W×3 RGB frame as a NumPy array. ``None`` is tolerated so the
            callback does not raise when no frame has been supplied.

    Returns:
        A copy of ``frame`` with boxes and ``"<gender> <score>, <age> <score>"``
        labels drawn on it, or ``None`` when ``frame`` is ``None``. The input
        array is never modified.
    """
    if frame is None:
        return None

    img = frame.copy()
    boxes, _ = mtcnn.detect(frame)
    if boxes is None:
        return img

    height, width = frame.shape[:2]
    for box in boxes:
        x1, y1, x2, y2 = (int(v) for v in box)

        # Detector boxes may extend past the frame. A negative index would
        # make the NumPy slice wrap around, so clamp to the image bounds.
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, width), min(y2, height)
        if x2 <= x1 or y2 <= y1:
            continue  # nothing left to classify after clamping

        # Crop from the untouched input so boxes/labels already drawn for
        # earlier faces cannot leak into this crop. The image-classification
        # pipeline expects a PIL image (or a path/URL), not a NumPy array.
        face = to_pil_image(frame[y1:y2, x1:x2])

        age_pred = age_pipe(face)[0]
        gender_pred = gender_pipe(face)[0]
        label = (
            f"{gender_pred['label']} {gender_pred['score']:.2f}, "
            f"{age_pred['label']} {age_pred['score']:.2f}"
        )

        cv2.rectangle(img, (x1, y1), (x2, y2), _BOX_COLOR, _BOX_THICKNESS)
        cv2.putText(
            img,
            label,
            (x1, max(y1 - _LABEL_OFFSET, _MIN_LABEL_Y)),
            cv2.FONT_HERSHEY_SIMPLEX,
            _FONT_SCALE,
            _BOX_COLOR,
            _BOX_THICKNESS,
        )
    return img


# 3) Build Gradio interface
# NOTE(review): `source=` and `tool=` look like Gradio 3.x keyword names; newer
# releases appear to use `sources=[...]` and drop `tool` -- confirm against the
# pinned Gradio version before upgrading.
iface = gr.Interface(
    fn=annotate,
    inputs=gr.Image(source="webcam", tool="editor", streaming=True),
    outputs=gr.Image(type="numpy"),
    title="Real-Time Age & Gender Prediction",
    description=(
        "Allow webcam access in your browser; faces will be boxed with age & gender labels."
    ),
    live=True,
)

# 4) Launch for HF Spaces (no share=True needed)
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)