import gradio as gr
import cv2
import torch
import numpy as np
import mediapipe as mp
import matplotlib.pyplot as plt
import seaborn as sns
from facenet_pytorch import MTCNN
from transformers import AutoFeatureExtractor, AutoModelForImageClassification
from PIL import Image
import os
from collections import Counter

# Load models
device = 'cuda' if torch.cuda.is_available() else 'cpu'
mtcnn = MTCNN(device=device)
model = AutoModelForImageClassification.from_pretrained("trpakov/vit-face-expression").to(device)
extractor = AutoFeatureExtractor.from_pretrained("trpakov/vit-face-expression")

# Emotion labels (AffectNet indexing; prediction below uses the model's own id2label mapping)
affectnet_labels = {
    0: "neutral", 1: "happy", 2: "sad", 3: "surprise",
    4: "fear", 5: "disgust", 6: "anger", 7: "contempt"
}


def detect_emotions(frame):
    """Detects the facial emotion of the first face found in a BGR frame."""
    img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    faces, _ = mtcnn.detect(img)
    if faces is None or len(faces) == 0:
        return "No Face Detected"

    # Crop the first detected face (MTCNN returns float box coordinates)
    x1, y1, x2, y2 = [int(v) for v in faces[0]]
    face = img.crop((x1, y1, x2, y2))

    inputs = extractor(images=face, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    return model.config.id2label[torch.argmax(probs).item()]


def process_video(input_path):
    """Processes a video, overlays the per-frame emotion, and builds a summary chart."""
    # gr.File may pass a file object (older Gradio) or a plain path string
    if hasattr(input_path, "name"):
        input_path = input_path.name

    cap = cv2.VideoCapture(input_path)
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out = cv2.VideoWriter("output_video.mp4", cv2.VideoWriter_fourcc(*'mp4v'),
                          fps, (frame_width, frame_height))

    emotion_counts = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        emotion = detect_emotions(frame)
        emotion_counts.append(emotion)

        # Overlay the detected emotion on a semi-transparent banner
        overlay = frame.copy()
        cv2.rectangle(overlay, (10, 10), (350, 80), (255, 255, 255), -1)
        cv2.putText(overlay, f'Emotion: {emotion}', (20, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.addWeighted(overlay, 0.6, frame, 0.4, 0, frame)

        out.write(frame)

    cap.release()
    out.release()
    cv2.destroyAllWindows()

    # Find the most frequent emotion
    emotion_counter = Counter(emotion_counts)
    major_emotion = emotion_counter.most_common(1)[0][0] if emotion_counter else "No Face Detected"

    # Generate the emotion distribution pie chart
    fig = plt.figure(figsize=(5, 5))
    if emotion_counter:
        labels, sizes = zip(*emotion_counter.items())
        plt.pie(sizes, labels=labels, autopct='%1.1f%%', colors=sns.color_palette('pastel'))
    plt.title("Emotion Distribution")
    plt.savefig("emotion_distribution.jpg")

    return "output_video.mp4", fig, major_emotion


# Gradio Web Interface
with gr.Blocks(css="""
    .gradio-container { max-width: 750px !important; margin: auto; background-color: #f8f9fa; padding: 20px; border-radius: 15px; }
    .gradio-container h1 { font-size: 22px; text-align: center; color: #333; }
    .gradio-container .gr-button { background-color: #007bff; color: white; border-radius: 10px; padding: 8px 15px; }
    .gradio-container .gr-textbox { font-size: 16px; font-weight: bold; color: #007bff; }
    .gradio-container .gr-file { border-radius: 10px; padding: 5px; }
    @media screen and (max-width: 768px) {
        .gradio-container { width: 100%; padding: 10px; }
        .gradio-container h1 { font-size: 18px; }
    }
""") as demo:
    gr.Markdown("# 🎭 Emotion Analysis from Video 🎥")
    gr.Markdown("Upload a video, and the AI will detect emotions in each frame, providing a processed video, an emotion distribution chart, and the major detected emotion.")

    with gr.Row():
        video_input = gr.File(label="📤 Upload Video (MP4, MOV, AVI)")

    with gr.Row():
        process_button = gr.Button("🚀 Analyze")

    with gr.Row():
        video_output = gr.File(label="📥 Processed Video")
        emotion_chart = gr.Plot(label="📊 Emotion Distribution Chart")
        major_emotion_output = gr.Textbox(label="🔥 Major Emotion Detected", interactive=False)

    process_button.click(fn=process_video, inputs=video_input,
                         outputs=[video_output, emotion_chart, major_emotion_output])

demo.launch()