import gradio as gr
import cv2
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from facenet_pytorch import MTCNN
from transformers import AutoFeatureExtractor, AutoModelForImageClassification
from PIL import Image
from collections import Counter
# Load models
device = 'cuda' if torch.cuda.is_available() else 'cpu'
mtcnn = MTCNN(device=device)
model = AutoModelForImageClassification.from_pretrained("trpakov/vit-face-expression").to(device)
extractor = AutoFeatureExtractor.from_pretrained("trpakov/vit-face-expression")
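# Optional sanity check (illustrative only): print the label set this checkpoint actually ships with,
# since the exact id2label mapping depends on the checkpoint rather than on the table below.
# print(model.config.id2label)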
# Emotion labels (AffectNet-style reference mapping; the classifier below reads its labels from model.config.id2label)
affectnet_labels = {
    0: "neutral", 1: "happy", 2: "sad", 3: "surprise", 4: "fear",
    5: "disgust", 6: "anger", 7: "contempt"
}
def detect_emotions(frame):
    """Detects the dominant facial emotion in a single BGR frame."""
    # OpenCV frames are BGR; convert to RGB before handing them to PIL / the model.
    img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    boxes, _ = mtcnn.detect(img)
    if boxes is None or len(boxes) == 0:
        return "No Face Detected"
    # Classify only the first detected face.
    face = img.crop(tuple(boxes[0]))
    inputs = extractor(images=face, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    return model.config.id2label[torch.argmax(probs).item()]
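# Quick smoke test (illustrative sketch, assumes a local "sample.jpg" exists; not part of the app flow):
# frame = cv2.imread("sample.jpg")      # BGR frame, same layout cv2.VideoCapture yields
# print(detect_emotions(frame))         # e.g. "happy" or "No Face Detected"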
def process_video(input_path):
    """Processes a video, overlays per-frame emotions, and builds a summary chart."""
    # gr.File may hand over either a plain filepath string or a tempfile-like object.
    if hasattr(input_path, "name"):
        input_path = input_path.name
    cap = cv2.VideoCapture(input_path)
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30  # fall back if FPS metadata is missing
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out = cv2.VideoWriter("output_video.mp4", cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

    emotion_counts = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        emotion = detect_emotions(frame)
        emotion_counts.append(emotion)

        # Overlay the detected emotion on a semi-transparent banner.
        overlay = frame.copy()
        cv2.rectangle(overlay, (10, 10), (350, 80), (255, 255, 255), -1)
        cv2.putText(overlay, f'Emotion: {emotion}', (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.addWeighted(overlay, 0.6, frame, 0.4, 0, frame)
        out.write(frame)

    cap.release()
    out.release()

    # Find the most frequent emotion across all frames.
    emotion_counter = Counter(emotion_counts)
    major_emotion = emotion_counter.most_common(1)[0][0] if emotion_counter else "No Face Detected"

    # Generate the emotion distribution pie chart; return the Figure object so gr.Plot can render it.
    fig = plt.figure(figsize=(5, 5))
    if emotion_counter:
        labels, sizes = zip(*emotion_counter.items())
        plt.pie(sizes, labels=labels, autopct='%1.1f%%', colors=sns.color_palette('pastel'))
    plt.title("Emotion Distribution")
    plt.savefig("emotion_distribution.jpg")
    return "output_video.mp4", fig, major_emotion
# Gradio Web Interface
with gr.Blocks(css="""
.gradio-container { max-width: 750px !important; margin: auto; background-color: #f8f9fa; padding: 20px; border-radius: 15px; }
.gradio-container h1 { font-size: 22px; text-align: center; color: #333; }
.gradio-container .gr-button { background-color: #007bff; color: white; border-radius: 10px; padding: 8px 15px; }
.gradio-container .gr-textbox { font-size: 16px; font-weight: bold; color: #007bff; }
.gradio-container .gr-file { border-radius: 10px; padding: 5px; }
@media screen and (max-width: 768px) {
.gradio-container { width: 100%; padding: 10px; }
.gradio-container h1 { font-size: 18px; }
}
""") as demo:
gr.Markdown("# 🎭 Emotion Analysis from Video πŸŽ₯")
gr.Markdown("Upload a video, and the AI will detect emotions in each frame, providing a processed video, an emotion distribution chart, and the major detected emotion.")
with gr.Row():
video_input = gr.File(label="πŸ“€ Upload Video (MP4, MOV, AVI)")
with gr.Row():
process_button = gr.Button("πŸš€ Analyze")
with gr.Row():
video_output = gr.File(label="πŸ“₯ Processed Video")
emotion_chart = gr.Plot(label="πŸ“Š Emotion Distribution Chart")
major_emotion_output = gr.Textbox(label="πŸ”₯ Major Emotion Detected", interactive=False)
process_button.click(fn=process_video, inputs=video_input, outputs=[video_output, emotion_chart, major_emotion_output])
demo.launch()
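# To expose a temporary public link when running locally, launch with share=True instead:
# demo.launch(share=True)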