import gradio as gr
import cv2
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from facenet_pytorch import MTCNN
from transformers import AutoFeatureExtractor, AutoModelForImageClassification
from PIL import Image
from collections import Counter

# Load models
device = 'cuda' if torch.cuda.is_available() else 'cpu'
mtcnn = MTCNN(device=device)
model = AutoModelForImageClassification.from_pretrained("trpakov/vit-face-expression").to(device)
extractor = AutoFeatureExtractor.from_pretrained("trpakov/vit-face-expression")

# Reference AffectNet-style emotion labels (not used directly below; the classifier
# reports its own label names via model.config.id2label)
affectnet_labels = {
    0: "neutral", 1: "happy", 2: "sad", 3: "surprise", 4: "fear",
    5: "disgust", 6: "anger", 7: "contempt"
}
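# To see the exact labels this checkpoint emits, the mapping can be printed, e.g.:
#   print(model.config.id2label)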

def detect_emotions(frame):
    """Detects the facial emotion in a single BGR frame and returns its label."""
    img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    boxes, _ = mtcnn.detect(img)
    if boxes is None or len(boxes) == 0:
        return "No Face Detected"

    # Classify only the first detected face
    face = img.crop(tuple(boxes[0]))
    inputs = extractor(images=face, return_tensors="pt").to(device)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

    # The model's own id2label mapping provides the emotion name
    return model.config.id2label[torch.argmax(probs).item()]
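# detect_emotions can be smoke-tested on a single image before processing a full video,
# e.g. print(detect_emotions(cv2.imread("face.jpg"))) with any test image path.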

def process_video(input_path):
    """Processes a video, overlays the per-frame emotion, and builds a summary chart."""
    # gr.File may hand over a plain path or a file wrapper, depending on the Gradio version
    if not isinstance(input_path, str):
        input_path = input_path.name

    cap = cv2.VideoCapture(input_path)
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30  # fall back to 30 if the container reports no FPS
    frame_width, frame_height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out = cv2.VideoWriter("output_video.mp4", cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

    emotion_counts = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        emotion = detect_emotions(frame)
        emotion_counts.append(emotion)

        # Overlay the detected emotion on a semi-transparent banner (60% overlay, 40% original frame)
        overlay = frame.copy()
        cv2.rectangle(overlay, (10, 10), (350, 80), (255, 255, 255), -1)
        cv2.putText(overlay, f'Emotion: {emotion}', (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.addWeighted(overlay, 0.6, frame, 0.4, 0, frame)

        out.write(frame)

    cap.release()
    out.release()

    # Find the most frequent (major) emotion across all frames
    emotion_counter = Counter(emotion_counts)
    major_emotion = emotion_counter.most_common(1)[0][0] if emotion_counter else "No Face Detected"

    # Generate the emotion distribution pie chart; return the Figure so gr.Plot can render it
    fig = plt.figure(figsize=(5, 5))
    if emotion_counter:
        labels, sizes = zip(*emotion_counter.items())
        plt.pie(sizes, labels=labels, autopct='%1.1f%%', colors=sns.color_palette('pastel'))
    plt.title("Emotion Distribution")
    fig.savefig("emotion_distribution.jpg")

    return "output_video.mp4", fig, major_emotion

# Gradio Web Interface
with gr.Blocks(css="""
    .gradio-container { max-width: 750px !important; margin: auto; background-color: #f8f9fa; padding: 20px; border-radius: 15px; }
    .gradio-container h1 { font-size: 22px; text-align: center; color: #333; }
    .gradio-container .gr-button { background-color: #007bff; color: white; border-radius: 10px; padding: 8px 15px; }
    .gradio-container .gr-textbox { font-size: 16px; font-weight: bold; color: #007bff; }
    .gradio-container .gr-file { border-radius: 10px; padding: 5px; }
    @media screen and (max-width: 768px) {
        .gradio-container { width: 100%; padding: 10px; }
        .gradio-container h1 { font-size: 18px; }
    }
""") as demo:
    gr.Markdown("# 🎭 Emotion Analysis from Video πŸŽ₯")
    gr.Markdown("Upload a video, and the AI will detect emotions in each frame, providing a processed video, an emotion distribution chart, and the major detected emotion.")

    with gr.Row():
        video_input = gr.File(label="πŸ“€ Upload Video (MP4, MOV, AVI)")
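        # Note: gr.Video could be used instead of gr.File for an in-browser preview;
        # gr.File is kept here so any MP4/MOV/AVI upload is accepted as a plain file.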
    
    with gr.Row():
        process_button = gr.Button("πŸš€ Analyze")

    with gr.Row():
        video_output = gr.File(label="πŸ“₯ Processed Video")
        emotion_chart = gr.Plot(label="πŸ“Š Emotion Distribution Chart")
    
    major_emotion_output = gr.Textbox(label="πŸ”₯ Major Emotion Detected", interactive=False)

    process_button.click(fn=process_video, inputs=video_input, outputs=[video_output, emotion_chart, major_emotion_output])

demo.launch()
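# launch() also accepts options such as share=True (temporary public link) or
# server_name="0.0.0.0" (expose on the local network); defaults are kept here.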