Face-Emotion / process_emotion_video.py
trttung1610's picture
Upload 4 files
04f9904
import os
import tempfile
import torch
import cv2
from PIL import Image
from torchvision import transforms
from facenet_pytorch import MTCNN
def process_emotion(input_video):
try:
# Load the pre-trained CNN model
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 7) # 7 classes for emotions
# Load the pre-trained weights
model.eval()
# Define emotions
emotions = ["Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]
# Initialize MTCNN for face detection
mtcnn = MTCNN()
# Open the video capture
cap = cv2.VideoCapture(input_video.name)
if not cap.isOpened():
return "Error: Could not open video capture."
# Create a temporary directory to store the frames
temp_dir = tempfile.mkdtemp()
frame_paths = []
# Define image transformations
preprocess = transforms.Compose([
transforms.Resize(224),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# Process each frame of the video
frame_count = 0
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
if frame is None:
print("Error: Empty frame.")
continue
frame_count += 1
pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
face_boxes, _ = mtcnn.detect(pil_frame)
if face_boxes is None:
print(f"No face detected in frame {frame_count}.")
else:
for box in face_boxes:
x1, y1, x2, y2 = box.astype(int)
w = x2 - x1
h = y2 - y1
face_roi = frame[y1:y2, x1:x2]
pil_face = Image.fromarray(cv2.cvtColor(face_roi, cv2.COLOR_BGR2RGB))
input_tensor = preprocess(pil_face)
input_batch = input_tensor.unsqueeze(0)
with torch.no_grad():
output = model(input_batch)
predicted_emotion = output.argmax().item()
emotion_label = emotions[predicted_emotion]
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(frame, emotion_label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
frame_path = os.path.join(temp_dir, f"{frame_count:04d}.png")
cv2.imwrite(frame_path, frame)
frame_paths.append(frame_path)
cap.release()
cv2.destroyAllWindows()
if not frame_paths:
return "No faces detected in the video."
# Convert the frames to a video
output_path = "output_video.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# Get dimensions from the first frame
sample_frame = cv2.imread(frame_paths[0])
if sample_frame is None:
return "Error: Unable to read sample frame."
frame_height, frame_width, _ = sample_frame.shape
out = cv2.VideoWriter(output_path, fourcc, 30, (frame_width, frame_height))
for frame_path in frame_paths:
frame = cv2.imread(frame_path)
if frame is not None:
out.write(frame)
os.remove(frame_path)
else:
print(f"Warning: Unable to read frame {frame_path}")
out.release()
return output_path
except Exception as e:
return f"An error occurred: {str(e)}"