Spaces:
Sleeping
Sleeping
import logging
import threading
import time

import cv2
import numpy as np
import streamlit as st
from mtcnn import MTCNN
from PIL import Image, ImageDraw
from streamlit_webrtc import webrtc_streamer
from transformers import pipeline
# Only let ERROR-level (and above) records through the "transformers" logger.
logging.getLogger("transformers").setLevel(logging.ERROR)

# Most recent raw ("webcam") and annotated ("analyzed") frames. Written by the
# video frame callback and read by the display loop; access it under `lock`.
img_container = dict.fromkeys(("webcam", "analyzed"))
lock = threading.Lock()

# Hugging Face image-classification pipeline used for facial emotion detection.
emotion_pipeline = pipeline(
    "image-classification",
    model="trpakov/vit-face-expression",
)

# MTCNN face detector.
mtcnn = MTCNN()
# Function to analyze sentiment
def analyze_sentiment(face):
    """Return the highest-scoring emotion label for a cropped face image.

    Args:
        face: Face crop as an image array; it is treated as BGR and converted
            to RGB before classification.

    Returns:
        The ``'label'`` of the prediction with the largest ``'score'`` among
        the results returned by ``emotion_pipeline``.
    """
    # The classifier is fed a PIL image built from the RGB version of the crop.
    pil_face = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
    predictions = emotion_pipeline(pil_face)

    # Pick the prediction the model is most confident about.
    best = max(predictions, key=lambda prediction: prediction['score'])
    return best['label']
# Font scale used when measuring and drawing the emotion label.
TEXT_SIZE = 3


# Function to detect faces, analyze sentiment, and draw a red box around them
def detect_and_draw_faces(frame):
    """Detect faces in a BGR frame, classify each one, and annotate the frame.

    Args:
        frame: BGR image array. It is annotated in place.

    Returns:
        The same ``frame`` object, with a red box and an emotion label drawn
        for every detection that overlaps the image.
    """
    frame_h, frame_w = frame.shape[:2]

    # MTCNN is documented to take RGB input, while this file works in BGR.
    # The detected boxes use the same pixel coordinates either way.
    detections = mtcnn.detect_faces(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    for detection in detections:
        x, y, w, h = detection['box']

        # Detections may extend past the image borders. A negative start index
        # would make the NumPy slice wrap around (or come back empty), which
        # then crashes the colour conversion in analyze_sentiment.
        x1, y1 = max(x, 0), max(y, 0)
        x2, y2 = min(x + w, frame_w), min(y + h, frame_h)
        if x2 <= x1 or y2 <= y1:
            continue  # nothing of this detection lies inside the frame

        # Classify before drawing so the box is not part of the crop.
        sentiment = analyze_sentiment(frame[y1:y2, x1:x2])

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 10)  # Thicker red box

        # Calculate position for the text background and the text itself
        (text_w, text_h), _ = cv2.getTextSize(
            sentiment, cv2.FONT_HERSHEY_SIMPLEX, TEXT_SIZE, 2
        )
        text_x = x1
        # Normally the label sits just above the box; when the face touches
        # the top edge, push it down so it stays inside the image.
        text_y = max(y1 - 10, text_h)
        background_tl = (text_x, text_y - text_h)
        background_br = (text_x + text_w, text_y + 5)

        # Draw black rectangle as background
        cv2.rectangle(frame, background_tl, background_br, (0, 0, 0), cv2.FILLED)
        # Draw white text on top
        cv2.putText(
            frame,
            sentiment,
            (text_x, text_y),
            cv2.FONT_HERSHEY_SIMPLEX,
            TEXT_SIZE,
            (255, 255, 255),
            2,
        )
    return frame
# Streamlit UI
# Custom CSS injected into the page: background, top padding, heading colours
# and fonts, and button appearance.
_PAGE_CSS = """
<style>
.main {
background-color: #FFFFFF;
}
.reportview-container .main .block-container{
padding-top: 2rem;
}
h1 {
color: #E60012;
font-family: 'Arial Black', Gadget, sans-serif;
}
h2 {
color: #E60012;
font-family: 'Arial', sans-serif;
}
h3 {
color: #333333;
font-family: 'Arial', sans-serif;
}
.stButton button {
background-color: #E60012;
color: white;
border-radius: 5px;
font-size: 16px;
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

st.title("Computer Vision Test Lab")
st.subheader("Facial Sentiment")

# Two side-by-side columns: raw input on the left, analysis on the right.
col1, col2 = st.columns(2)

# Left column: placeholder that the display loop fills with the webcam frame.
col1.header("Input Stream")
col1.subheader("Webcam")
video_placeholder = col1.empty()

# Right column: placeholders for the annotated frame and sentiment output.
col2.header("Output Stream")
col2.subheader("Analysis")
output_placeholder = col2.empty()
sentiment_placeholder = col2.empty()
def video_frame_callback(frame):
    """Store the latest raw and annotated frames for the display loop.

    streamlit-webrtc invokes this callback outside the Streamlit script
    thread (see its documentation), so it must not call ``st.*`` functions.
    Results are handed over through ``img_container`` under ``lock``.

    Args:
        frame: Incoming video frame; it must provide
            ``to_ndarray(format="bgr24")``.

    Returns:
        The incoming ``frame`` unchanged.
    """
    try:
        img = frame.to_ndarray(format="bgr24")
        # Annotate a copy: detect_and_draw_faces draws in place, and the raw
        # image must stay clean for the "webcam" view.
        annotated = detect_and_draw_faces(img.copy())
    except Exception:
        # Streamlit UI calls do not work from this thread, so log instead and
        # drop the frame rather than interrupt the stream.
        logging.getLogger(__name__).exception("Error processing frame")
        return frame

    # Hold the lock only for the hand-off, not during model inference, and
    # publish both images together so readers always see a matching pair.
    with lock:
        img_container["webcam"] = img
        img_container["analyzed"] = annotated
    return frame
# Start the WebRTC stream; each frame is passed to video_frame_callback.
ctx = webrtc_streamer(key="webcam", video_frame_callback=video_frame_callback)

# While the stream is playing, copy the latest frames published by the
# callback into the two placeholders.
while ctx.state.playing:
    # Hold the lock only long enough to grab the current references.
    with lock:
        img = img_container["webcam"]
        frame_with_boxes = img_container["analyzed"]

    if img is None or frame_with_boxes is None:
        # No complete frame pair yet; wait briefly instead of spinning.
        time.sleep(0.05)
        continue

    video_placeholder.image(img, channels="BGR")
    output_placeholder.image(frame_with_boxes, channels="BGR")

    # Yield between refreshes so the loop does not monopolise the CPU or
    # starve the callback thread of the lock.
    time.sleep(0.03)