# fer_app/app.py
import os

import cv2
import gradio as gr
import numpy as np
from PIL import Image, ImageOps
from insightface.app import FaceAnalysis
from hsemotion_onnx.facial_emotions import HSEmotionRecognizer
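

# Undo the camera's EXIF orientation so detection sees an upright image.
# Falls back to handling the eight EXIF orientation values manually on
# Pillow versions that lack ImageOps.exif_transpose.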
def exif_transpose(img):
    if hasattr(ImageOps, 'exif_transpose'):
        # Recent versions of Pillow can do the EXIF transpose internally
        return ImageOps.exif_transpose(img)

    exif_orientation_tag = 274

    # Check for EXIF data (only present on some files)
    if hasattr(img, "_getexif") and isinstance(img._getexif(), dict) and exif_orientation_tag in img._getexif():
        exif_data = img._getexif()
        orientation = exif_data[exif_orientation_tag]

        # Handle EXIF Orientation
        if orientation == 1:
            # Normal image - nothing to do!
            pass
        elif orientation == 2:
            # Mirrored left to right
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
        elif orientation == 3:
            # Rotated 180 degrees
            img = img.rotate(180)
        elif orientation == 4:
            # Mirrored top to bottom
            img = img.rotate(180).transpose(Image.FLIP_LEFT_RIGHT)
        elif orientation == 5:
            # Mirrored along top-left diagonal
            img = img.rotate(-90, expand=True).transpose(Image.FLIP_LEFT_RIGHT)
        elif orientation == 6:
            # Rotated 90 degrees
            img = img.rotate(-90, expand=True)
        elif orientation == 7:
            # Mirrored along top-right diagonal
            img = img.rotate(90, expand=True).transpose(Image.FLIP_LEFT_RIGHT)
        elif orientation == 8:
            # Rotated 270 degrees
            img = img.rotate(90, expand=True)

    return img
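

# Scale a PIL image so it fits inside target_size while preserving its
# aspect ratio.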
def resize(image, target_size):
    # Get the dimensions of the input image
    width, height = image.size

    # Use the smaller of the two axis ratios so both dimensions fit
    # within target_size and the aspect ratio is preserved
    scaling_factor = min(target_size[0] / width, target_size[1] / height)
    target_height = int(scaling_factor * height)
    target_width = int(scaling_factor * width)

    # Resize the image
    resized_image = image.resize((target_width, target_height), resample=Image.NEAREST)
    return resized_image
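

# Main inference pipeline: fix orientation, resize, detect faces, keep the
# detection with the highest confidence, expand its box by face_margin,
# classify the crop with HSEmotion, and draw the result on the image.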
def facial_emotion_recognition(img):
    # np.array (not np.asarray) so the buffer is writable for cv2 drawing
    img = np.array(resize(exif_transpose(img), target_size))
    faces = face_detector.get(img)
    if len(faces) > 0:
        highest_score_box = (0, 0, 0, 0)  # x, y, w, h
        highest_score = 0
        for face in faces:
            if face['det_score'] > highest_score:
                highest_score = face['det_score']
                x1, y1, x2, y2 = face['bbox'].astype(int)

                # Expand the detection box by face_margin on each side,
                # clamped to the image bounds
                x_margin = int((x2 - x1) * face_margin)
                y_margin = int((y2 - y1) * face_margin)
                x = max(0, x1 - x_margin)
                y = max(0, y1 - y_margin)
                w = min(x2 + x_margin, img.shape[1]) - x
                h = min(y2 + y_margin, img.shape[0]) - y
                highest_score_box = (x, y, w, h)

        x, y, w, h = highest_score_box
        emotion, _ = hse_emo_model.predict_emotions(img[y:y+h, x:x+w], logits=True)
        cv2.rectangle(img, (x, y), (x+w, y+h), (0, 0, 255), 2)
        cv2.putText(img, emotion, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
    return img
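

# Detection/classification configuration and one-time model setup.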
face_margin = 0.1
target_size = (640, 640)  # w, h

# insightface detector loaded from the 'buffalo_sc' model files bundled
# next to this script; CUDA is used when available, with a CPU fallback
model_name = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'buffalo_sc')
face_detector = FaceAnalysis(name=model_name, allowed_modules=['detection'], providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
face_detector.prepare(ctx_id=0, det_size=(640, 640))

# ONNX HSEmotion classifier with 8 emotion classes
hse_emo_model = HSEmotionRecognizer(model_name='enet_b0_8_best_vgaf')
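
# Two Gradio tabs sharing the same inference function: webcam capture and
# file upload (the upload tab pulls sample images from the examples folder).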
webcam = gr.Image(type='pil', source='webcam', label='Input Image')
webcam_output = gr.Image(image_mode='RGB', type='numpy', label='Output Image')
webcam_interface = gr.Interface(facial_emotion_recognition, inputs=webcam, outputs=webcam_output)

upload = gr.Image(type='pil', source='upload', label='Input Image')
upload_output = gr.Image(image_mode='RGB', type='numpy', label='Output Image')
upload_interface = gr.Interface(facial_emotion_recognition, inputs=upload, outputs=upload_output, examples='examples')

demo = gr.TabbedInterface(interface_list=[upload_interface, webcam_interface], tab_names=['Upload', 'Webcam'])
demo.launch()