import gradio as gr
import torch
from transformers import AutoModelForImageClassification
from optimum.pipelines import pipeline
from PIL import Image

# Use the first GPU when available; -1 selects the CPU in the pipeline device API.
device = 0 if torch.cuda.is_available() else -1

# Local copy of kdhht2334/autotrain-diffusion-emotion-facial-expression-recognition-40429105176
model = AutoModelForImageClassification.from_pretrained(
    "./autotrain-diffusion-emotion-facial-expression-recognition-40429105176"
)

# Face detector: MTCNN crops the face region before it is classified.
from facenet_pytorch import MTCNN

mtcnn = MTCNN(
    image_size=300, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
)
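# Apart from image_size, these arguments appear to match the facenet_pytorch defaults.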

emotion_dict = {
    'neutral': '0',
    'happy': '1',
    'sad': '2',
    'surprise': '3',
    'fear': '4',
    'disgust': '5',
    'angry': '6',
    'uncertain': '7',
    'nonface': '8',
}
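# Unused below (pipe() returns string labels directly); kept as a reference for the
# nine DiffusionFER classes and their indices.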


# Output canvas size in pixels (width, height); the cropped face is centered on it.
output_img_size = (2100, 700)


# Prefer the Optimum pipeline with the BetterTransformer accelerator; fall back to
# the plain transformers pipeline where BetterTransformer is not supported.
try:
    pipe = pipeline(
        "image-classification",
        model,
        accelerator="bettertransformer",
        device=device,
    )
except NotImplementedError:
    from transformers import pipeline

    pipe = pipeline("image-classification", model, device=device)
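# Either way, pipe(<PIL.Image>) returns a list of {"label": ..., "score": ...} dicts,
# e.g. [{"label": "happy", "score": 0.93}, ...], which predict() reshapes for gr.Label.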


def face_detector(input_img):
    """Detect the most prominent face and return it as a PIL crop."""
    img = Image.fromarray(input_img)
    bboxes, _ = mtcnn.detect(img)
    if bboxes is None:
        raise ValueError("No face detected in the input image.")
    # MTCNN may return several boxes; crop the first (largest) face.
    return img.crop(bboxes[0].tolist())

def predict(image):
    cropped_face = face_detector(image)
    # Scale the crop to the 700 px canvas height, preserving its aspect ratio.
    face_w, face_h = cropped_face.size
    face_re_w = int(face_w * (700 / face_h))
    resized_face = cropped_face.resize((face_re_w, 700))
    # Paste the face horizontally centered on the transparent 2100x700 canvas.
    output_img = Image.new("RGBA", output_img_size)
    output_img.paste(resized_face, (1050 - int(face_re_w / 2), 0))
    predictions = pipe(cropped_face)
    return output_img, {p["label"]: p["score"] for p in predictions}

gr.Interface(
    predict,
    inputs=gr.Image(label="Upload image"),
    outputs=["image", "label"],
    examples=[["examples/happy.png"], ["examples/angry.png"], ["examples/surprise.png"]],
    title="Demo - DiffusionFER",
).launch()
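# launch() serves on localhost by default; launch(share=True) creates a temporary
# public link if the demo needs to be reachable from outside the machine.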