Spaces:
Sleeping
Sleeping
initial commit
Browse files- .gitattributes +3 -0
- anger.png +0 -0
- app.py +15 -0
- bratt.jpg +0 -0
- celebrity_custom_model +3 -0
- celebrity_resnet_model +3 -0
- emotion_resnet_model +3 -0
- joha.jpg +0 -0
- model.py +96 -0
- predict.py +46 -0
- utils.py +22 -0
.gitattributes
CHANGED
@@ -32,3 +32,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
celebrity_custom_model filter=lfs diff=lfs merge=lfs -text
|
36 |
+
celebrity_resnet_model filter=lfs diff=lfs merge=lfs -text
|
37 |
+
emotion_resnet_model filter=lfs diff=lfs merge=lfs -text
|
anger.png
ADDED
![]() |
app.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from predict import predict_one_image
|
3 |
+
|
4 |
+
# The demo takes a single image; type='filepath' matches predict_one_image,
# which receives a path and reads the file itself.
input_image = [gr.components.Image(type='filepath', label='Input Image')]

# Sample images offered in the UI (files sitting next to this script).
examples = ['joha.jpg', 'anger.png', 'bratt.jpg']

# Build the interface first, then start serving it.
demo = gr.Interface(
    fn=predict_one_image,
    inputs=input_image,
    outputs='text',
    title="CELEBRITY & EMOTION RECOGNITION APP",
    examples=examples,
    cache_examples=False,
)
demo.launch()
|
bratt.jpg
ADDED
![]() |
celebrity_custom_model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:064745679f779aeb31206e7b7190077367c89dfa97145bc452ab9fb1aafbc1b9
|
3 |
+
size 47943319
|
celebrity_resnet_model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56b67d6619d01cc4fcf6b5639ae59003c3cb878c0e7acaa5a7d95791b07bd154
|
3 |
+
size 103103793
|
emotion_resnet_model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18f37453beaccf41597f85730b803d009d3b109c02d1ccd2144906b01c030e4e
|
3 |
+
size 103103793
|
joha.jpg
ADDED
![]() |
model.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
|
4 |
+
# Number of output classes for each classification head.
N_EMOTIONS = 8
N_CELEBRITIES = 17


class CustomModel(nn.Module):
    """Small convolutional network with a shared trunk and two heads.

    The trunk (``backbone`` + ``neck``) turns an image batch into a 64-value
    embedding; ``mode`` selects which head(s) ``forward`` applies to it:

    * ``'emotion'``   -> emotion logits only
    * ``'celebrity'`` -> celebrity logits only
    * anything else   -> the tuple ``(emotion_logits, celebrity_logits)``

    Attribute names and layer order determine the ``state_dict`` keys, so
    they must stay as they are for saved checkpoints to keep loading.
    """

    def __init__(self, mode='emotion'):
        super().__init__()
        self.mode = mode

        conv_stack = [
            # in: 3 x 224 x 224
            nn.Conv2d(3, 64, kernel_size=3, stride=1, bias=False),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, inplace=True),
            # out: 64 x 222 x 222

            nn.Conv2d(64, 32, kernel_size=3, stride=1, bias=False),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.2, inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(0.2),
            # out: 32 x 110 x 110

            nn.Conv2d(32, 32, kernel_size=3, stride=1, bias=False),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.2, inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(0.3),
            # out: 32 x 54 x 54

            nn.Flatten(),
        ]
        self.backbone = nn.Sequential(*conv_stack)

        # Size of the flattened trunk output for a 224 x 224 input.
        self.in_features = 32 * 54 * 54
        self.neck = nn.Sequential(
            nn.Linear(self.in_features, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
        )

        self.emotion_classifier = nn.Linear(64, N_EMOTIONS)
        self.celebrity_classifier = nn.Linear(64, N_CELEBRITIES)

    def forward(self, image):
        """Return the logits selected by ``self.mode`` for ``image``."""
        embedding = self.neck(self.backbone(image))

        if self.mode == 'emotion':
            return self.emotion_classifier(embedding)
        if self.mode == 'celebrity':
            return self.celebrity_classifier(embedding)

        # Any other mode yields both heads, emotion first.
        return (
            self.emotion_classifier(embedding),
            self.celebrity_classifier(embedding),
        )
|
60 |
+
|
61 |
+
|
62 |
+
|
63 |
+
import torchvision.models as models
|
64 |
+
class ResNet50Model(nn.Module):
    """torchvision ResNet-50 followed by a small MLP and two heads.

    The ResNet-50 is used whole, so its 1000-value output is what feeds the
    neck (hence ``in_features = 1000``). ``mode`` selects which head(s)
    ``forward`` applies to the 64-value neck output:

    * ``'emotion'``   -> emotion logits only
    * ``'celebrity'`` -> celebrity logits only
    * anything else   -> the tuple ``(emotion_logits, celebrity_logits)``

    Attribute names determine the ``state_dict`` keys, so they must stay as
    they are for saved checkpoints to keep loading.
    """

    def __init__(self, mode='emotion'):
        super().__init__()
        self.mode = mode

        # Untrained ResNet-50. Relying on the constructor's default (no
        # pretrained weights) works on every torchvision release, whereas
        # passing False positionally uses the deprecated `pretrained` flag.
        self.backbone = models.resnet50()
        self.in_features = 1000
        self.neck = nn.Sequential(
            nn.Linear(self.in_features, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
        )
        self.emotion_classifier = nn.Linear(64, N_EMOTIONS)
        self.celebrity_classifier = nn.Linear(64, N_CELEBRITIES)

    def forward(self, image):
        """Return the logits selected by ``self.mode`` for ``image``."""
        features = self.neck(self.backbone(image))

        if self.mode == 'emotion':
            return self.emotion_classifier(features)
        if self.mode == 'celebrity':
            return self.celebrity_classifier(features)

        # Any other mode yields both heads, emotion first.
        return (
            self.emotion_classifier(features),
            self.celebrity_classifier(features),
        )
|
93 |
+
|
94 |
+
|
95 |
+
|
96 |
+
|
predict.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
from utils import read_image,get_valid_augs
|
3 |
+
import torch
|
4 |
+
import torch.nn.functional as F
|
5 |
+
from model import ResNet50Model
|
6 |
+
import numpy as np
|
7 |
+
|
8 |
+
# Checkpoint files, resolved relative to the working directory.
CKPT_EMOTION = 'emotion_resnet_model'
CKPT_CELEBRITY = 'celebrity_resnet_model'

# Class index -> celebrity name; the position in the tuple is the index.
FaceInverseTargetMapper = dict(enumerate((
    'Tom Hanks',
    'Sandra Bullock',
    'Natalie Portman',
    'Scarlett Johansson',
    'Robert Downey Jr',
    'Nicole Kidman',
    'Brad Pitt',
    'Hugh Jackman',
    'Tom Cruise',
    'Leonardo DiCaprio',
    'Megan Fox',
    'Johnny Depp',
    'Will Smith',
    'Denzel Washington',
    'Jennifer Lawrence',
    'Kate Winslet',
    'Angelina Jolie',
)))

# Class index -> emotion label; the position in the tuple is the index.
EmotionMapper = dict(enumerate((
    'sadness',
    'contempt',
    'happiness',
    'surprise',
    'fear',
    'anger',
    'disgust',
    'neutrality',
)))
|
14 |
+
# Celebrity predictions below this probability are reported as unknown.
_UNKNOWN_THRESHOLD = 0.45

# Loaded models keyed by (checkpoint path, mode), so each checkpoint is read
# from disk once per process instead of on every request.
_MODEL_CACHE = {}


def _load_model(ckpt_path, mode):
    """Return a cached, inference-ready ResNet50Model restored from ``ckpt_path``."""
    key = (ckpt_path, mode)
    if key not in _MODEL_CACHE:
        model = ResNet50Model(mode)
        model.load_state_dict(
            torch.load(ckpt_path, map_location=torch.device('cpu'))
        )
        # Inference mode: BatchNorm must use its stored running statistics
        # rather than the statistics of the single image being classified.
        model.eval()
        _MODEL_CACHE[key] = model
    return _MODEL_CACHE[key]


def _top_class(model, batch):
    """Return ``(class index, probability)`` of the best class for a batch of one."""
    probabilities = torch.softmax(model(batch), dim=1)
    best_proba, best_index = probabilities.max(dim=1)
    return int(best_index[0]), float(best_proba[0])


def predict_one_image(path):
    """Describe the celebrity and the emotion shown in the image at ``path``.

    Args:
        path: Location of the image file on disk.

    Returns:
        A sentence naming the detected celebrity and emotion. When the
        celebrity probability is below ``_UNKNOWN_THRESHOLD`` the person is
        reported as unknown, but the emotion is still given.
    """
    image = read_image(path)
    image = get_valid_augs()(image=image)['image']
    # Add the batch dimension the models expect: (3, 224, 224) -> (1, 3, 224, 224).
    batch = torch.as_tensor(image, dtype=torch.float).unsqueeze(0)

    emotion_model = _load_model(CKPT_EMOTION, 'emotion')
    celebrity_model = _load_model(CKPT_CELEBRITY, 'celebrity')

    with torch.no_grad():
        # The emotion probability is not needed: an emotion is always reported.
        emotion, _ = _top_class(emotion_model, batch)
        celebrity, celebrity_proba = _top_class(celebrity_model, batch)

    if celebrity_proba < _UNKNOWN_THRESHOLD:
        return f"Unknown Person Detected with emotion {EmotionMapper[emotion]} "
    return f"Detected {FaceInverseTargetMapper[celebrity]} with emotion {EmotionMapper[emotion]} "
|
utils.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import albumentations as A
|
3 |
+
from albumentations.pytorch import ToTensorV2
|
4 |
+
|
5 |
+
# Per-channel mean and standard deviation used to normalise input images
# (one value per colour channel, applied to pixels scaled to [0, 1]).
IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
|
7 |
+
|
8 |
+
def read_image(path):
    """Load the image stored at ``path`` with channels in RGB order.

    Args:
        path: Location of the image file on disk.

    Returns:
        The decoded image array, converted from OpenCV's BGR order to RGB.

    Raises:
        ValueError: If the file is missing or cannot be decoded.
    """
    img = cv2.imread(path)
    # cv2.imread signals failure by returning None rather than raising, which
    # would otherwise surface as an opaque error inside cvtColor.
    if img is None:
        raise ValueError(f"Could not read or decode image file: {path!r}")
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
12 |
+
|
13 |
+
def get_valid_augs():
    """Build the deterministic preprocessing pipeline used at inference time.

    The returned albumentations pipeline resizes the image to 224 x 224,
    normalises it with IMAGENET_DEFAULT_MEAN / IMAGENET_DEFAULT_STD and
    finally converts it with ToTensorV2.
    """
    resize = A.Resize(height=224, width=224, always_apply=True, p=1)
    normalize = A.Normalize(
        mean=IMAGENET_DEFAULT_MEAN,
        std=IMAGENET_DEFAULT_STD,
        max_pixel_value=255,
    )
    to_tensor = ToTensorV2()

    # Order matters: resize and normalise the array before the tensor conversion.
    return A.Compose([resize, normalize, to_tensor])
|