# DemoEmotions/utils/Emotions.py
# Emotion prediction for an image-text pair using CLIP and RoBERTa features.
import numpy as np
import spaces
import torch
import torchvision

import clip
from transformers import RobertaTokenizer

from utils.CustomDataset import CustomDataset
from utils.ImageOnly import Decoder4
from utils.Roberta import RoBERTaClassifier
from utils.test import test


@spaces.GPU
def get_emotions(image, text):
    """Return a flat array of emotion scores, one per entry of `tags`."""
    tags = ['Excitement', 'Sadness', 'Amusement', 'Disgust', 'Awe', 'Contentment', 'Fear', 'Anger']
    max_len = 128      # RoBERTa tokenizer truncation/padding length
    input_dim = 768    # feature dimension fed into the decoder
    output_dim = 8     # one score per emotion tag

    test_transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize((224, 224)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # Placeholder labels: CustomDataset expects a label array even at inference time.
    test_emo = np.zeros((1, 8))
    text = [text]
    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    test_dataset = CustomDataset(image, text, test_emo, tokenizer, max_len, test_transform)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2)

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # CLIP vision backbone; its preprocess is unused because CustomDataset
    # applies test_transform instead.
    model, preprocess = clip.load("ViT-L/14", device=device)

    model2 = RoBERTaClassifier(num_labels=output_dim)
    model2.load_state_dict(torch.load('models/Roberta.pth', map_location=device))
    model2 = model2.to(device)  # keep the text classifier on the same device as the other models

    decoder = Decoder4(input_dim, output_dim)
    decoder.load_state_dict(torch.load('models/Custom.pth', map_location=device))
    decoder = decoder.to(device)

    y_pred = test(model, model2, decoder, device, test_loader)

    # Release GPU memory so repeated Space invocations don't accumulate allocations.
    del model, model2, decoder, test_loader
    torch.cuda.empty_cache()

    return y_pred.flatten()
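

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module. Assumptions:
    # CustomDataset accepts a single PIL image, "example.jpg" is a hypothetical
    # sample file, and the returned vector holds one score per entry of `tags`
    # in the order defined above.
    from PIL import Image

    img = Image.open("example.jpg").convert("RGB")
    scores = get_emotions(img, "A child laughing in the rain")
    for tag, score in zip(['Excitement', 'Sadness', 'Amusement', 'Disgust',
                           'Awe', 'Contentment', 'Fear', 'Anger'], scores):
        print(f"{tag}: {score:.3f}")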