File size: 1,753 Bytes
127e34a
 
 
 
 
 
 
 
 
97bbc07
127e34a
97bbc07
127e34a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import numpy as np
import torch
import clip
import torchvision
from utils.Roberta import RoBERTaClassifier
from utils.ImageOnly import Decoder4
from utils.CustomDataset import CustomDataset
from utils.test import test
from transformers import RobertaTokenizer
import spaces

@spaces.GPU
def get_emotions(image, text):
    """Predict emotion scores for a single (image, text) pair.

    Builds a one-sample DataLoader, loads a CLIP image encoder, a RoBERTa
    text classifier, and a custom decoder, runs inference via ``utils.test``,
    and returns the decoder's flattened prediction vector.

    Args:
        image: input image, passed straight to ``CustomDataset``
            (presumably a PIL image — TODO confirm against CustomDataset).
        text: caption/description string for the image.

    Returns:
        1-D array of 8 prediction values, one per emotion class, in label
        order: Excitement, Sadness, Amusement, Disgust, Awe, Contentment,
        Fear, Anger.
    """
    max_len = 128      # tokenizer max sequence length
    input_dim = 768    # feature size fed into the decoder
    output_dim = 8     # number of emotion classes

    # 224x224 resize + [-1, 1] normalization for the image branch.
    test_transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize((224, 224)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            (0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    )

    # Dummy label array: CustomDataset expects labels even at inference time.
    test_emo = np.zeros((1, 8))
    text = [text]

    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    test_dataset = CustomDataset(image, text, test_emo, tokenizer, max_len, test_transform)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=1,
                                              shuffle=False, num_workers=2)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # CLIP's own preprocess fn is unused — test_transform handles images.
    model, _ = clip.load("ViT-L/14", device=device)

    model2 = RoBERTaClassifier(num_labels=output_dim)
    model2.load_state_dict(torch.load('models/Roberta.pth', map_location=device))
    # NOTE(review): the original never moved model2 to `device`, unlike the
    # decoder — assuming utils.test runs all models on `device`; verify.
    model2 = model2.to(device)
    decoder = Decoder4(input_dim, output_dim).to(device)
    decoder.load_state_dict(torch.load('models/Custom.pth', map_location=device))

    y_pred = test(model, model2, decoder, device, test_loader)
    # Release model memory between Spaces invocations.
    del model, model2, decoder, test_loader
    torch.cuda.empty_cache()

    return y_pred.flatten()