File size: 2,516 Bytes
0d6cb4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from transformers import Pipeline
from transformers import AutoTokenizer
    
import numpy as np


def softmax(outputs):
    """Return the softmax of *outputs* along the last axis.

    Subtracts the per-row maximum before exponentiating so large logits
    do not overflow (the shift cancels out in the normalized ratio).
    """
    stabilized = outputs - np.max(outputs, axis=-1, keepdims=True)
    exps = np.exp(stabilized)
    totals = exps.sum(axis=-1, keepdims=True)
    return exps / totals



class HeritageDigitalAgePipeline(Pipeline):
    """Classify a (caption, title) text pair with a DistilBERT-style model.

    The two fields are lower-cased, joined with "[SEP]", tokenized, and fed
    to ``self.model``; postprocessing returns the best label, its softmax
    score, and the raw logits.
    """

    def _sanitize_parameters(self, **kwargs):
        """Route ``caption``/``title`` call-time kwargs to ``preprocess``.

        BUG FIX: the original tested for keys "expression1"/"expression2"
        but then read kwargs["caption"]/kwargs["title"], so a supplied
        expression kwarg raised KeyError and a supplied caption/title was
        silently dropped. The key tested and the key read now agree.
        """
        preprocess_kwargs = {}
        if "caption" in kwargs:
            preprocess_kwargs["caption"] = str(kwargs["caption"]).lower()
        if "title" in kwargs:
            preprocess_kwargs["title"] = str(kwargs["title"]).lower()
        return preprocess_kwargs, {}, {}

    def preprocess(self, inputs, maybe_arg=2):
        """Tokenize ``inputs['caption'] + "[SEP]" + inputs['title']``.

        The tokenizer is loaded once and cached on the instance instead of
        being re-fetched from the hub on every call (the original reloaded
        it per example, a large per-call cost).
        """
        sep_token = "[SEP]"
        tokenizer = getattr(self, "_cached_tokenizer", None)
        if tokenizer is None:
            tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased", use_fast=True)
            self._cached_tokenizer = tokenizer
        model_input = tokenizer.encode(
            inputs['caption'] + sep_token + inputs['title'],
            return_tensors='pt',
            add_special_tokens=True,
            truncation=True,
        )
        return {"model_input": model_input}

    def _forward(self, model_inputs):
        # model_inputs == {"model_input": model_input}
        return self.model(model_inputs['model_input'])

    def postprocess(self, model_outputs):
        """Convert model logits to {"label", "score", "logits"}.

        Uses the first item of the batch; ``score`` is the softmax
        probability of the argmax class.
        """
        logits = model_outputs.logits[0].numpy()
        probabilities = softmax(logits)

        best_class = np.argmax(probabilities)
        label = self.model.config.id2label[best_class]
        score = probabilities[best_class].item()
        logits = logits.tolist()

        return {"label": label, "score": score, "logits": logits}


class ExpressionRankingPipeline(Pipeline):
    """Score a single text with a DistilBERT-style sequence classifier.

    The input text is tokenized (padded/truncated to 256 tokens) and fed to
    ``self.model``; postprocessing returns the best label, its softmax
    score, and the raw logits.
    """

    def _sanitize_parameters(self, **kwargs):
        # No call-time parameters are routed to preprocess/forward/postprocess.
        preprocess_kwargs = {}
        return preprocess_kwargs, {}, {}

    def preprocess(self, inputs, maybe_arg=2):
        """Tokenize *inputs* to fixed-length (256) PyTorch tensors.

        The tokenizer is loaded once and cached on the instance instead of
        being re-fetched from the hub on every call (the original reloaded
        it per example, a large per-call cost).
        """
        tokenizer = getattr(self, "_cached_tokenizer", None)
        if tokenizer is None:
            tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased", use_fast=True)
            self._cached_tokenizer = tokenizer
        model_input = tokenizer(inputs, truncation=True, padding="max_length", max_length=256, return_tensors="pt")

        return {"model_input": model_input}

    def _forward(self, model_inputs):
        # Unpack the BatchEncoding (input_ids, attention_mask, ...) as kwargs.
        return self.model(**model_inputs['model_input'])

    def postprocess(self, model_outputs):
        """Convert model logits to {"label", "score", "logits"}.

        Uses the first item of the batch; ``score`` is the softmax
        probability of the argmax class.
        """
        logits = model_outputs.logits[0].numpy()
        probabilities = softmax(logits)

        best_class = np.argmax(probabilities)
        label = self.model.config.id2label[best_class]
        score = probabilities[best_class].item()
        logits = logits.tolist()

        return {"label": label, "score": score, "logits": logits}