j-hartmann committed on
Commit
a0ffc1e
1 Parent(s): ef41482

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -1
app.py CHANGED
@@ -8,4 +8,85 @@ import itertools
8
  import numpy as np
9
  from numpy import dot
10
  from numpy.linalg import norm
11
- #from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  import numpy as np
9
  from numpy import dot
10
  from numpy.linalg import norm
11
+ #from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
12
+
13
+ # compute dot product of inputs
14
+ # summary function - test for a single Gradio function interface
15
_EMOTION_MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"

# Score column names, in the index order (0..6) in which they are read from
# each row of classifier output.
_EMOTION_COLUMNS = (
    "anger",
    "disgust",
    "fear",
    "joy",
    "neutral",
    "sadness",
    "surprise",
)

# model name -> (tokenizer, model, trainer); filled lazily on first use so the
# objects are built once per process instead of once per request.
_EMOTION_PIPELINE_CACHE = {}


class _SimpleDataset:
    """Minimal indexable dataset wrapping a tokenizer's batch output."""

    def __init__(self, tokenized_texts):
        self.tokenized_texts = tokenized_texts

    def __len__(self):
        return len(self.tokenized_texts["input_ids"])

    def __getitem__(self, idx):
        return {k: v[idx] for k, v in self.tokenized_texts.items()}


def _load_emotion_trainer(model_name):
    """Return the cached ``(tokenizer, model, trainer)`` triple for *model_name*.

    NOTE(review): no locking is used; concurrent first calls may each build
    the objects once before the cache is populated -- confirm that is
    acceptable for the hosting environment.
    """
    if model_name not in _EMOTION_PIPELINE_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        trainer = Trainer(model=model)
        _EMOTION_PIPELINE_CACHE[model_name] = (tokenizer, model, trainer)
    return _EMOTION_PIPELINE_CACHE[model_name]


def _softmax(logits):
    """Return the softmax of *logits* along the last axis."""
    # Subtracting the row maximum leaves the result mathematically unchanged
    # but keeps np.exp from overflowing on large logits.
    shifted = logits - logits.max(axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=-1, keepdims=True)


def gr_cosine_similarity(sentence1, sentence2):
    """Classify two sentences by emotion and compare their score vectors.

    Both sentences are tokenized together, run through the sequence
    classifier named by ``_EMOTION_MODEL_NAME``, and the resulting logits are
    turned into per-class probabilities with a softmax.

    NOTE(review): this function relies on ``pd``, ``AutoTokenizer``,
    ``AutoModelForSequenceClassification`` and ``Trainer`` being imported at
    file level. The only ``transformers`` import visible next to this code is
    commented out -- confirm the names are actually in scope.

    Args:
        sentence1: First text to classify.
        sentence2: Second text to classify.

    Returns:
        A ``(df, cosine_similarity)`` tuple. ``df`` has one row per sentence
        with the columns ``text``, ``label`` (the arg-max class mapped through
        ``model.config.id2label``) and one probability column per entry of
        ``_EMOTION_COLUMNS``. ``cosine_similarity`` is the cosine of the angle
        between the two sentences' full probability vectors.
    """
    tokenizer, model, trainer = _load_emotion_trainer(_EMOTION_MODEL_NAME)

    # sentences in list
    lines_s = [sentence1, sentence2]

    # Tokenize texts and run the classifier over both sentences at once
    tokenized_texts = tokenizer(lines_s, truncation=True, padding=True)
    predictions = trainer.predict(_SimpleDataset(tokenized_texts))

    # Work in float64 so the DataFrame columns and the similarity have the
    # same dtype as when the scores were converted to Python floats.
    logits = np.asarray(predictions.predictions, dtype=np.float64)
    probabilities = _softmax(logits)

    # Transform predictions to labels
    labels = pd.Series(logits.argmax(-1)).map(model.config.id2label)

    # define df: one probability column per emotion, then text/label in front
    emotion_scores = probabilities[:, :len(_EMOTION_COLUMNS)]
    df = pd.DataFrame(emotion_scores, columns=list(_EMOTION_COLUMNS))
    df.insert(0, "label", labels.to_numpy())
    df.insert(0, "text", lines_s)

    # compute cosine similarity: dot product divided by the product of norms
    v1 = probabilities[0]
    v2 = probabilities[1]
    cosine_similarity = dot(v1, v2) / (norm(v1) * norm(v2))

    # return dataframe for space output
    return df, cosine_similarity