Create sentimentorr.py
sentimentorr.py +76 -0
sentimentorr.py
ADDED
@@ -0,0 +1,76 @@
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.nn.functional import softmax
import numpy as np
import torch


# Distilled zero-shot GoEmotions classifier (28 emotion labels).
tokenizer = AutoTokenizer.from_pretrained("joeddav/distilbert-base-uncased-go-emotions-student")
model = AutoModelForSequenceClassification.from_pretrained("joeddav/distilbert-base-uncased-go-emotions-student")

labels = ["admiration", "amusement", "anger", "annoyance", "approval", "caring",
          "confusion", "curiosity", "desire", "disappointment", "disapproval",
          "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief",
          "joy", "love", "nervousness", "optimism", "pride", "realization",
          "relief", "remorse", "sadness", "surprise", "neutral"]

# Same label set plus a "larg" column holding the top-scoring emotion.
labels7larg = labels + ["larg"]


def sentimentor(mmm):
    # Accept a single string (or list of strings) and wrap it in a Series.
    dfda = pd.Series(mmm)

    # Run each text through the model and collect the raw logits (one 28-dim vector per text).
    with torch.no_grad():
        vecs = np.array([model(**tokenizer(txt, return_tensors="pt", padding=True)).logits[0].numpy()
                         for txt in dfda])

    ds = pd.DataFrame(columns=labels7larg)
    for iii in range(len(vecs)):
        # Convert logits to probabilities and pick the highest-scoring label.
        softt = softmax(torch.from_numpy(vecs[iii]), dim=0).numpy()
        kki = pd.DataFrame(softt, index=labels)
        ji = kki.nlargest(1, 0)

        # One row per text: all 28 probabilities plus the top label in "larg".
        dv = pd.DataFrame(columns=labels)
        dv.loc[iii, :] = softt
        dv.loc[iii, "larg"] = ji.index[0]
        ds = pd.concat([ds, dv])

    if len(ds) == len(dfda):
        concatt = pd.concat([dfda, ds], axis=1)
        xsd = concatt["larg"]
        # Return the predicted emotion for the first input text.
        return xsd.iloc[0]
    else:
        print("error: result length does not match input length")
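
A minimal usage sketch, assuming this file is importable as a module named sentimentorr and that pandas, numpy, torch, and transformers are installed; the sample sentence and the printed label are illustrative only:

    from sentimentorr import sentimentor

    # Classify one sentence; sentimentor returns the top GoEmotions label as a string.
    print(sentimentor("I really appreciate your help, thank you!"))
    # expected to print an emotion label such as "gratitude"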