sepehr committed on
Commit
0307f7f
1 Parent(s): 54f9d37

Create annotator.py

Browse files
Files changed (1) hide show
  1. annotator.py +96 -0
annotator.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import transformers
3
+ from sklearn import metrics
4
+ import pandas as pd
5
+ import streamlit as st
6
def ignitor_load():
    """Read 'tinyignitorfile.json' and return its contents as a DataFrame.

    The path is relative, so the file is resolved against the current
    working directory of the running process.
    """
    return pd.read_json('tinyignitorfile.json')
9
+
10
def appendor(thex):
    """Return the seed rows with *thex* written into the 'text' cell of row 21.

    The seed frame comes from ``ignitor_load()``. Only the 'text' and 'index'
    columns are kept, for row labels up to and including 21 (``.loc`` slicing
    is label-based and inclusive).
    """
    wanted_columns = ['text', 'index']
    seed_frame = ignitor_load()
    subset = seed_frame.loc[:21, wanted_columns]
    # Row label 21 is the slot that carries the caller-supplied text.
    subset.loc[21, 'text'] = thex
    return subset
15
+
16
+
17
+
18
# Tokenizer and TensorFlow DistilBERT model are loaded once at import time
# from the same pretrained checkpoint and shared by the code below.
_CHECKPOINT = 'distilbert-base-uncased'
tokenizerr = transformers.DistilBertTokenizer.from_pretrained(_CHECKPOINT)
modell = transformers.TFDistilBertModel.from_pretrained(_CHECKPOINT)
20
+
21
+
22
# Embeddings of the rows from the most recent allll() call. Kept at module
# level and updated in place so any existing reader of `encod` keeps working.
encod = []


def _mean_embedding(text):
    """Return the mean DistilBERT hidden state for *text* as a 1-D array.

    *text* is handed unchanged to ``tokenizerr.encode`` (callers pass either
    a string or a list of words). The first and last positions of the model
    output are dropped before averaging -- presumably the [CLS]/[SEP]
    special tokens; TODO confirm against the tokenizer configuration.
    """
    token_ids = np.array(tokenizerr.encode(text))[np.newaxis, :]
    hidden = modell(token_ids)[0][0][1:-1]
    return np.array(hidden).mean(0)


def allll(df):
    """Tag the last row of *df* with the best-matching label index.

    Every row's 'text' is embedded and compared by cosine similarity against
    the embedding of each label's keyword list. Per label, the similarities
    are normalised to sum to one and then rescaled so that their mean equals
    the matching ``cyr1`` .. ``cyr5`` value from ``st.secrets``.

    Parameters
    ----------
    df : DataFrame with a 'text' column, addressable by labels
        ``0 .. len(df) - 1`` (rows are read with ``df.loc[i, 'text']``).

    Returns
    -------
    The string ``'None'`` when no rescaled score of the last row exceeds
    ``st.secrets["threshhold"]``; otherwise the integer position of the
    winning label in the order SALARY, COLLEAGUES, SUPERVISION, TIMEDAY,
    TIMEDAYNOMONDAY.
    """
    # Replace (rather than extend) the shared list: appending on every call
    # made it grow without bound and let earlier calls leak into the
    # normalisation below.
    encod[:] = [_mean_embedding(df.loc[i, 'text']) for i in range(len(df))]

    labs = {
        "SALARY": ['underpay', 'underpaid', 'overpay', 'overpaying',
                   'payments', 'wage', 'payroll', 'pay', 'paycheck'],
        "COLLEAGUES": ['colleague', 'employee', 'staff', 'coworker',
                       'co-worker', 'colleagues'],
        "SUPERVISION": ['boss', 'supervisors', 'manager', 'supervisor'],
        "TIMEDAY": ['monday', 'weekday', 'day', 'weekend'],
        "TIMEDAYNOMONDAY": ['weekday', 'day', 'weekend'],
    }

    # One (1, dim) row vector per label, in dict insertion order.
    label_vectors = [
        _mean_embedding(words).reshape(1, -1) for words in labs.values()
    ]

    # Stacked result has shape (n_labels, n_rows, 1).
    sim = np.array([
        metrics.pairwise.cosine_similarity(encod, vector)
        for vector in label_vectors
    ])

    # Target mean score per label, in the same order as `labs`.
    target_means = [
        float(st.secrets[name])
        for name in ("cyr1", "cyr2", "cyr3", "cyr4", "cyr5")
    ]

    for i, target in enumerate(target_means):
        # Built-in sum is kept on purpose so the summation order (and hence
        # the floating-point result) matches the previous implementation.
        sim[i] = sim[i] / sum(sim[i])
        scale = target / sim[i].mean()
        sim[i] = sim[i] * scale

    threshhold = float(st.secrets["threshhold"])

    # Only the last row's tag is returned, so only that row is scored.
    last_scores = sim[:, -1, 0]
    if np.amax(last_scores) <= threshhold:
        return 'None'
    return np.argmax(last_scores)