Create utils.py
Browse files
utils.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import AutoTokenizer,TFAutoModelForMaskedLM,TFAutoModelForSequenceClassification
|
2 |
+
import tensorflow as tf
|
3 |
+
import numpy as np
|
4 |
+
def get_model(model, weights_path='model_cls_weights.h5'):
    """Build the multi-task classification model and load pre-trained weights.

    Attaches three parallel 3-layer MLP heads (sentiment: 4 classes,
    speech act: 6 classes, sarcasm: 2 classes) to the [CLS] embedding of
    the transformer's last hidden state, and concatenates their softmax
    outputs into one (batch, 12) vector.

    Args:
        model: a HuggingFace TF transformer callable accepting
            (input_ids, attention_mask, token_type_ids) with
            output_hidden_states=True.
        weights_path: path of the saved Keras weights file. Defaults to
            'model_cls_weights.h5' (the previously hard-coded value) for
            backward compatibility.

    Returns:
        tf.keras.Model mapping [input_ids, token_type_ids, attention_mask]
        to the concatenated 12-dim prediction vector.
    """
    DROP_OUT = 0.1
    HIDDEN = 768  # assumes a BERT-base-sized encoder -- TODO confirm

    input_ids = tf.keras.Input(shape=(None,), dtype='int32', name='input_ids')
    token_type_ids = tf.keras.Input(shape=(None,), dtype='int32', name='token_type_ids')
    attention_mask = tf.keras.Input(shape=(None,), dtype='int32', name='attention_mask')

    transformer = model(input_ids, attention_mask, token_type_ids,
                        output_hidden_states=True)
    # [CLS] token embedding of the last hidden layer.
    cls = transformer.hidden_states[-1][:, 0, :]
    drop_out = tf.keras.layers.Dropout(DROP_OUT)(cls)

    def _head(n_classes, prefix):
        # One task head: 3 x Dense(relu) then a softmax output layer.
        # Layer names must match the saved weights file exactly:
        # '<prefix>', '<prefix>2', '<prefix>3', '<prefix>_out'.
        x = tf.keras.layers.Dense(HIDDEN, activation='relu', name=prefix)(drop_out)
        x = tf.keras.layers.Dense(HIDDEN, activation='relu', name=prefix + '2')(x)
        x = tf.keras.layers.Dense(HIDDEN, activation='relu', name=prefix + '3')(x)
        return tf.keras.layers.Dense(n_classes, activation='softmax',
                                     name=prefix + '_out')(x)

    sentiment = _head(4, 'sentiment')
    speech_act = _head(6, 'speech_act')
    sarcasm = _head(2, 'sarcasm')

    # Concatenation order matters: classify() slices [0:4], [4:10], [10:12].
    output = tf.keras.layers.Concatenate(axis=-1)([sentiment, speech_act, sarcasm])

    Fmodel = tf.keras.Model(inputs=[input_ids, token_type_ids, attention_mask],
                            outputs=output)
    Fmodel.load_weights(weights_path)
    return Fmodel
|
34 |
+
|
35 |
+
def classify(texts, model, tokenizer):
    """Classify text(s) for sentiment, speech act, and sarcasm.

    Args:
        texts: a single string or a list of strings.
        model: Keras model from get_model(); predict() must return a
            (batch, 12) array laid out as [sentiment(4) | speech_act(6) | sarcasm(2)].
        tokenizer: HuggingFace tokenizer matching the model.

    Returns:
        A list of dicts, one per input text (a single string yields a
        one-element list), each with keys "text", "sentiment",
        "speech_act", and "sarcasm" holding human-readable labels.
    """
    sentiment_decoder = {0: 'Positive', 1: 'Neutral', 2: 'Negative', 3: 'Mixed'}
    speech_act_decoder = {0: 'Expression', 1: 'Assertion', 2: 'Question',
                          3: 'Recommendation', 4: 'Request', 5: 'Miscellaneous'}
    sarcasm_decoder = {0: 'Not Sarcastic', 1: 'Sarcastic'}

    tokens = tokenizer(texts, return_tensors='tf', padding=True)
    # Tokenizer dicts preserve insertion order, matching the model's inputs.
    preds = model.predict(list(dict(tokens).values()), verbose=0)

    # Decode each task's argmax directly; no need for np.vectorize.
    sentiment = [sentiment_decoder[i] for i in preds[:, 0:4].argmax(axis=1)]
    speech_act = [speech_act_decoder[i] for i in preds[:, 4:10].argmax(axis=1)]
    sarcasm = [sarcasm_decoder[i] for i in preds[:, 10:].argmax(axis=1)]

    # Normalize a single string to a one-element batch so both input
    # shapes share the same result-building path.
    text_list = texts if isinstance(texts, list) else [texts]
    return [
        {"text": t, "sentiment": sen, "speech_act": act, "sarcasm": sar}
        for t, sen, act, sar in zip(text_list, sentiment, speech_act, sarcasm)
    ]
|