Faris-ML's picture
Create utils.py
0212b2e verified
raw
history blame
No virus
3.07 kB
from transformers import AutoTokenizer,TFAutoModelForMaskedLM,TFAutoModelForSequenceClassification
import tensorflow as tf
import numpy as np
def _build_head(features, prefix, num_classes, hidden_units=768):
    """Stack three ReLU Dense layers and a softmax output layer on *features*.

    Layers are named ``prefix``, ``prefix + '2'``, ``prefix + '3'`` and
    ``prefix + '_out'``.
    """
    hidden = features
    for suffix in ('', '2', '3'):
        hidden = tf.keras.layers.Dense(
            hidden_units, activation='relu', name=prefix + suffix
        )(hidden)
    return tf.keras.layers.Dense(
        num_classes, activation='softmax', name=prefix + '_out'
    )(hidden)


def get_model(model, weights_path='model_cls_weights.h5', drop_out=0.1):
    """Wrap a transformer in a three-head Keras classifier and load its weights.

    The returned model takes ``[input_ids, token_type_ids, attention_mask]``
    (variable-length int32 inputs) and produces one tensor whose last axis is
    the concatenation of three softmax outputs:
    columns 0-3 sentiment, columns 4-9 speech act, columns 10-11 sarcasm.

    Args:
        model: Callable transformer invoked as
            ``model(input_ids, attention_mask, token_type_ids,
            output_hidden_states=True)``; its result must expose
            ``hidden_states``. (Presumably a Hugging Face TF model; the
            positional order is assumed to match its call signature.)
        weights_path: File passed to ``load_weights``. Pass ``None`` to skip
            loading and return the freshly initialised model.
        drop_out: Dropout rate applied to the pooled features.

    Returns:
        The assembled ``tf.keras.Model``.
    """
    input_ids = tf.keras.Input(shape=(None, ), dtype='int32', name='input_ids')
    token_type_ids = tf.keras.Input(shape=(None, ), dtype='int32', name='token_type_ids')
    attention_mask = tf.keras.Input(shape=(None, ), dtype='int32', name='attention_mask')

    # NOTE: the transformer receives attention_mask before token_type_ids,
    # whereas the Keras model below lists token_type_ids first.
    transformer = model(input_ids, attention_mask, token_type_ids, output_hidden_states=True)

    # First position of the last hidden state (presumably the [CLS] token).
    cls = transformer.hidden_states[-1][:, 0, :]
    pooled = tf.keras.layers.Dropout(drop_out)(cls)

    # Heads are created in the original order (sentiment, speech_act, sarcasm)
    # so the layer layout stays compatible with previously saved weights.
    sentiment = _build_head(pooled, 'sentiment', 4)
    speech_act = _build_head(pooled, 'speech_act', 6)
    sarcasm = _build_head(pooled, 'sarcasm', 2)

    output = tf.keras.layers.Concatenate(axis=-1)([sentiment, speech_act, sarcasm])
    Fmodel = tf.keras.Model(
        inputs=[input_ids, token_type_ids, attention_mask], outputs=output
    )
    if weights_path is not None:
        Fmodel.load_weights(weights_path)
    return Fmodel
def classify(texts, model, tokenizer):
    """Predict sentiment, speech act and sarcasm labels for one or more texts.

    Args:
        texts: A single string, or an iterable (list, tuple, ...) of strings.
        model: Object with ``predict(inputs, verbose=0)`` returning an array
            of shape ``(batch, 12)``: columns 0-3 are sentiment scores,
            4-9 speech-act scores and 10-11 sarcasm scores.
        tokenizer: Callable invoked as
            ``tokenizer(batch, return_tensors='tf', padding=True)`` whose
            result can be indexed by ``'input_ids'``, ``'token_type_ids'``
            and ``'attention_mask'``.

    Returns:
        A list with one dict per input text, with keys ``"text"``,
        ``"sentiment"``, ``"speech_act"`` and ``"sarcasm"``. A single string
        yields a one-element list; an empty iterable yields ``[]``.

    Raises:
        KeyError: If the tokenizer output lacks one of the three input keys.
    """
    speech_act_decoder = {0:'Expression',1:'Assertion',2:'Question',3:'Recommendation',4:'Request',5:'Miscellaneous'}
    sentiment_decoder = {0:'Positive',1:'Neutral',2:'Negative',3:'Mixed'}
    sarcasm_decoder = {0:'Not Sarcastic',1:'Sarcastic'}

    # Only a lone string is a single sample; every other iterable is a batch.
    batch = [texts] if isinstance(texts, str) else list(texts)
    if not batch:
        return []

    tokens = tokenizer(batch, return_tensors='tf', padding=True)
    # Select inputs by name rather than relying on the tokenizer's key order.
    # This order is the one get_model declares for its Keras inputs.
    model_inputs = [
        tokens[name] for name in ('input_ids', 'token_type_ids', 'attention_mask')
    ]
    preds = np.asarray(model.predict(model_inputs, verbose=0))

    sentiment_ids = preds[:, 0:4].argmax(axis=1)
    speech_act_ids = preds[:, 4:10].argmax(axis=1)
    sarcasm_ids = preds[:, 10:].argmax(axis=1)

    return [
        {
            "text": text,
            "sentiment": sentiment_decoder[int(sent_id)],
            "speech_act": speech_act_decoder[int(act_id)],
            "sarcasm": sarcasm_decoder[int(sarc_id)],
        }
        for text, sent_id, act_id, sarc_id in zip(
            batch, sentiment_ids, speech_act_ids, sarcasm_ids
        )
    ]