|
from transformers import AutoTokenizer,TFAutoModelForMaskedLM,TFAutoModelForSequenceClassification |
|
import tensorflow as tf |
|
import numpy as np |
|
def _classification_head(features, prefix, num_classes):
    """Build one task head: three 768-unit ReLU layers, then a softmax layer.

    Layers are named ``prefix``, ``prefix2``, ``prefix3`` and ``prefix_out``.
    """
    hidden = features
    for suffix in ('', '2', '3'):
        hidden = tf.keras.layers.Dense(
            768, activation='relu', name=prefix + suffix
        )(hidden)
    return tf.keras.layers.Dense(
        num_classes, activation='softmax', name=prefix + '_out'
    )(hidden)


def get_model(model, weights_path='model_cls_weights.h5'):
    """Wrap a transformer in a three-head Keras classifier and load its weights.

    Args:
        model: Callable invoked as ``model(input_ids, attention_mask,
            token_type_ids, output_hidden_states=True)`` whose result exposes
            ``hidden_states``. Presumably a BERT-style TF model from
            ``transformers`` -- confirm against the caller.
        weights_path: File passed to ``load_weights``. Defaults to
            ``'model_cls_weights.h5'``; pass ``None`` to skip loading.

    Returns:
        A ``tf.keras.Model`` taking ``[input_ids, token_type_ids,
        attention_mask]`` and returning the sentiment (4), speech-act (6) and
        sarcasm (2) softmax outputs concatenated along the last axis.
    """
    dropout_rate = 0.1

    input_ids = tf.keras.Input(shape=(None,), dtype='int32', name='input_ids')
    token_type_ids = tf.keras.Input(
        shape=(None,), dtype='int32', name='token_type_ids'
    )
    attention_mask = tf.keras.Input(
        shape=(None,), dtype='int32', name='attention_mask'
    )

    # NOTE: the transformer receives (input_ids, attention_mask,
    # token_type_ids) positionally, which differs from the input order of the
    # wrapping Keras model declared below.
    transformer = model(
        input_ids, attention_mask, token_type_ids, output_hidden_states=True
    )

    # Position 0 of the last hidden state (presumably the [CLS] token).
    first_token = transformer.hidden_states[-1][:, 0, :]
    features = tf.keras.layers.Dropout(dropout_rate)(first_token)

    # Heads are created in a fixed order with fixed layer names so the layer
    # layout matches the one the weights file was saved from.
    sentiment = _classification_head(features, 'sentiment', 4)
    speech_act = _classification_head(features, 'speech_act', 6)
    sarcasm = _classification_head(features, 'sarcasm', 2)

    output = tf.keras.layers.Concatenate(axis=-1)(
        [sentiment, speech_act, sarcasm]
    )

    classifier = tf.keras.Model(
        inputs=[input_ids, token_type_ids, attention_mask], outputs=output
    )
    if weights_path is not None:
        classifier.load_weights(weights_path)
    return classifier
|
|
|
def classify(texts, model, tokenizer):
    """Label one or more texts with sentiment, speech act and sarcasm.

    Args:
        texts: A single string, or a sequence (list, tuple, ...) of strings.
        model: Object whose ``predict(inputs, verbose=0)`` returns a 2-D array
            with one row per text. Columns 0-3 are read as sentiment scores,
            4-9 as speech-act scores and 10 onward as sarcasm scores.
        tokenizer: Callable invoked as
            ``tokenizer(texts, return_tensors='tf', padding=True)``; its
            result must be convertible with ``dict``.

    Returns:
        A list with one dict per text, holding the keys ``"text"``,
        ``"sentiment"``, ``"speech_act"`` and ``"sarcasm"``. A single string
        yields a one-element list; an empty sequence yields ``[]``.
    """
    # Label order must match the class indices used when the model was
    # trained -- TODO confirm against the training code.
    speech_act_labels = (
        'Expression', 'Assertion', 'Question',
        'Recommendation', 'Request', 'Miscellaneous',
    )
    sentiment_labels = ('Positive', 'Neutral', 'Negative', 'Mixed')
    sarcasm_labels = ('Not Sarcastic', 'Sarcastic')

    # Only a str is a single text; every other sequence is a batch.
    is_single = isinstance(texts, str)
    batch = [texts] if is_single else list(texts)
    if not batch:
        # Nothing to predict; avoids tokenizing/predicting on an empty batch.
        return []

    tokens = tokenizer(texts if is_single else batch,
                       return_tensors='tf', padding=True)
    # Inputs are passed positionally in the tokenizer's key order, so the
    # model must expect its inputs in that same order.
    preds = model.predict(list(dict(tokens).values()), verbose=0)

    sentiment_ids = preds[:, 0:4].argmax(axis=1)
    speech_act_ids = preds[:, 4:10].argmax(axis=1)
    sarcasm_ids = preds[:, 10:].argmax(axis=1)

    return [
        {
            "text": text,
            "sentiment": sentiment_labels[sen],
            "speech_act": speech_act_labels[act],
            "sarcasm": sarcasm_labels[sar],
        }
        for text, sen, act, sar in zip(
            batch, sentiment_ids, speech_act_ids, sarcasm_ids
        )
    ]