# from scipy.special import softmax
import tensorflow as tf
class PreTrainedPipeline:
    """Inference pipeline wrapping a dense Keras network.

    The network maps a 300-dimensional input vector to a 300-dimensional
    ``tanh`` output through a symmetric stack of dense layers
    (2048-1024-512-128-512-1024-2048). No trained weights are loaded yet and
    ``__call__`` still returns placeholder data (see the TODO markers).
    """

    def __init__(self, path):
        """Build and compile the (currently untrained) model.

        Args:
            path: Not used yet. Presumably the directory the weights will
                eventually be loaded from -- TODO confirm.
        """
        # define the best model TODO
        # ``shape`` has to be a tuple: ``(300)`` is just the int 300, which
        # legacy tf.keras tolerated but Keras 3 is expected to reject.
        sequence_input = tf.keras.Input(shape=(300,), name='input')

        # Hidden stack: narrows to a 128-unit bottleneck, then widens again.
        x = sequence_input
        for units in (2048, 1024, 512, 128, 512, 1024, 2048):
            x = tf.keras.layers.Dense(units, activation="LeakyReLU")(x)

        outputs = tf.keras.layers.Dense(300, activation="tanh")(x)
        model = tf.keras.Model(sequence_input, outputs)
        model.compile(optimizer="Adamax", loss="cosine_similarity")
        # model.load_weights("path to model file") TODO
        self.model = model

    def __call__(self, inputs: str) -> list:
        """Return placeholder predictions for ``inputs``.

        The model is not called yet; the result is a fixed sample whose last
        entry echoes ``inputs`` back as a label.

        Args:
            inputs: The input text.

        Returns:
            A list containing one list of ``{'label', 'score'}`` dicts.
        """
        return [
            [  # Sample output, call the model here TODO
                {'label': 'POSITIVE', 'score': 0.05},
                {'label': 'NEGATIVE', 'score': 0.03},
                {'label': 'معنی', 'score': 0.92},
                {'label': f'{inputs}', 'score': 0},
            ]
        ]
# def RevDict(sent,flag,model): | |
# """ | |
# This function receives a sentence from the user and prints the top-10 (for flag=0) or top-100 (for flag=1) predictions.
# The input sentence will be normalized, and stop words will be removed.
# """ | |
# normalizer = Normalizer() | |
# X_Normalized = normalizer.normalize(sent) | |
# X_Tokens = word_tokenize(X_Normalized) | |
# stopwords = [normalizer.normalize(x.strip()) for x in codecs.open(r"stopwords.txt",'r','utf-8').readlines()] | |
# X_Tokens = [t for t in X_Tokens if t not in stopwords] | |
# preprocessed = [' '.join(X_Tokens)][0] | |
# sent_ids = sent2id([preprocessed]) | |
# output=np.array((model.predict(sent_ids.reshape((1,20))).tolist()[0])) | |
# distances=distance.cdist(output.reshape((1,300)), comparison_matrix, "cosine")[0] | |
# min_index_100 = distances.argsort()[:100] | |
# min_index_10 = distances.argsort()[:10] | |
# temp=[] | |
# if flag == 0: | |
# for i in range(10): | |
# temp.append(id2h[str(min_index_10[i])]) | |
# elif flag == 1: | |
# for i in range(100): | |
# temp.append(id2h[str(min_index_100[i])]) | |
# for i in range(len(temp)): | |
# print(temp[i]) | |
# def sent2id(sents): | |
# sents_id=np.zeros((len(sents),20)) | |
# for j in tqdm(range(len(sents))): | |
# for i,word in enumerate(sents[j].split()): | |
# try: | |
# sents_id[j,i] = t2id[word] | |
# except: | |
# sents_id[j,i] = t2id['UNK'] | |
# if i==19: | |
# break | |
# return sents_id | |