behnamsa committed on
Commit
bf1126c
1 Parent(s): fcb30fd

Fix pipeline

Browse files
Files changed (1) hide show
  1. pipeline.py +52 -44
pipeline.py CHANGED
@@ -2,7 +2,8 @@
2
  import tensorflow as tf
3
 
4
  class PreTrainedPipeline():
5
- def __init__(self):
 
6
  sequence_input = tf.keras.Input(shape=(300), name='input')
7
  x = tf.keras.layers.Dense(2048, activation="LeakyReLU")(sequence_input)
8
  x = tf.keras.layers.Dense(1024, activation="LeakyReLU")(x)
@@ -17,48 +18,55 @@ class PreTrainedPipeline():
17
 
18
  model.compile(optimizer="Adamax", loss="cosine_similarity")
19
 
20
- def __call__(self):
21
- return {
22
- "text": "Hi!!!"
23
- }
24
-
25
- # def RevDict(sent,flag,model):
26
- # """
27
- # This function receives a sentence from the user and returns the top_10 (for flag=0) or top_100 (for flag=1) predictions.
28
- # the input sentence will be normalized, and stop words will be removed
29
- # """
30
-
31
- # normalizer = Normalizer()
32
- # X_Normalized = normalizer.normalize(sent)
33
- # X_Tokens = word_tokenize(X_Normalized)
34
- # stopwords = [normalizer.normalize(x.strip()) for x in codecs.open(r"stopwords.txt",'r','utf-8').readlines()]
35
- # X_Tokens = [t for t in X_Tokens if t not in stopwords]
36
- # preprocessed = [' '.join(X_Tokens)][0]
37
- # sent_ids = sent2id([preprocessed])
38
- # output=np.array((model.predict(sent_ids.reshape((1,20))).tolist()[0]))
39
- # distances=distance.cdist(output.reshape((1,300)), comparison_matrix, "cosine")[0]
40
- # min_index_100 = distances.argsort()[:100]
41
- # min_index_10 = distances.argsort()[:10]
42
 
43
- # temp=[]
44
- # if flag == 0:
45
- # for i in range(10):
46
- # temp.append(id2h[str(min_index_10[i])])
47
- # elif flag == 1:
48
- # for i in range(100):
49
- # temp.append(id2h[str(min_index_100[i])])
50
-
51
- # for i in range(len(temp)):
52
- # print(temp[i])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- # def sent2id(sents):
55
- # sents_id=np.zeros((len(sents),20))
56
- # for j in tqdm(range(len(sents))):
57
- # for i,word in enumerate(sents[j].split()):
58
- # try:
59
- # sents_id[j,i] = t2id[word]
60
- # except:
61
- # sents_id[j,i] = t2id['UNK']
62
- # if i==19:
63
- # break
64
- # return sents_id
 
2
  import tensorflow as tf
3
 
4
  class PreTrainedPipeline():
5
+ def __init__(self, path):
6
+ # define the best model TODO
7
  sequence_input = tf.keras.Input(shape=(300), name='input')
8
  x = tf.keras.layers.Dense(2048, activation="LeakyReLU")(sequence_input)
9
  x = tf.keras.layers.Dense(1024, activation="LeakyReLU")(x)
 
18
 
19
  model.compile(optimizer="Adamax", loss="cosine_similarity")
20
 
21
+ # model.load_weights("path to model file") TODO
22
+
23
+ self.model = model
24
+
25
+ def __call__(self, inputs):
26
+ return [ # Sample output, call the model here TODO
27
+ {'label': 'POSITIVE', 'score': 0.05},
28
+ {'label': 'NEGATIVE', 'score': 0.03},
29
+ {'label': 'معنی', 'score': 0.92},
30
+ {'label': f'{inputs}', 'score': 0},
31
+ ]
 
 
 
 
 
 
 
 
 
 
 
32
 
33
+ # def RevDict(sent,flag,model):
34
+ # """
35
+ # This function receives a sentence from the user and returns the top_10 (for flag=0) or top_100 (for flag=1) predictions.
36
+ # the input sentence will be normalized, and stop words will be removed
37
+ # """
38
+
39
+ # normalizer = Normalizer()
40
+ # X_Normalized = normalizer.normalize(sent)
41
+ # X_Tokens = word_tokenize(X_Normalized)
42
+ # stopwords = [normalizer.normalize(x.strip()) for x in codecs.open(r"stopwords.txt",'r','utf-8').readlines()]
43
+ # X_Tokens = [t for t in X_Tokens if t not in stopwords]
44
+ # preprocessed = [' '.join(X_Tokens)][0]
45
+ # sent_ids = sent2id([preprocessed])
46
+ # output=np.array((model.predict(sent_ids.reshape((1,20))).tolist()[0]))
47
+ # distances=distance.cdist(output.reshape((1,300)), comparison_matrix, "cosine")[0]
48
+ # min_index_100 = distances.argsort()[:100]
49
+ # min_index_10 = distances.argsort()[:10]
50
+
51
+ # temp=[]
52
+ # if flag == 0:
53
+ # for i in range(10):
54
+ # temp.append(id2h[str(min_index_10[i])])
55
+ # elif flag == 1:
56
+ # for i in range(100):
57
+ # temp.append(id2h[str(min_index_100[i])])
58
+
59
+ # for i in range(len(temp)):
60
+ # print(temp[i])
61
 
62
+ # def sent2id(sents):
63
+ # sents_id=np.zeros((len(sents),20))
64
+ # for j in tqdm(range(len(sents))):
65
+ # for i,word in enumerate(sents[j].split()):
66
+ # try:
67
+ # sents_id[j,i] = t2id[word]
68
+ # except:
69
+ # sents_id[j,i] = t2id['UNK']
70
+ # if i==19:
71
+ # break
72
+ # return sents_id