mbahrami committed on
Commit
7cf4d15
1 Parent(s): 4677bcd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -8
app.py CHANGED
@@ -11,16 +11,21 @@ HISTORY_WEIGHT = 100 # set history weight (if found any keyword from history, it
11
  def get_model(model):
12
  return pipeline("fill-mask", model=model, top_k=10)#set the maximum of tokens to be retrieved after each inference to model
13
 
14
- def main(nlp, semantic_model):
15
- data_load_state = st.text('Inference from model...')
 
 
 
 
16
  result = nlp(text+' '+nlp.tokenizer.mask_token)
17
- data_load_state.text('')
18
  sem_list=[semantic_text.strip()]
 
19
  if len(semantic_text):
20
  predicted_seq=[rec['sequence'] for rec in result]
21
  predicted_embeddings = semantic_model.encode(predicted_seq, convert_to_tensor=True)
22
  semantic_history_embeddings = semantic_model.encode(sem_list, convert_to_tensor=True)
23
  cosine_scores = util.cos_sim(predicted_embeddings, semantic_history_embeddings)
 
24
 
25
  for index, r in enumerate(result):
26
  if len(semantic_text):
@@ -29,6 +34,7 @@ def main(nlp, semantic_model):
29
  if r['token_str'].lower().strip() in history_keyword_text.lower().strip() and len(r['token_str'].lower().strip())>1:
30
  #found from history, then increase the score of tokens
31
  result[index]['score']*=HISTORY_WEIGHT
 
32
 
33
  #sort the results
34
  df=pd.DataFrame(result).sort_values(by='score', ascending=False)
@@ -36,6 +42,7 @@ def main(nlp, semantic_model):
36
  # show the results as a table
37
  st.table(df)
38
  # print(df)
 
39
 
40
 
41
  if __name__ == '__main__':
@@ -44,7 +51,6 @@ if __name__ == '__main__':
44
  # Auto-Complete
45
  This is an example of an auto-complete approach where the next token suggested based on users's history Keyword match & Semantic similarity of users's history (log).
46
  The next token is predicted per probability and a weight if it is appeared in keyword user's history or there is a similarity to semantic user's history
47
-
48
  """)
49
  history_keyword_text = st.text_input("Enter users's history <Keywords Match> (optional, i.e., 'Gates')", value="")
50
  semantic_text = st.text_input("Enter users's history <Semantic> (optional, i.e., 'Microsoft' or 'President')", value="Microsoft")
@@ -55,12 +61,13 @@ The next token is predicted per probability and a weight if it is appeared in ke
55
 
56
  model = st.selectbox("Choose a model", ["roberta-base", "bert-base-uncased"])
57
 
58
- data_load_state = st.text('Loading model...')
59
 
60
- semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
61
- nlp = get_model(model)
 
62
 
63
- main(nlp, semantic_model)
64
  else:
65
  sys.argv = ['streamlit', 'run', sys.argv[0]]
66
  sys.exit(stcli.main())
 
11
  def get_model(model):
12
  return pipeline("fill-mask", model=model, top_k=10)#set the maximum of tokens to be retrieved after each inference to model
13
 
14
@st.cache(allow_output_mutation=True)
def loading_models(model='roberta-base'):
    """Load both models used by the app, cached by Streamlit per ``model`` value.

    Args:
        model: Name of the fill-mask model, passed to ``get_model``.

    Returns:
        A ``(fill_mask_pipeline, sentence_encoder)`` tuple: the result of
        ``get_model(model)`` and a ``SentenceTransformer('all-MiniLM-L6-v2')``.
    """
    # NOTE(review): st.cache is deprecated in newer Streamlit releases in
    # favour of st.cache_resource -- confirm against the pinned version.
    fill_mask_pipeline = get_model(model)
    sentence_encoder = SentenceTransformer('all-MiniLM-L6-v2')
    return fill_mask_pipeline, sentence_encoder
17
+
18
+ def main(nlp, semantic_model, data_load_state):
19
+ data_load_state.text('Inference from model...')
20
  result = nlp(text+' '+nlp.tokenizer.mask_token)
 
21
  sem_list=[semantic_text.strip()]
22
+ data_load_state.text('Checking similarity...')
23
  if len(semantic_text):
24
  predicted_seq=[rec['sequence'] for rec in result]
25
  predicted_embeddings = semantic_model.encode(predicted_seq, convert_to_tensor=True)
26
  semantic_history_embeddings = semantic_model.encode(sem_list, convert_to_tensor=True)
27
  cosine_scores = util.cos_sim(predicted_embeddings, semantic_history_embeddings)
28
+ data_load_state.text('similarity check completed...')
29
 
30
  for index, r in enumerate(result):
31
  if len(semantic_text):
 
34
  if r['token_str'].lower().strip() in history_keyword_text.lower().strip() and len(r['token_str'].lower().strip())>1:
35
  #found from history, then increase the score of tokens
36
  result[index]['score']*=HISTORY_WEIGHT
37
+ data_load_state.text('Score updated...')
38
 
39
  #sort the results
40
  df=pd.DataFrame(result).sort_values(by='score', ascending=False)
 
42
  # show the results as a table
43
  st.table(df)
44
  # print(df)
45
+ data_load_state.text('')
46
 
47
 
48
  if __name__ == '__main__':
 
51
  # Auto-Complete
52
  This is an example of an auto-complete approach where the next token suggested based on users's history Keyword match & Semantic similarity of users's history (log).
53
  The next token is predicted per probability and a weight if it is appeared in keyword user's history or there is a similarity to semantic user's history
 
54
  """)
55
  history_keyword_text = st.text_input("Enter users's history <Keywords Match> (optional, i.e., 'Gates')", value="")
56
  semantic_text = st.text_input("Enter users's history <Semantic> (optional, i.e., 'Microsoft' or 'President')", value="Microsoft")
 
61
 
62
  model = st.selectbox("Choose a model", ["roberta-base", "bert-base-uncased"])
63
 
64
+ data_load_state = st.text('1.Loading model ...')
65
 
66
+ # semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
67
+ # nlp = get_model(model)
68
+ nlp, semantic_model = loading_models(model)
69
 
70
+ main(nlp, semantic_model, data_load_state)
71
  else:
72
  sys.argv = ['streamlit', 'run', sys.argv[0]]
73
  sys.exit(stcli.main())