mbahrami committed on
Commit
fe35e1b
1 Parent(s): c8f4a2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -30
app.py CHANGED
@@ -1,40 +1,21 @@
1
  import streamlit as st
2
  import pandas as pd
3
-
4
  from transformers import pipeline
5
  from sentence_transformers import SentenceTransformer, util
6
- semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
 
 
7
 
8
  @st.cache(allow_output_mutation=True)
9
  def get_model(model):
10
  return pipeline("fill-mask", model=model, top_k=100)  # top_k sets the maximum number of tokens retrieved per model inference
11
 
12
-
13
- HISTORY_WEIGHT = 100 # history weight (if a predicted token matches any keyword from the history, it is prioritized by this weight)
14
-
15
- st.caption("This is a simple auto-completion where the next token is predicted per probability and a weigh if appears in user's history")
16
-
17
- history_keyword_text = st.text_input("Enter users's history keywords (optional, i.e., 'Gates')", value="")
18
- #history_keyword_text=''
19
-
20
- text = st.text_input("Enter a text for auto completion...", value='Where is Bill')
21
- #text='Where is Bill'
22
-
23
- semantic_text = st.text_input("Enter users's history semantic (optional, i.e., 'Microsoft or President')", value="Microsoft")
24
- #semantic_text='President'
25
-
26
- model = st.selectbox("choose a model", ["roberta-base", "bert-base-uncased"])
27
- #model='roberta-base'
28
- nlp = get_model(model)
29
- #data_load_state = st.text('Loading model...')
30
-
31
-
32
-
33
- if text:
34
- # data_load_state = st.text('Inference to model...')
35
  result = nlp(text+' '+nlp.tokenizer.mask_token)
36
- # data_load_state.text('')
37
- sem_list=[_.strip() for _ in semantic_text.split(',')]
38
  if len(semantic_text):
39
  predicted_seq=[rec['sequence'] for rec in result]
40
  predicted_embeddings = semantic_model.encode(predicted_seq, convert_to_tensor=True)
@@ -43,9 +24,8 @@ if text:
43
 
44
  for index, r in enumerate(result):
45
  if len(semantic_text):
46
- # for j_index in range(len(sem_list)):
47
  if len(r['token_str'])>2: #skip special chars such as "?"
48
- result[index]['score']+=float(sum(cosine_scores[index]))
49
  if r['token_str'].lower().strip() in history_keyword_text.lower().strip() and len(r['token_str'].lower().strip())>1:
50
  #found from history, then increase the score of tokens
51
  result[index]['score']*=HISTORY_WEIGHT
@@ -54,4 +34,27 @@ if text:
54
  df=pd.DataFrame(result).sort_values(by='score', ascending=False)
55
 
56
  # show the results as a table
57
- st.table(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
+ from streamlit import cli as stcli
4
  from transformers import pipeline
5
  from sentence_transformers import SentenceTransformer, util
6
+ import sys
7
+
8
+ HISTORY_WEIGHT = 100 # history weight (if a predicted token matches any keyword from the history, it is prioritized by this weight)
9
 
10
  @st.cache(allow_output_mutation=True)
11
  def get_model(model):
12
  return pipeline("fill-mask", model=model, top_k=100)  # top_k sets the maximum number of tokens retrieved per model inference
13
 
14
+ def main(nlp, semantic_model):
15
+ data_load_state = st.text('Inference to model...')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  result = nlp(text+' '+nlp.tokenizer.mask_token)
17
+ data_load_state.text('')
18
+ sem_list=[semantic_text.strip()]
19
  if len(semantic_text):
20
  predicted_seq=[rec['sequence'] for rec in result]
21
  predicted_embeddings = semantic_model.encode(predicted_seq, convert_to_tensor=True)
 
24
 
25
  for index, r in enumerate(result):
26
  if len(semantic_text):
 
27
  if len(r['token_str'])>2: #skip special chars such as "?"
28
+ result[index]['score']+=float(sum(cosine_scores[index]))*HISTORY_WEIGHT
29
  if r['token_str'].lower().strip() in history_keyword_text.lower().strip() and len(r['token_str'].lower().strip())>1:
30
  #found from history, then increase the score of tokens
31
  result[index]['score']*=HISTORY_WEIGHT
 
34
  df=pd.DataFrame(result).sort_values(by='score', ascending=False)
35
 
36
  # show the results as a table
37
+ st.table(df)
38
+ # print(df)
39
+
40
+
41
+ if __name__ == '__main__':
42
+ if st._is_running_with_streamlit:
43
+ st.caption("This is a simple auto-completion where the next token is predicted per probability and a weight if it is appeared in keyword user's history or there is a similarity to semantic user's history")
44
+ history_keyword_text = st.text_input("Enter users's history <keywords matc> (optional, i.e., 'Gates')", value="")
45
+
46
+ text = st.text_input("Enter a text for auto completion...", value='Where is Bill')
47
+
48
+ semantic_text = st.text_input("Enter users's history <semantic> (optional, i.e., 'Microsoft or President')", value="Microsoft")
49
+
50
+ model = st.selectbox("Choose a model", ["roberta-base", "bert-base-uncased"])
51
+
52
+ data_load_state = st.text('Loading model...')
53
+
54
+ semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
55
+ nlp = get_model(model)
56
+
57
+ main(nlp, semantic_model)
58
+ else:
59
+ sys.argv = ['streamlit', 'run', sys.argv[0]]
60
+ sys.exit(stcli.main())