mbahrami committed on
Commit
12094be
1 Parent(s): fa02d7f

Update app.py

Files changed (1): app.py +19 -1
app.py CHANGED
@@ -2,18 +2,22 @@ import streamlit as st
 import pandas as pd
 
 from transformers import pipeline
+from sentence_transformers import SentenceTransformer, util
+semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
 
 @st.cache(allow_output_mutation=True)
 def get_model(model):
     return pipeline("fill-mask", model=model, top_k=100)  # set the maximum number of tokens retrieved after each inference to the model
 
+
 HISTORY_WEIGHT = 100  # history weight (if a keyword from the user's history is found, its score is boosted by this weight)
 
 st.caption("This is a simple auto-completion where the next token is predicted per probability and a weight if it appears in the user's history")
 
-history_keyword_text = st.text_input("Enter user's history keywords (optional, e.g., 'Gates')", value="Gates")
+history_keyword_text = st.text_input("Enter user's history keywords (optional, e.g., 'Gates')", value="")
 
 text = st.text_input("Enter a text for auto completion...", value='Where is Bill')
+semantic_text = st.text_input("Enter user's semantic history (optional, e.g., 'Microsoft')", value="Microsoft")
 
 model = st.selectbox("choose a model", ["roberta-base", "bert-base-uncased"])
 
@@ -24,12 +28,26 @@ if text:
     data_load_state = st.text('Inference to model...')
     result = nlp(text+' '+nlp.tokenizer.mask_token)
     data_load_state.text('')
+
+    predicted_embeddings = semantic_model.encode([r['sequence'] for r in result], convert_to_tensor=True)
+    semantic_history_embeddings = semantic_model.encode(semantic_text.split(','), convert_to_tensor=True)
+
+    cosine_scores = util.cos_sim(predicted_embeddings, semantic_history_embeddings)
+
     for index, r in enumerate(result):
+        result[index]['score'] = float(cosine_scores[index].max())  # replace the LM score with the best similarity to any history phrase
         if r['token_str'].lower().strip() in history_keyword_text.lower().strip() and len(r['token_str'].lower().strip())>1:
             # found in the history, so increase the score of this token
             result[index]['score'] *= HISTORY_WEIGHT
 
+
+
     # sort the results
     df = pd.DataFrame(result).sort_values(by='score', ascending=False)
+
+
+
+
+
     # show the results as a table
     st.table(df)
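
The semantic re-ranking added in this commit can be tried in isolation. Below is a minimal sketch, assuming the same 'all-MiniLM-L6-v2' model and a couple of made-up fill-mask predictions (the result list and semantic_text values are hypothetical, not part of app.py). util.cos_sim returns a matrix of shape [number of predictions x number of history phrases], so each predicted sequence is scored by its best match against any comma-separated history phrase.

# Minimal standalone sketch of the semantic re-ranking step (hypothetical inputs)
from sentence_transformers import SentenceTransformer, util

semantic_model = SentenceTransformer('all-MiniLM-L6-v2')

# Made-up fill-mask outputs: each dict mimics one entry returned by pipeline("fill-mask")
result = [
    {'sequence': 'Where is Bill Gates', 'token_str': 'Gates', 'score': 0.41},
    {'sequence': 'Where is Bill going', 'token_str': 'going', 'score': 0.22},
]
semantic_text = "Microsoft, software"

predicted_embeddings = semantic_model.encode([r['sequence'] for r in result], convert_to_tensor=True)
semantic_history_embeddings = semantic_model.encode(semantic_text.split(','), convert_to_tensor=True)

# Shape: [len(result), number of history phrases]
cosine_scores = util.cos_sim(predicted_embeddings, semantic_history_embeddings)

for index, r in enumerate(result):
    # score each prediction by its best similarity to any history phrase
    result[index]['score'] = float(cosine_scores[index].max())

print(sorted(result, key=lambda r: r['score'], reverse=True)[0]['sequence'])

With these inputs the 'Gates' completion is expected to rank first, since its sequence sits closer to 'Microsoft' in embedding space than the alternative.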