awacke1 committed on
Commit
6d3b539
•
1 Parent(s): c5ae802

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -35
app.py CHANGED
@@ -49,8 +49,10 @@ if query_params:
49
  selected_option = st.radio(
50
  "Param", options, index=ix, key="query", on_change=update_params
51
  )
52
- st.experimental_set_query_params(option=selected_option)
 
53
 
 
54
 
55
  try:
56
  st.session_state.query = query # if set already above. this prevents two interface elements setting it first time once
@@ -77,28 +79,6 @@ except: # catch exception and set query param to predefined value
77
  query_option = query_params['query'][0]
78
  query_option = "ai"
79
 
80
-
81
-
82
- # What impresses me about these two beautiful new streamlit persist prims is that one called the singleton can share memory across sessions (think all users yo)
83
- #@st.experimental_singleton
84
- #def get_sessionmaker(search_param):
85
- # url = "https://en.wikipedia.org/wiki/"
86
- # return url
87
- #search_param = "Star_Trek:_Discovery"
88
- #sm= get_sessionmaker(search_param)
89
-
90
- # What is supercool about the second prim the memo is it makes unwieldy data very wieldy. Like the Lord of Rings in reverse re "you cannot wield it! none of us can." -> "You can wield it, now everyone can."
91
- #@st.experimental_memo
92
- #def factorial(n):
93
- # if n < 1:
94
- # return 1
95
- # return n * factorial(n - 1)
96
- #em10 = factorial(10)
97
- #em09 = factorial(9) # Returns instantly!
98
-
99
-
100
-
101
-
102
  DEVICE = "cpu"
103
  MODEL_OPTIONS = ["msmarco-distilbert-base-tas-b", "all-mpnet-base-v2"]
104
  DESCRIPTION = """
@@ -144,14 +124,10 @@ def load():
144
  df = pd.read_csv("movies.csv")
145
  return tokenizers, models, embeddings, df
146
 
147
-
148
  tokenizers, models, embeddings, df = load()
149
-
150
-
151
  def pooling(model_output):
152
  return model_output.last_hidden_state[:, 0]
153
 
154
-
155
  def compute_embeddings(texts):
156
  encoded_input = tokenizers[0](
157
  texts, padding=True, truncation=True, return_tensors="pt"
@@ -161,10 +137,8 @@ def compute_embeddings(texts):
161
  model_output = models[0](**encoded_input, return_dict=True)
162
 
163
  embeddings = pooling(model_output)
164
-
165
  return embeddings.cpu().numpy()
166
 
167
-
168
  def pooling2(model_output, attention_mask):
169
  token_embeddings = model_output[0]
170
  input_mask_expanded = (
@@ -174,7 +148,6 @@ def pooling2(model_output, attention_mask):
174
  input_mask_expanded.sum(1), min=1e-9
175
  )
176
 
177
-
178
  def compute_embeddings2(list_of_strings):
179
  encoded_input = tokenizers[1](
180
  list_of_strings, padding=True, truncation=True, return_tensors="pt"
@@ -184,7 +157,6 @@ def compute_embeddings2(list_of_strings):
184
  sentence_embeddings = pooling2(model_output, encoded_input["attention_mask"])
185
  return F.normalize(sentence_embeddings, p=2, dim=1).cpu().numpy()
186
 
187
-
188
  @st.cache(
189
  show_spinner=False,
190
  hash_funcs={Tokenizer: lambda _: None, AddedToken: lambda _: None},
@@ -219,15 +191,11 @@ def semantic_search(query, model_id):
219
  delay = "%.3f" % (time.time() - start)
220
  return f"<p><i>Computation time: {delay} seconds</i></p>{result}</ol>"
221
 
222
-
223
  st.sidebar.markdown(DESCRIPTION)
224
 
225
  model_choice = st.sidebar.selectbox("Similarity model", options=MODEL_OPTIONS)
226
  model_id = 0 if model_choice == MODEL_OPTIONS[0] else 1
227
 
228
-
229
-
230
-
231
  clicked = click_detector(semantic_search(query, model_id))
232
 
233
  if clicked != "":
49
  selected_option = st.radio(
50
  "Param", options, index=ix, key="query", on_change=update_params
51
  )
52
+ st.write("<style>div.row-widget.stRadio > div{flex-direction:row;}</style>", unsafe_allow_html=True)
53
+
54
 
55
+ st.experimental_set_query_params(option=selected_option)
56
 
57
  try:
58
  st.session_state.query = query # if set already above. this prevents two interface elements setting it first time once
79
  query_option = query_params['query'][0]
80
  query_option = "ai"
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  DEVICE = "cpu"
83
  MODEL_OPTIONS = ["msmarco-distilbert-base-tas-b", "all-mpnet-base-v2"]
84
  DESCRIPTION = """
124
  df = pd.read_csv("movies.csv")
125
  return tokenizers, models, embeddings, df
126
 
 
127
  tokenizers, models, embeddings, df = load()
 
 
128
  def pooling(model_output):
129
  return model_output.last_hidden_state[:, 0]
130
 
 
131
  def compute_embeddings(texts):
132
  encoded_input = tokenizers[0](
133
  texts, padding=True, truncation=True, return_tensors="pt"
137
  model_output = models[0](**encoded_input, return_dict=True)
138
 
139
  embeddings = pooling(model_output)
 
140
  return embeddings.cpu().numpy()
141
 
 
142
  def pooling2(model_output, attention_mask):
143
  token_embeddings = model_output[0]
144
  input_mask_expanded = (
148
  input_mask_expanded.sum(1), min=1e-9
149
  )
150
 
 
151
  def compute_embeddings2(list_of_strings):
152
  encoded_input = tokenizers[1](
153
  list_of_strings, padding=True, truncation=True, return_tensors="pt"
157
  sentence_embeddings = pooling2(model_output, encoded_input["attention_mask"])
158
  return F.normalize(sentence_embeddings, p=2, dim=1).cpu().numpy()
159
 
 
160
  @st.cache(
161
  show_spinner=False,
162
  hash_funcs={Tokenizer: lambda _: None, AddedToken: lambda _: None},
191
  delay = "%.3f" % (time.time() - start)
192
  return f"<p><i>Computation time: {delay} seconds</i></p>{result}</ol>"
193
 
 
194
  st.sidebar.markdown(DESCRIPTION)
195
 
196
  model_choice = st.sidebar.selectbox("Similarity model", options=MODEL_OPTIONS)
197
  model_id = 0 if model_choice == MODEL_OPTIONS[0] else 1
198
 
 
 
 
199
  clicked = click_detector(semantic_search(query, model_id))
200
 
201
  if clicked != "":