Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -49,8 +49,10 @@ if query_params:
|
|
49 |
selected_option = st.radio(
|
50 |
"Param", options, index=ix, key="query", on_change=update_params
|
51 |
)
|
52 |
-
st.
|
|
|
53 |
|
|
|
54 |
|
55 |
try:
|
56 |
st.session_state.query = query # if set already above. this prevents two interface elements setting it first time once
|
@@ -77,28 +79,6 @@ except: # catch exception and set query param to predefined value
|
|
77 |
query_option = query_params['query'][0]
|
78 |
query_option = "ai"
|
79 |
|
80 |
-
|
81 |
-
|
82 |
-
# What impresses me about these two beautiful new streamlit persist prims is that one called the singleton can share memory across sessions (think all users yo)
|
83 |
-
#@st.experimental_singleton
|
84 |
-
#def get_sessionmaker(search_param):
|
85 |
-
# url = "https://en.wikipedia.org/wiki/"
|
86 |
-
# return url
|
87 |
-
#search_param = "Star_Trek:_Discovery"
|
88 |
-
#sm= get_sessionmaker(search_param)
|
89 |
-
|
90 |
-
# What is supercool about the second prim the memo is it makes unwieldy data very wieldy. Like the Lord of Rings in reverse re "you cannot wield it! none of us can." -> "You can wield it, now everyone can."
|
91 |
-
#@st.experimental_memo
|
92 |
-
#def factorial(n):
|
93 |
-
# if n < 1:
|
94 |
-
# return 1
|
95 |
-
# return n * factorial(n - 1)
|
96 |
-
#em10 = factorial(10)
|
97 |
-
#em09 = factorial(9) # Returns instantly!
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
DEVICE = "cpu"
|
103 |
MODEL_OPTIONS = ["msmarco-distilbert-base-tas-b", "all-mpnet-base-v2"]
|
104 |
DESCRIPTION = """
|
@@ -144,14 +124,10 @@ def load():
|
|
144 |
df = pd.read_csv("movies.csv")
|
145 |
return tokenizers, models, embeddings, df
|
146 |
|
147 |
-
|
148 |
tokenizers, models, embeddings, df = load()
|
149 |
-
|
150 |
-
|
151 |
def pooling(model_output):
|
152 |
return model_output.last_hidden_state[:, 0]
|
153 |
|
154 |
-
|
155 |
def compute_embeddings(texts):
|
156 |
encoded_input = tokenizers[0](
|
157 |
texts, padding=True, truncation=True, return_tensors="pt"
|
@@ -161,10 +137,8 @@ def compute_embeddings(texts):
|
|
161 |
model_output = models[0](**encoded_input, return_dict=True)
|
162 |
|
163 |
embeddings = pooling(model_output)
|
164 |
-
|
165 |
return embeddings.cpu().numpy()
|
166 |
|
167 |
-
|
168 |
def pooling2(model_output, attention_mask):
|
169 |
token_embeddings = model_output[0]
|
170 |
input_mask_expanded = (
|
@@ -174,7 +148,6 @@ def pooling2(model_output, attention_mask):
|
|
174 |
input_mask_expanded.sum(1), min=1e-9
|
175 |
)
|
176 |
|
177 |
-
|
178 |
def compute_embeddings2(list_of_strings):
|
179 |
encoded_input = tokenizers[1](
|
180 |
list_of_strings, padding=True, truncation=True, return_tensors="pt"
|
@@ -184,7 +157,6 @@ def compute_embeddings2(list_of_strings):
|
|
184 |
sentence_embeddings = pooling2(model_output, encoded_input["attention_mask"])
|
185 |
return F.normalize(sentence_embeddings, p=2, dim=1).cpu().numpy()
|
186 |
|
187 |
-
|
188 |
@st.cache(
|
189 |
show_spinner=False,
|
190 |
hash_funcs={Tokenizer: lambda _: None, AddedToken: lambda _: None},
|
@@ -219,15 +191,11 @@ def semantic_search(query, model_id):
|
|
219 |
delay = "%.3f" % (time.time() - start)
|
220 |
return f"<p><i>Computation time: {delay} seconds</i></p>{result}</ol>"
|
221 |
|
222 |
-
|
223 |
st.sidebar.markdown(DESCRIPTION)
|
224 |
|
225 |
model_choice = st.sidebar.selectbox("Similarity model", options=MODEL_OPTIONS)
|
226 |
model_id = 0 if model_choice == MODEL_OPTIONS[0] else 1
|
227 |
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
clicked = click_detector(semantic_search(query, model_id))
|
232 |
|
233 |
if clicked != "":
|
49 |
selected_option = st.radio(
|
50 |
"Param", options, index=ix, key="query", on_change=update_params
|
51 |
)
|
52 |
+
st.write("<style>div.row-widget.stRadio > div{flex-direction:row;}</style>", unsafe_allow_html=True)
|
53 |
+
|
54 |
|
55 |
+
st.experimental_set_query_params(option=selected_option)
|
56 |
|
57 |
try:
|
58 |
st.session_state.query = query # if set already above. this prevents two interface elements setting it first time once
|
79 |
query_option = query_params['query'][0]
|
80 |
query_option = "ai"
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
DEVICE = "cpu"
|
83 |
MODEL_OPTIONS = ["msmarco-distilbert-base-tas-b", "all-mpnet-base-v2"]
|
84 |
DESCRIPTION = """
|
124 |
df = pd.read_csv("movies.csv")
|
125 |
return tokenizers, models, embeddings, df
|
126 |
|
|
|
127 |
tokenizers, models, embeddings, df = load()
|
|
|
|
|
128 |
def pooling(model_output):
|
129 |
return model_output.last_hidden_state[:, 0]
|
130 |
|
|
|
131 |
def compute_embeddings(texts):
|
132 |
encoded_input = tokenizers[0](
|
133 |
texts, padding=True, truncation=True, return_tensors="pt"
|
137 |
model_output = models[0](**encoded_input, return_dict=True)
|
138 |
|
139 |
embeddings = pooling(model_output)
|
|
|
140 |
return embeddings.cpu().numpy()
|
141 |
|
|
|
142 |
def pooling2(model_output, attention_mask):
|
143 |
token_embeddings = model_output[0]
|
144 |
input_mask_expanded = (
|
148 |
input_mask_expanded.sum(1), min=1e-9
|
149 |
)
|
150 |
|
|
|
151 |
def compute_embeddings2(list_of_strings):
|
152 |
encoded_input = tokenizers[1](
|
153 |
list_of_strings, padding=True, truncation=True, return_tensors="pt"
|
157 |
sentence_embeddings = pooling2(model_output, encoded_input["attention_mask"])
|
158 |
return F.normalize(sentence_embeddings, p=2, dim=1).cpu().numpy()
|
159 |
|
|
|
160 |
@st.cache(
|
161 |
show_spinner=False,
|
162 |
hash_funcs={Tokenizer: lambda _: None, AddedToken: lambda _: None},
|
191 |
delay = "%.3f" % (time.time() - start)
|
192 |
return f"<p><i>Computation time: {delay} seconds</i></p>{result}</ol>"
|
193 |
|
|
|
194 |
st.sidebar.markdown(DESCRIPTION)
|
195 |
|
196 |
model_choice = st.sidebar.selectbox("Similarity model", options=MODEL_OPTIONS)
|
197 |
model_id = 0 if model_choice == MODEL_OPTIONS[0] else 1
|
198 |
|
|
|
|
|
|
|
199 |
clicked = click_detector(semantic_search(query, model_id))
|
200 |
|
201 |
if clicked != "":
|