ola13 committed on
Commit
da246ad
1 Parent(s): 31a66c1

exact search iteration

Browse files
Files changed (2) hide show
  1. app.py +63 -25
  2. spaces.code-workspace +11 -0
app.py CHANGED
@@ -103,11 +103,17 @@ def process_results(results, highlight_terms):
103
 
104
 
105
  def process_exact_match_payload(payload, query):
 
106
  results = payload["results"]
107
- results_html = ""
 
 
 
 
108
  for result in results:
 
 
109
  text = result["text"]
110
- print(result, text, type(text))
111
  meta_html = format_meta(result)
112
 
113
  query_start = text.find(query)
@@ -125,7 +131,7 @@ def process_exact_match_payload(payload, query):
125
  )
126
  )
127
  results_html += result_html
128
- return results_html + "<hr>"
129
 
130
 
131
  def process_bm25_match_payload(payload, language):
@@ -144,15 +150,20 @@ def process_bm25_match_payload(payload, language):
144
  if language == "detect_language":
145
  return (
146
  (
147
- f"""<p style='font-family: Arial; color:MediumAquaMarine; text-align: center; line-height: 3em'>
 
148
  Detected language: <b>{results[0]["lang"]}</b></p><br><hr><br>"""
149
- if len(results) > 0 and language == "detect_language"
150
- else ""
151
- )
152
- + process_results(results, highlight_terms)
 
 
153
  )
154
 
155
  if language == "all":
 
 
156
  results_html = ""
157
  for lang, results_for_lang in results.items():
158
  if len(results_for_lang) == 0:
@@ -168,14 +179,28 @@ def process_bm25_match_payload(payload, language):
168
  {process_results(results_for_lang, highlight_terms)}
169
  </details>"""
170
  results_html += collapsible_results
171
- return results_html
 
 
 
172
 
173
- return process_results(results, highlight_terms)
 
 
 
 
174
 
175
 
176
- def scisearch(query, language, num_results=10, exact_search=False):
 
177
  try:
178
- query = " ".join(query.split())
 
 
 
 
 
 
179
  if query == "" or query is None:
180
  return ""
181
  post_data = {"query": query, "k": num_results}
@@ -206,7 +231,7 @@ def scisearch(query, language, num_results=10, exact_search=False):
206
  """
207
  print(e)
208
  print(traceback.format_exc())
209
- return results_html
210
 
211
 
212
  def flag(query, language, num_results, issue_description):
@@ -254,7 +279,7 @@ if __name__ == "__main__":
254
  query = gr.Textbox(
255
  lines=1,
256
  max_lines=1,
257
- placeholder="Type your query here...",
258
  label="Query",
259
  )
260
  with gr.Row():
@@ -282,18 +307,23 @@ if __name__ == "__main__":
282
  with gr.Row():
283
  k = gr.Slider(1, 100, value=10, step=1, label="Max Results")
284
  with gr.Row():
 
285
  with gr.Column(scale=1):
286
  exact_search = gr.Checkbox(
287
  value=False, label="Exact Search", variant="compact"
288
  )
 
289
  with gr.Column(scale=4):
290
  submit_btn = gr.Button("Submit")
 
 
 
 
 
 
 
291
  with gr.Row():
292
  results = gr.HTML(label="Results")
293
- flag_description = """
294
- <p class='flagging'>
295
- If you choose to flag your search, we will save the query, language and the number of results you requested.
296
- Please consider adding any additional context in the box on the right.</p>"""
297
  with gr.Column(visible=False) as flagging_form:
298
  flag_txt = gr.Textbox(
299
  lines=1,
@@ -304,25 +334,33 @@ if __name__ == "__main__":
304
  flag_btn = gr.Button("Flag Results")
305
  flag_btn.click(flag, inputs=[query, lang, k, flag_txt], outputs=[flag_txt])
306
 
307
- def submit(query, lang, k, exact_search):
308
- print("submitting", query, lang, k, exact_search)
309
  query = query.strip()
310
  if query is None or query == "":
311
  return "", ""
 
 
312
  return {
313
- results: scisearch(query, lang, k, exact_search),
314
  flagging_form: gr.update(visible=True),
 
 
315
  }
316
 
 
 
 
317
  query.submit(
318
  fn=submit,
319
- inputs=[query, lang, k, exact_search],
320
- outputs=[results, flagging_form],
321
  )
322
  submit_btn.click(
323
  submit,
324
- inputs=[query, lang, k, exact_search],
325
- outputs=[results, flagging_form],
326
  )
327
 
 
328
  demo.launch(enable_queue=True, debug=True)
 
103
 
104
 
105
  def process_exact_match_payload(payload, query):
106
+ datasets = set()
107
  results = payload["results"]
108
+ results_html = (
109
+ "<p style='font-family: Arial;'>Total nubmer of results: {}</p>".format(
110
+ payload["num_results"]
111
+ )
112
+ )
113
  for result in results:
114
+ _, dataset, _ = result["docid"].split("/")
115
+ datasets.add(dataset)
116
  text = result["text"]
 
117
  meta_html = format_meta(result)
118
 
119
  query_start = text.find(query)
 
131
  )
132
  )
133
  results_html += result_html
134
+ return results_html + "<hr>", list(datasets)
135
 
136
 
137
  def process_bm25_match_payload(payload, language):
 
150
  if language == "detect_language":
151
  return (
152
  (
153
+ (
154
+ f"""<p style='font-family: Arial; color:MediumAquaMarine; text-align: center; line-height: 3em'>
155
  Detected language: <b>{results[0]["lang"]}</b></p><br><hr><br>"""
156
+ if len(results) > 0 and language == "detect_language"
157
+ else ""
158
+ )
159
+ + process_results(results, highlight_terms)
160
+ ),
161
+ [],
162
  )
163
 
164
  if language == "all":
165
+ datasets = set()
166
+ get_docid_html(result["docid"])
167
  results_html = ""
168
  for lang, results_for_lang in results.items():
169
  if len(results_for_lang) == 0:
 
179
  {process_results(results_for_lang, highlight_terms)}
180
  </details>"""
181
  results_html += collapsible_results
182
+ for r in results_for_lang:
183
+ _, dataset, _ = r["docid"].split("/")
184
+ datasets.add(dataset)
185
+ return results_html, list(datasets)
186
 
187
+ datasets = set()
188
+ for r in results:
189
+ _, dataset, _ = r["docid"].split("/")
190
+ datasets.add(dataset)
191
+ return process_results(results, highlight_terms), list(datasets)
192
 
193
 
194
+ def scisearch(query, language, num_results=10):
195
+ datasets = []
196
  try:
197
+ query = query.strip()
198
+ exact_search = False
199
+ if query.startswith('"') and query.endswith('"') and len(query) >= 2:
200
+ exact_search = True
201
+ query = query[1:-1]
202
+ else:
203
+ query = " ".join(query.split())
204
  if query == "" or query is None:
205
  return ""
206
  post_data = {"query": query, "k": num_results}
 
231
  """
232
  print(e)
233
  print(traceback.format_exc())
234
+ return results_html, datasets
235
 
236
 
237
  def flag(query, language, num_results, issue_description):
 
279
  query = gr.Textbox(
280
  lines=1,
281
  max_lines=1,
282
+ placeholder="Put your query in double quotes for exact search.",
283
  label="Query",
284
  )
285
  with gr.Row():
 
307
  with gr.Row():
308
  k = gr.Slider(1, 100, value=10, step=1, label="Max Results")
309
  with gr.Row():
310
+ """
311
  with gr.Column(scale=1):
312
  exact_search = gr.Checkbox(
313
  value=False, label="Exact Search", variant="compact"
314
  )
315
+ """
316
  with gr.Column(scale=4):
317
  submit_btn = gr.Button("Submit")
318
+ with gr.Row(visible=False) as datasets_filter:
319
+ available_datasets = gr.Dropdown(
320
+ type="value",
321
+ choices=["ran", "swam", "ate", "slept"],
322
+ label="Datasets",
323
+ multiselect=True,
324
+ )
325
  with gr.Row():
326
  results = gr.HTML(label="Results")
 
 
 
 
327
  with gr.Column(visible=False) as flagging_form:
328
  flag_txt = gr.Textbox(
329
  lines=1,
 
334
  flag_btn = gr.Button("Flag Results")
335
  flag_btn.click(flag, inputs=[query, lang, k, flag_txt], outputs=[flag_txt])
336
 
337
+ def submit(query, lang, k, dropdown_input):
338
+ print("submitting", query, lang, k)
339
  query = query.strip()
340
  if query is None or query == "":
341
  return "", ""
342
+ results_html, datasets = scisearch(query, lang, k)
343
+ print(datasets)
344
  return {
345
+ results: results_html,
346
  flagging_form: gr.update(visible=True),
347
+ datasets_filter: gr.update(visible=True),
348
+ available_datasets: gr.Dropdown.update(choices=datasets),
349
  }
350
 
351
+ def filter_datasets():
352
+ pass
353
+
354
  query.submit(
355
  fn=submit,
356
+ inputs=[query, lang, k, available_datasets],
357
+ outputs=[results, flagging_form, datasets_filter, available_datasets],
358
  )
359
  submit_btn.click(
360
  submit,
361
+ inputs=[query, lang, k, available_datasets],
362
+ outputs=[results, flagging_form, datasets_filter, available_datasets],
363
  )
364
 
365
+ available_datasets.change(filter_datasets, inputs=[], outputs=[])
366
  demo.launch(enable_queue=True, debug=True)
spaces.code-workspace ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "folders": [
3
+ {
4
+ "path": ".."
5
+ },
6
+ {
7
+ "path": "../../roots-search-tool"
8
+ }
9
+ ],
10
+ "settings": {}
11
+ }