lvwerra HF staff committed on
Commit
7f5bdb5
•
1 Parent(s): 25cf3d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -24
app.py CHANGED
@@ -12,38 +12,29 @@ import requests
12
  def mark_tokens_bold(string, tokens):
13
  for token in tokens:
14
  pattern = re.escape(token) #r"\b" + re.escape(token) + r"\b"
15
- string = re.sub(pattern, "<span style='color: red;'><b>" + token + "</b></span>", string)
16
  return string
17
 
18
 
19
  def process_results(results, highlight_terms):
20
  if len(results) == 0:
21
- return """<br><p style='font-family: Arial; color:Silver; text-align: center;'>
22
- No results retrieved.</p><br><hr>"""
23
 
24
  results_html = ""
25
  for result in results:
26
  text_html = result["text"]
27
  text_html = mark_tokens_bold(text_html, highlight_terms)
28
- meta_html = (
29
- """
30
- <p class='underline-on-hover' style='font-size:12px; font-family: Arial; color:#585858; text-align: left;'>
31
- <a href='{}' target='_blank'>{}</a></p>""".format(
32
- result["meta"]["url"], result["meta"]["url"]
33
- )
34
- if "meta" in result and result["meta"] is not None and "url" in result["meta"]
35
- else ""
36
- )
37
  docid_html = str(result["docid"])
38
 
39
  licenses = " | ".join(result["repo_license"])
40
  repo_name = result["repo_name"]
41
  repo_path = result["repo_path"]
42
 
43
- results_html += """{}
44
- <p style='font-size:16px; font-family: Arial; text-align: left;'>Repository name: <span style='color: #20233fff;'>{}</span></p>
45
- <p style='font-size:16px; font-family: Arial; text-align: left;'>Repository path: <span style='color: #20233fff;'>{}</span></p>
46
- <p style='font-size:16px; font-family: Arial; text-align: left;'>Repository licenses: <span style='color: #20233fff;'>{}</span></p>
47
  <pre style='height: 600px; overflow: scroll;'><code>{}</code></pre>
48
  <br>
49
  """.format(
@@ -74,18 +65,14 @@ def scisearch(query, language, num_results=10):
74
  return process_results(results, highlight_terms)
75
 
76
 
77
- description = """# <p style="text-align: center;"> 🌸 🔎 ROOTS search tool 🔍 🌸 </p>
78
- The ROOTS corpus was developed during the [BigScience workshop](https://bigscience.huggingface.co/) for the purpose
79
- of training the Multilingual Large Language Model [BLOOM](https://huggingface.co/bigscience/bloom). This tool allows
80
- you to search through the ROOTS corpus. We serve a BM25 index for each language or group of languages included in
81
- ROOTS. You can read more about the details of the tool design
82
- [here](https://huggingface.co/spaces/bigscience-data/scisearch/blob/main/roots_search_tool_specs.pdf). For more
83
- information and instructions on how to access the full corpus check [this form](https://forms.gle/qyYswbEL5kA23Wu99)."""
84
 
85
 
86
  if __name__ == "__main__":
87
  demo = gr.Blocks(
88
- css=".underline-on-hover:hover { text-decoration: underline; } .flagging { font-size:12px; background-color:#20233fff; } .gradio-container {background-color: #20233fff}"
89
  )
90
 
91
  with demo:
 
12
  def mark_tokens_bold(string, tokens):
13
  for token in tokens:
14
  pattern = re.escape(token) #r"\b" + re.escape(token) + r"\b"
15
+ string = re.sub(pattern, "<span style='color: #ff75b3;'><b>" + token + "</b></span>", string)
16
  return string
17
 
18
 
19
  def process_results(results, highlight_terms):
20
  if len(results) == 0:
21
+ return """<br><p>No results retrieved.</p><br><hr>"""
 
22
 
23
  results_html = ""
24
  for result in results:
25
  text_html = result["text"]
26
  text_html = mark_tokens_bold(text_html, highlight_terms)
27
+
 
 
 
 
 
 
 
 
28
  docid_html = str(result["docid"])
29
 
30
  licenses = " | ".join(result["repo_license"])
31
  repo_name = result["repo_name"]
32
  repo_path = result["repo_path"]
33
 
34
+ results_html += """\
35
+ <p style='font-size:16px; font-family: Arial; text-align: left;'>Repository name: <span style='color: #ff75b3;'>{}</span></p>
36
+ <p style='font-size:16px; font-family: Arial; text-align: left;'>Repository path: <span style='color: #ff75b3;'>{}</span></p>
37
+ <p style='font-size:16px; font-family: Arial; text-align: left;'>Repository licenses: <span style='color: #ff75b3;'>{}</span></p>
38
  <pre style='height: 600px; overflow: scroll;'><code>{}</code></pre>
39
  <br>
40
  """.format(
 
65
  return process_results(results, highlight_terms)
66
 
67
 
68
+ description = """# <p style="text-align: center;"> 🔎 IceCoder Dataset Search 🔍 </p>
69
+ When you use [IceCoder]() to generate code it might produce exact copies of code in the pretraining dataset. In that case the code requires
70
+ and with this search tool we aim to provide help to finding out where the code came from."""
 
 
 
 
71
 
72
 
73
  if __name__ == "__main__":
74
  demo = gr.Blocks(
75
+ css=".gradio-container {background-color: #20233fff; color:white}"
76
  )
77
 
78
  with demo: