loubnabnl HF staff committed on
Commit
5512dad
•
1 Parent(s): 7a32dc0

chnage colors

Browse files
Files changed (1) hide show
  1. app.py +38 -12
app.py CHANGED
@@ -13,7 +13,7 @@ es = Elasticsearch(os.environ.get("host"), timeout=100, http_compress=True, maxs
13
  def mark_tokens_bold(string, tokens):
14
  for token in tokens:
15
  pattern = re.escape(token) #r"\b" + re.escape(token) + r"\b"
16
- string = re.sub(pattern, "<span style='color: #ff75b3;'><b>" + token + "</b></span>", string)
17
  return string
18
 
19
 
@@ -26,11 +26,16 @@ def process_results(results):
26
  text_html = result["text"]
27
  # text_html = mark_tokens_bold(text_html, highlight_terms)
28
  repository = result["repository"]
29
-
 
30
  results_html += """\
31
- <p style='font-size:16px; text-align: left; color: white;'>Source: <span style='color: #727cd6;'>{}</span></p>
32
  <br>
33
- <pre style='height: 600px; overflow-y: scroll; overflow-x: hidden; color: #d9d9d9;border: 1px solid #ff75b3; padding: 10px'><code>{}</code></pre>
 
 
 
 
34
  <br>
35
  <hr>
36
  <br>
@@ -59,19 +64,40 @@ def search(query, num_results=10):
59
  print(len(response))
60
  else:
61
  response = match_query(query, num_results=num_results)
62
- results = [{"text": hit.content, "repository": f"{hit.repository}/{hit.path}"} for hit in response]
63
  return process_results(results)
64
 
65
-
66
- description = """# <p style="text-align: center; color: white;"><span style='color: #ff75b3;'>StarCoder:</span> Dataset Search 🔍 </p>
67
- <span style='color: white;'>When you use <a href="https://huggingface.co/bigcode/large-model" style="color: #ff75b3;">StarCoder</a> to generate code it might produce exact copies of code in the pretraining dataset.
68
- In that case, the code license might have requirements to comply with.
69
- With this search tool we aim to provide help to find out where the code came from, in order for the user to comply with licensing requirements in case the code produced by StarCoder belongs to an already existing repository. For exact matches, enclose your query in double quotes.</span>"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
 
72
  if __name__ == "__main__":
73
  demo = gr.Blocks(
74
- css=".gradio-container {background-color: #20233fff; color:white}"
 
75
  )
76
 
77
  with demo:
@@ -84,7 +110,7 @@ if __name__ == "__main__":
84
  with gr.Row():
85
  submit_btn = gr.Button("Submit")
86
  with gr.Row():
87
- results = gr.HTML(label="Results", value="<img src='https://huggingface.co/datasets/bigcode/admin/resolve/main/bigcode_contact.png' alt='contact' style='display: block; margin: auto; max-width: 800px;'>")
88
 
89
  def submit(query, k, lang="en"):
90
  query = query.strip()
 
13
  def mark_tokens_bold(string, tokens):
14
  for token in tokens:
15
  pattern = re.escape(token) #r"\b" + re.escape(token) + r"\b"
16
+ string = re.sub(pattern, "<span style='color: #e6b800;'><b>" + token + "</b></span>", string)
17
  return string
18
 
19
 
 
26
  text_html = result["text"]
27
  # text_html = mark_tokens_bold(text_html, highlight_terms)
28
  repository = result["repository"]
29
+ license = result["license"]
30
+ language = result["language"]
31
  results_html += """\
32
+ <p style='font-size:16px; text-align: left;'>Source: <span style='color: #00134d;'>{}</span></p>
33
  <br>
34
+ <p style='font-size:16px;> Language:<span style='color: #00134d;'>Python</span></p>
35
+ <br>
36
+ <p style='font-size:16px;> License:<span style='color: #00134d;'>MIT</span></p>
37
+ <br>
38
+ <pre style='height: 600px; overflow-y: scroll; overflow-x: hidden; color: #d9d9d9;border: 1px solid #e6b800; padding: 10px'><code>{}</code></pre>
39
  <br>
40
  <hr>
41
  <br>
 
64
  print(len(response))
65
  else:
66
  response = match_query(query, num_results=num_results)
67
+ results = [{"text": hit.content, "repository": f"{hit.repository}/{hit.path}", "license": hit.license, "language": hit.language} for hit in response]
68
  return process_results(results)
69
 
70
+ description = """# <p style="text-align: center; color: white;"><span style='color: #e6b800;'>StarCoder:</span> Dataset Search 🔍 </p>
71
+ <span style='color: white;'>When using <a href="https://huggingface.co/bigcode/large-model" style="color: #e6b800;">StarCoder</a> to generate code, it might produce exact copies of code in the pretraining dataset. \
72
+ In that case, the code license might have requirements to comply with. With this search tool, our aim is to help in identifying if the code belongs to an existing repository. For exact matches, enclose your query in double quotes.</span>"""
73
+
74
+ theme = gr.themes.Monochrome(
75
+ primary_hue="indigo",
76
+ secondary_hue="blue",
77
+ neutral_hue="slate",
78
+ radius_size=gr.themes.sizes.radius_sm,
79
+ font=[
80
+ gr.themes.GoogleFont("Open Sans"),
81
+ "ui-sans-serif",
82
+ "system-ui",
83
+ "sans-serif",
84
+ ],
85
+ )
86
+ css = ".generating {visibility: hidden}"
87
+
88
+ monospace_css = """
89
+ #q-input textarea {
90
+ font-family: monospace, 'Consolas', Courier, monospace;
91
+ }
92
+ """
93
+
94
+ css = monospace_css + ".gradio-container {color: black}"
95
 
96
 
97
  if __name__ == "__main__":
98
  demo = gr.Blocks(
99
+ theme=theme,
100
+ css=css,
101
  )
102
 
103
  with demo:
 
110
  with gr.Row():
111
  submit_btn = gr.Button("Submit")
112
  with gr.Row():
113
+ results = gr.HTML(label="Results", value="")
114
 
115
  def submit(query, k, lang="en"):
116
  query = query.strip()