ola13 committed on
Commit
6a9e7aa
1 Parent(s): 277debc

code style

Browse files
Files changed (1) hide show
  1. app.py +142 -119
app.py CHANGED
@@ -1,81 +1,92 @@
1
- import http.client as http_client
2
  import json
3
- import logging
4
  import os
5
  import pprint
6
  import re
7
- import string
8
 
 
9
  import streamlit as st
10
  import streamlit.components.v1 as components
11
- import requests
12
-
13
 
14
  pp = pprint.PrettyPrinter(indent=2)
15
  st.set_page_config(page_title="Gaia Search", layout="wide")
16
 
17
- os.makedirs(os.path.join(os.getcwd(),".streamlit"), exist_ok = True)
18
- with open(os.path.join(os.getcwd(),".streamlit/config.toml"), "w") as file:
19
- file.write(
20
- '[theme]\nbase="light"'
21
- )
22
-
23
- LANG_MAPPING = {'Arabic':'ar',
24
- 'Catalan':'ca',
25
- 'Code':'code',
26
- 'English':'en',
27
- 'Spanish':'es',
28
- 'French':'fr',
29
- 'Indonesian':'id',
30
- 'Indic':'indic',
31
- 'Niger-Congo':'nigercongo',
32
- 'Portuguese': 'pt',
33
- 'Vietnamese': 'vi',
34
- 'Chinese': 'zh',
35
- 'Detect Language':'detect_language',
36
- 'All':'all'}
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
 
40
  st.sidebar.markdown(
41
- """
42
- <style>
43
- .aligncenter {
44
- text-align: center;
45
- font-weight: bold;
46
- font-size: 50px;
47
- }
48
- </style>
49
- <p class="aligncenter">Gaia Search 🌖🌏</p>
50
- <p style="text-align: center;"> A search engine for the LAION large scale image caption corpora</p>
51
- """,
52
- unsafe_allow_html=True,
53
  )
54
 
55
  st.sidebar.markdown(
56
- """
57
- <style>
58
- .aligncenter {
59
- text-align: center;
60
- }
61
- </style>
62
- <p style='text-align: center'>
63
- <a href="" >GitHub</a> | <a href="" >Project Report</a>
64
- </p>
65
- <p class="aligncenter">
66
- <a href="" target="_blank">
67
- <img src="https://colab.research.google.com/assets/colab-badge.svg"/>
68
- </a>
69
- </p>
70
- """,
71
- unsafe_allow_html=True,
72
  )
73
 
74
- query = st.sidebar.text_input(label='Search query', value='')
75
  language = st.sidebar.selectbox(
76
- 'Language',
77
- ('Arabic', 'Catalan', 'Code', 'English', 'Spanish', 'French', 'Indonesian', 'Indic', 'Niger-Congo', 'Portuguese', 'Vietnamese', 'Chinese', 'Detect Language', 'All'),
78
- index=3)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  max_results = st.sidebar.slider(
80
  "Maximum Number of Results",
81
  min_value=1,
@@ -84,22 +95,25 @@ max_results = st.sidebar.slider(
84
  value=10,
85
  help="Maximum Number of Documents to return",
86
  )
87
- footer="""<style>
88
- .footer {
89
- position: fixed;
90
- left: 0;
91
- bottom: 0;
92
- width: 100%;
93
- background-color: white;
94
- color: black;
95
- text-align: center;
96
- }
97
- </style>
98
- <div class="footer">
99
- <p>Powered by <a href="https://huggingface.co/" >HuggingFace 🤗</a> and <a href="https://github.com/castorini/pyserini" >Pyserini 🦆</a></p>
100
- </div>
 
 
 
101
  """
102
- st.sidebar.markdown(footer,unsafe_allow_html=True)
103
 
104
 
105
  def scisearch(query, language, num_results=10):
@@ -144,23 +158,29 @@ def scisearch(query, language, num_results=10):
144
 
145
  return results, highlight_terms
146
 
 
147
  PII_TAGS = {"KEY", "EMAIL", "USER", "IP_ADDRESS", "ID", "IPv4", "IPv6"}
148
  PII_PREFIX = "PI:"
149
 
 
150
  def process_pii(text):
151
  for tag in PII_TAGS:
152
  text = text.replace(
153
  PII_PREFIX + tag,
154
- """<b><mark style="background: Fuchsia; color: Lime;">REDACTED {}</mark></b>""".format(tag),
155
- )
 
 
156
  return text
157
 
 
158
  def highlight_string(paragraph: str, highlight_terms: list) -> str:
159
  for term in highlight_terms:
160
  paragraph = re.sub(f"\\b{term}\\b", f"<b>{term}</b>", paragraph, flags=re.I)
161
  paragraph = process_pii(paragraph)
162
  return paragraph
163
 
 
164
  def process_results(hits: list, highlight_terms: list) -> str:
165
  hit_list = []
166
  for i, hit in enumerate(hits):
@@ -169,7 +189,7 @@ def process_results(hits: list, highlight_terms: list) -> str:
169
  <h2>{i+1}. Document ID: {hit['docid']}</h2>
170
  <p>Language: <string>{hit['lang']}</string>, Score: {round(hit['score'], 2)}</p>
171
  """
172
- for subhit in hit['meta']['docs']:
173
  res_head += f"""
174
  <button onclick="load_image({subhit['_id']})">Load Image</button><br>
175
  <p><img id='{subhit['_id']}' src='{subhit['URL']}' style="width:400px;height:auto;display:none;"></p>
@@ -189,22 +209,25 @@ if st.sidebar.button("Search"):
189
  hits, highlight_terms = scisearch(query, LANG_MAPPING[language], max_results)
190
  html_results = process_results(hits, highlight_terms)
191
  rendered_results = f"""
192
- <div id="searchresultsarea">
193
- <br>
194
- <p id="searchresultsnumber">About {max_results} results</p>
195
- {html_results}
196
- </div>
197
- """
198
- st.markdown("""
199
- <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
200
- integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
201
- """,
202
- unsafe_allow_html=True)
 
 
203
  st.markdown(
204
  """
205
- <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
206
  """,
207
- unsafe_allow_html=True)
 
208
  st.markdown(
209
  f"""
210
  <div class="row no-gutters mt-3 align-items-center">
@@ -219,38 +242,35 @@ if st.sidebar.button("Search"):
219
  </div>
220
  </div>
221
  """,
222
- unsafe_allow_html=True)
 
223
  components.html(
224
  """
225
  <style>
226
- #searchresultsarea {
227
- font-family: 'Arial';
228
- }
229
-
230
- #searchresultsnumber {
231
- font-size: 0.8rem;
232
- color: gray;
233
- }
234
-
235
- .searchresult h2 {
236
- font-size: 19px;
237
- line-height: 18px;
238
- font-weight: normal;
239
- color: rgb(7, 111, 222);
240
- margin-bottom: 0px;
241
- margin-top: 25px;
242
- }
243
-
244
- .searchresult a {
245
- font-size: 12px;
246
- line-height: 12px;
247
- color: green;
248
- margin-bottom: 0px;
249
- }
250
-
251
- .dark-mode {
252
- color: white;
253
- }
254
  </style>
255
  <script>
256
  function load_image(id){
@@ -269,5 +289,8 @@ if st.sidebar.button("Search"):
269
  }
270
  </script>
271
  <button onclick="myFunction()">Toggle dark mode</button>
272
- """ + rendered_results, height=800, scrolling=True
273
- )
 
 
 
 
 
1
  import json
 
2
  import os
3
  import pprint
4
  import re
 
5
 
6
+ import requests
7
  import streamlit as st
8
  import streamlit.components.v1 as components
 
 
9
 
10
  pp = pprint.PrettyPrinter(indent=2)
11
  st.set_page_config(page_title="Gaia Search", layout="wide")
12
 
13
+ os.makedirs(os.path.join(os.getcwd(), ".streamlit"), exist_ok=True)
14
+ with open(os.path.join(os.getcwd(), ".streamlit/config.toml"), "w") as file:
15
+ file.write('[theme]\nbase="light"')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ LANG_MAPPING = {
18
+ "Arabic": "ar",
19
+ "Catalan": "ca",
20
+ "Code": "code",
21
+ "English": "en",
22
+ "Spanish": "es",
23
+ "French": "fr",
24
+ "Indonesian": "id",
25
+ "Indic": "indic",
26
+ "Niger-Congo": "nigercongo",
27
+ "Portuguese": "pt",
28
+ "Vietnamese": "vi",
29
+ "Chinese": "zh",
30
+ "Detect Language": "detect_language",
31
+ "All": "all",
32
+ }
33
 
34
 
35
  st.sidebar.markdown(
36
+ """
37
+ <style>
38
+ .aligncenter {
39
+ text-align: center;
40
+ font-weight: bold;
41
+ font-size: 50px;
42
+ }
43
+ </style>
44
+ <p class="aligncenter">Gaia Search 🌖🌏</p>
45
+ <p style="text-align: center;"> A search engine for the LAION large scale image caption corpora</p>
46
+ """,
47
+ unsafe_allow_html=True,
48
  )
49
 
50
  st.sidebar.markdown(
51
+ """
52
+ <style>
53
+ .aligncenter {
54
+ text-align: center;
55
+ }
56
+ </style>
57
+ <p style='text-align: center'>
58
+ <a href="" >GitHub</a> | <a href="" >Project Report</a>
59
+ </p>
60
+ <p class="aligncenter">
61
+ <a href="" target="_blank">
62
+ <img src="https://colab.research.google.com/assets/colab-badge.svg"/>
63
+ </a>
64
+ </p>
65
+ """,
66
+ unsafe_allow_html=True,
67
  )
68
 
69
+ query = st.sidebar.text_input(label="Search query", value="")
70
  language = st.sidebar.selectbox(
71
+ "Language",
72
+ (
73
+ "Arabic",
74
+ "Catalan",
75
+ "Code",
76
+ "English",
77
+ "Spanish",
78
+ "French",
79
+ "Indonesian",
80
+ "Indic",
81
+ "Niger-Congo",
82
+ "Portuguese",
83
+ "Vietnamese",
84
+ "Chinese",
85
+ "Detect Language",
86
+ "All",
87
+ ),
88
+ index=3,
89
+ )
90
  max_results = st.sidebar.slider(
91
  "Maximum Number of Results",
92
  min_value=1,
 
95
  value=10,
96
  help="Maximum Number of Documents to return",
97
  )
98
+ footer = """
99
+ <style>
100
+ .footer {
101
+ position: fixed;
102
+ left: 0;
103
+ bottom: 0;
104
+ width: 100%;
105
+ background-color: white;
106
+ color: black;
107
+ text-align: center;
108
+ }
109
+ </style>
110
+ <div class="footer">
111
+ <p>
112
+ Powered by <a href="https://huggingface.co/" >HuggingFace 🤗</a> and <a href="https://github.com/castorini/pyserini" >Pyserini 🦆</a>
113
+ </p>
114
+ </div>
115
  """
116
+ st.sidebar.markdown(footer, unsafe_allow_html=True)
117
 
118
 
119
  def scisearch(query, language, num_results=10):
 
158
 
159
  return results, highlight_terms
160
 
161
+
162
  PII_TAGS = {"KEY", "EMAIL", "USER", "IP_ADDRESS", "ID", "IPv4", "IPv6"}
163
  PII_PREFIX = "PI:"
164
 
165
+
166
  def process_pii(text):
167
  for tag in PII_TAGS:
168
  text = text.replace(
169
  PII_PREFIX + tag,
170
+ """<b><mark style="background: Fuchsia; color: Lime;">REDACTED {}</mark></b>""".format(
171
+ tag
172
+ ),
173
+ )
174
  return text
175
 
176
+
177
  def highlight_string(paragraph: str, highlight_terms: list) -> str:
178
  for term in highlight_terms:
179
  paragraph = re.sub(f"\\b{term}\\b", f"<b>{term}</b>", paragraph, flags=re.I)
180
  paragraph = process_pii(paragraph)
181
  return paragraph
182
 
183
+
184
  def process_results(hits: list, highlight_terms: list) -> str:
185
  hit_list = []
186
  for i, hit in enumerate(hits):
 
189
  <h2>{i+1}. Document ID: {hit['docid']}</h2>
190
  <p>Language: <string>{hit['lang']}</string>, Score: {round(hit['score'], 2)}</p>
191
  """
192
+ for subhit in hit["meta"]["docs"]:
193
  res_head += f"""
194
  <button onclick="load_image({subhit['_id']})">Load Image</button><br>
195
  <p><img id='{subhit['_id']}' src='{subhit['URL']}' style="width:400px;height:auto;display:none;"></p>
 
209
  hits, highlight_terms = scisearch(query, LANG_MAPPING[language], max_results)
210
  html_results = process_results(hits, highlight_terms)
211
  rendered_results = f"""
212
+ <div id="searchresultsarea">
213
+ <br>
214
+ <p id="searchresultsnumber">About {max_results} results</p>
215
+ {html_results}
216
+ </div>
217
+ """
218
+ st.markdown(
219
+ """
220
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
221
+ integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
222
+ """,
223
+ unsafe_allow_html=True,
224
+ )
225
  st.markdown(
226
  """
227
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
228
  """,
229
+ unsafe_allow_html=True,
230
+ )
231
  st.markdown(
232
  f"""
233
  <div class="row no-gutters mt-3 align-items-center">
 
242
  </div>
243
  </div>
244
  """,
245
+ unsafe_allow_html=True,
246
+ )
247
  components.html(
248
  """
249
  <style>
250
+ #searchresultsarea {
251
+ font-family: 'Arial';
252
+ }
253
+ #searchresultsnumber {
254
+ font-size: 0.8rem;
255
+ color: gray;
256
+ }
257
+ .searchresult h2 {
258
+ font-size: 19px;
259
+ line-height: 18px;
260
+ font-weight: normal;
261
+ color: rgb(7, 111, 222);
262
+ margin-bottom: 0px;
263
+ margin-top: 25px;
264
+ }
265
+ .searchresult a {
266
+ font-size: 12px;
267
+ line-height: 12px;
268
+ color: green;
269
+ margin-bottom: 0px;
270
+ }
271
+ .dark-mode {
272
+ color: white;
273
+ }
 
 
 
 
274
  </style>
275
  <script>
276
  function load_image(id){
 
289
  }
290
  </script>
291
  <button onclick="myFunction()">Toggle dark mode</button>
292
+ """
293
+ + rendered_results,
294
+ height=800,
295
+ scrolling=True,
296
+ )