ToluClassics committed on
Commit
1f8935a
•
1 Parent(s): ef13661

Add application file

Browse files
Files changed (1) hide show
  1. app.py +224 -0
app.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html
import http.client as http_client
import json
import logging
import os
import pprint
import re
import string

import requests
import streamlit as st
import streamlit.components.v1 as components
12
+
13
+
14
# Module-level pretty-printer (two-space indent).
pp = pprint.PrettyPrinter(indent=2)

# Page-wide Streamlit configuration: browser-tab title and full-width layout.
st.set_page_config(layout="wide", page_title="Gaia Search")
16
+
17
# Maps the language label shown in the sidebar to the code passed to the
# search function. Insertion order is the order the labels are listed in.
LANG_MAPPING = {
    "Arabic": "ar",
    "Catalan": "ca",
    "Code": "code",
    "English": "en",
    "Spanish": "es",
    "French": "fr",
    "Indonesian": "id",
    "Indic": "indic",
    "Niger-Congo": "nigercongo",
    "Portuguese": "pt",
    "Vietnamese": "vi",
    "Chinese": "zh",
    "Detect Language": "detect_language",
    "All": "all",
}
31
+
32
+
33
+
34
# Sidebar title, rendered as raw HTML so it can be centred and enlarged.
# NOTE(review): the emoji were mis-encoded in the text this was recovered
# from; the second one lost a byte and is reconstructed as U+1F30F - confirm.
st.sidebar.markdown(
    """
    <style>
    .aligncenter {
        text-align: center;
        font-weight: bold;
        font-size: 50px;
    }
    </style>
    <p class="aligncenter">Gaia Search 🌖🌏</p>
    """,
    unsafe_allow_html=True,
)

# Sidebar link row and Colab badge.
# NOTE(review): these URLs point at the "huggingartists" project, presumably
# left over from a template - confirm the intended Gaia Search links.
st.sidebar.markdown(
    """
    <style>
    .aligncenter {
        text-align: center;
    }
    </style>
    <p style='text-align: center'>
    <a href="https://github.com/AlekseyKorshuk/huggingartists" target="_blank">GitHub</a> | <a href="https://wandb.ai/huggingartists/huggingartists/reportlist" target="_blank">Project Report</a>
    </p>
    <p class="aligncenter">
        <a href="https://colab.research.google.com/github/AlekseyKorshuk/huggingartists/blob/master/huggingartists-demo.ipynb" target="_blank">
            <img src="https://colab.research.google.com/assets/colab-badge.svg"/>
        </a>
    </p>
    """,
    unsafe_allow_html=True,
)
66
+
67
# Free-text query entered by the user.
query = st.sidebar.text_input(label='Search query', value='')

# The options are taken from LANG_MAPPING (same labels, same order as before)
# so that every selectable label is guaranteed to have a language code when it
# is later looked up in LANG_MAPPING.
language = st.sidebar.selectbox('Language', tuple(LANG_MAPPING))

# Upper bound on the number of documents requested from the backend.
max_results = st.sidebar.slider(
    "Maximum Number of Results",
    min_value=1,
    max_value=100,
    step=1,
    value=1,
    help="Maximum Number of Documents to return",
)
79
+
80
+
81
def scisearch(query, language, num_results=10):
    """Send a search request to the backend and return its hits.

    The request is a JSON POST to the URL stored in the ``address``
    environment variable, with a 60 second timeout.

    Args:
        query: Search text. Surrounding whitespace is ignored; ``None`` or a
            blank string short-circuits without contacting the backend.
        language: Language code sent as ``"lang"``. The special value
            ``"detect_language"`` omits the field from the request.
        num_results: Number of results requested (sent as ``"k"``).

    Returns:
        A ``(results, highlight_terms)`` tuple taken from the response
        payload. Both items are empty lists when the query is blank, when the
        backend reports an unsupported language, or when the request or the
        response handling raises; in the latter two cases an HTML notice is
        rendered with ``st.markdown``.
    """
    # Check for None before calling .strip(), which would raise on None.
    if query is None or not query.strip():
        return [], []
    query = query.strip()

    post_data = {"query": query, "k": num_results}
    if language != "detect_language":
        post_data["lang"] = language

    try:
        output = requests.post(
            os.environ.get("address"),
            headers={"Content-type": "application/json"},
            data=json.dumps(post_data),
            timeout=60,
        )
        payload = json.loads(output.text)

        if "err" in payload and payload["err"]["type"] == "unsupported_lang":
            detected_lang = html.escape(str(payload["err"]["meta"]["detected_lang"]))
            notice = f"""
            <p style='font-size:18px; font-family: Arial; color:MediumVioletRed; text-align: center;'>
            Detected language <b>{detected_lang}</b> is not supported.<br>
            Please choose a language from the dropdown or type another query.
            </p><br><hr><br>"""
        else:
            return payload["results"], payload["highlight_terms"]
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing the
        # page, and keep the traceback in the logs.
        logging.exception("Search request failed")
        notice = f"""
        <p style='font-size:18px; font-family: Arial; color:MediumVioletRed; text-align: center;'>
        Raised {type(e).__name__}</p>
        <p style='font-size:14px; font-family: Arial; '>
        Check if a relevant discussion already exists in the Community tab. If not, please open a discussion.
        </p>
        """

    # Every non-success path ends here: show the notice and hand the caller
    # an empty result set so it can always unpack two values.
    st.markdown(notice, unsafe_allow_html=True)
    return [], []
122
+
123
def _escape_html(value) -> str:
    """Return ``value`` as text that is safe inside HTML content or attributes."""
    return html.escape(str(value), quote=True)


def highlight_string(paragraph: str, highlight_terms: list) -> str:
    """Return ``paragraph`` as HTML with each highlight term wrapped in ``<mark>``.

    All terms are matched in a single pass over the original text, so a term
    can never match inside a ``<mark>`` tag inserted for another term. Empty
    terms are ignored, matching is case-sensitive, and when terms overlap at
    the same position the longest one wins. Text outside and inside the marks
    is HTML-escaped.

    Args:
        paragraph: Plain text to decorate.
        highlight_terms: Strings to highlight.

    Returns:
        The escaped text with ``<mark>...</mark>`` around every match.
    """
    # Longest first so "foobar" is preferred over its prefix "foo".
    terms = sorted((term for term in highlight_terms if term), key=len, reverse=True)
    if not terms:
        return html.escape(paragraph)

    pattern = re.compile("|".join(re.escape(term) for term in terms))
    pieces = []
    cursor = 0
    for match in pattern.finditer(paragraph):
        pieces.append(html.escape(paragraph[cursor:match.start()]))
        pieces.append(f"<mark>{html.escape(match.group())}</mark>")
        cursor = match.end()
    pieces.append(html.escape(paragraph[cursor:]))
    return "".join(pieces)


def process_results(hits: list, highlight_terms: list) -> str:
    """Render search hits as an HTML fragment.

    Each hit becomes a ``<div class="searchresult">`` holding its document
    id, language and score, one link plus highlighted text per entry of
    ``hit['meta']['docs']``, and finally the hit's own highlighted text.
    Every value taken from a hit is HTML-escaped before interpolation.

    Args:
        hits: Dicts with the keys ``docid``, ``lang``, ``score``, ``text``
            and ``meta`` (whose ``docs`` entries have ``URL`` and ``TEXT``).
        highlight_terms: Terms to wrap in ``<mark>`` within the texts.

    Returns:
        The per-hit fragments joined by a single space; ``""`` for no hits.
    """
    hit_list = []
    for hit in hits:
        sections = [f"""
        <div class="searchresult">
            <h2>Document ID:: {_escape_html(hit['docid'])}</h2>
            <p>Language:: {_escape_html(hit['lang'])}, Score:: {_escape_html(hit['score'])}</p>
        """]
        for subhit in hit['meta']['docs']:
            url = _escape_html(subhit['URL'])
            sections.append(f"""
            <a href='{url}'>{url}</a> <button>▼</button>
            <p>{highlight_string(subhit['TEXT'], highlight_terms)}</p>
            """)
        sections.append(f"""
            <p>{highlight_string(hit['text'], highlight_terms)}</p>
        </div>
        """)
        hit_list.append("".join(sections))
    return " ".join(hit_list)
147
+
148
+
149
# Runs once per click on the sidebar "Search" button: query the backend,
# render the hits as HTML and display them below a search-bar style header.
if st.sidebar.button("Search"):
    # Forward the slider value so "Maximum Number of Results" takes effect.
    search_outcome = scisearch(query, LANG_MAPPING[language], num_results=max_results)
    if isinstance(search_outcome, tuple):
        hits, highlight_terms = search_outcome
        notice_html = ""
    else:
        # Anything other than a (hits, terms) pair - e.g. None or an HTML
        # notice string - is treated as "no hits"; a notice is still shown.
        hits, highlight_terms = [], []
        notice_html = search_outcome or ""

    html_results = process_results(hits, highlight_terms)
    # Report the real number of hits rather than a fixed placeholder figure.
    result_word = "result" if len(hits) == 1 else "results"
    rendered_results = f"""
    <div id="searchresultsarea">
        <br>
        <p id="searchresultsnumber">{len(hits)} {result_word}</p>
        {notice_html}
        {html_results}
    </div>
    """
    st.markdown("""
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
    integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
    """,
    unsafe_allow_html=True)
    st.markdown(
    """
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
    """,
    unsafe_allow_html=True)
    # The query is user input placed inside an HTML attribute, so it must be
    # escaped (quotes included) to keep the markup intact.
    # NOTE(review): the emoji were mis-encoded in the text this was recovered
    # from; the second one lost a byte and is reconstructed as U+1F30F - confirm.
    st.markdown(
    f"""
    <div class="row no-gutters mt-3 align-items-center">
        Gaia Search 🌖🌏
        <div class="col col-md-4">
            <input class="form-control border-secondary rounded-pill pr-5" type="search" value="{html.escape(query, quote=True)}" id="example-search-input2">
        </div>
        <div class="col-auto">
            <button class="btn btn-outline-light text-dark border-0 rounded-pill ml-n5" type="button">
                <i class="fa fa-search"></i>
            </button>
        </div>
    </div>
    """,
    unsafe_allow_html=True)
    # Style sheet and results are rendered together in one component.
    components.html(
    """
    <style>
    #searchresultsarea {
        font-family: 'Arial';
    }

    #searchresultsnumber {
        font-size: 0.8rem;
        color: gray;
    }

    .searchresult h2 {
        font-size: 19px;
        line-height: 18px;
        font-weight: normal;
        color: rgb(29, 1, 189);
        margin-bottom: 0px;
        margin-top: 25px;
    }

    .searchresult a {
        font-size: 14px;
        line-height: 14px;
        color: green;
        margin-bottom: 0px;
    }

    .searchresult button {
        font-size: 10px;
        line-height: 14px;
        color: green;
        margin-bottom: 0px;
        padding: 0px;
        border-width: 0px;
        background-color: white;
    }

    </style>
    """ + rendered_results, height=1000, scrolling=True
    )