bradley6597
committed on
Commit
•
fb7f2e1
1
Parent(s):
a3af591
Move from Dev environment
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import functions as funky
|
2 |
import pandas as pd
|
3 |
import gradio as gr
|
4 |
import os
|
@@ -14,14 +14,14 @@ import re
|
|
14 |
|
15 |
login(token = os.environ['HUB_TOKEN'])
|
16 |
|
17 |
-
|
18 |
logger = gr.HuggingFaceDatasetSaver(os.environ['HUB_TOKEN'], dataset_name='illustration_gdrive_logging_main', organization=None, private=True)
|
19 |
logger.setup([gr.Text(label="clicked_url"), gr.Text(label="seach_term"), gr.Text(label = 'sessionhash'), gr.Text(label = 'datetime')], './flagged_data_points')
|
20 |
|
|
|
21 |
logging_js = '''
|
22 |
function magicFunc(x){
|
23 |
let script = document.createElement('script');
|
24 |
-
script.
|
25 |
document.head.appendChild(script);
|
26 |
}
|
27 |
'''
|
@@ -29,6 +29,12 @@ function magicFunc(x){
|
|
29 |
dataset = load_dataset("bradley6597/illustration-test", data_files = 'data.csv')
|
30 |
df = pd.DataFrame(dataset['train']).drop_duplicates()
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
ill_links = df.copy()
|
33 |
ill_links = ill_links[ill_links['Description'] != 'Moved'].copy()
|
34 |
ill_links['code'] = ill_links['link'].str.replace("https://drive.google.com/file/d/", "", regex = False)
|
@@ -36,62 +42,72 @@ ill_links['code'] = ill_links['code'].str.replace("/view?usp=drivesdk", "", rege
|
|
36 |
ill_links['filename'] = ill_links['file'].str.replace(".*\\/", "", regex = True)
|
37 |
# ill_links['image_code'] = 'https://lh3.google.com/u/0/d/' + ill_links['code'] + '=k'
|
38 |
ill_links['image_code'] = 'https://lh3.google.com/u/0/d/' + ill_links['code'] + '=w320-h304'
|
39 |
-
ill_links['image_code'] = '<center><a href="' + ill_links['link'] + '" target="_blank" onclick="magicFunc(\'' + ill_links['code'] + '\')"><img src="' + ill_links['image_code'] + '" style="max-height:400px; max-width:200px"> ' + ill_links['filename'] + '</a></center>'
|
40 |
ill_links['shared_drive'] = ill_links['file'].str.replace("/content/drive/Shareddrives/", "", regex = False)
|
41 |
ill_links['shared_drive'] = ill_links['shared_drive'].str.replace("(.*?)\\/.*", "\\1", regex = True)
|
42 |
ill_links['Description'] = ill_links['Description'].str.replace("No Description", "", regex = False)
|
43 |
|
44 |
-
ill_links_title = ill_links.copy()
|
45 |
-
|
46 |
ill_links['ID'] = ill_links.index
|
47 |
-
ill_links_title['ID'] = ill_links_title.index
|
48 |
ill_links['title'] = ill_links['filename']
|
49 |
-
ill_links_title['title'] = ill_links_title['filename']
|
50 |
ill_links['url'] = ill_links['image_code']
|
51 |
-
ill_links_title['url'] = ill_links_title['image_code']
|
52 |
-
ill_links['abstract'] = ill_links['filename'].str.replace("\\-|\\_", " ", regex = True) + ' ' + ill_links['Description'].str.replace(",", " ", regex = False).astype(str)
|
53 |
-
ill_links_title['abstract'] = ill_links_title['filename'].str.replace('\\-|\\_', " ", regex = True)
|
54 |
ill_links['filepath'] = ill_links['file']
|
55 |
-
ill_links_title['filepath'] = ill_links_title['file']
|
56 |
ill_links['post_filepath'] = ill_links['filepath'].str.replace(".*?\\/KS1 EYFS\\/", "", regex = True)
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
60 |
|
61 |
ill_check_lst = []
|
62 |
for i in range(0, 5):
|
63 |
-
tmp_links = ill_links['
|
64 |
-
tmp_links =
|
65 |
-
tmp_links = re.sub("(.*)>.*?<\\/a>", "\\1></a>", tmp_links)
|
66 |
-
tmp_links = tmp_links.replace("<center>", "")
|
67 |
-
tmp_links = tmp_links.replace("</center>", "")
|
68 |
tmp_links = f'<p>{i}</p>' + tmp_links
|
69 |
ill_check_lst.append(tmp_links)
|
70 |
ill_check_df = pd.DataFrame(ill_check_lst).T
|
71 |
ill_check_html = ill_check_df.to_html(escape = False, render_links = True, index = False, header = False)
|
72 |
-
|
|
|
|
|
|
|
|
|
73 |
ind_main, doc_main, tf_main = funky.index_documents(ill_links)
|
|
|
74 |
ind_title, doc_title, tf_title = funky.index_documents(ill_links_title)
|
75 |
-
|
|
|
|
|
76 |
|
77 |
def same_auth(username, password):
|
78 |
return(username == os.environ['username']) & (password == os.environ['password'])
|
79 |
|
80 |
|
81 |
-
|
|
|
|
|
|
|
|
|
82 |
if search_title:
|
83 |
-
output = funky.search(tf_title, doc_title, ind_title, search_text, search_type = 'AND', ranking = True)
|
84 |
else:
|
85 |
output = funky.search(tf_main, doc_main, ind_main, search_text, search_type='AND', ranking = True)
|
|
|
|
|
|
|
|
|
86 |
output = [x for o in output for x in o if type(x) is not float]
|
|
|
|
|
87 |
|
88 |
if len(output) > 0:
|
89 |
|
90 |
output_df = (pd.DataFrame(output)
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
|
96 |
output_df['url'] = output_df['url'].str.replace("/u/0/", f"/u/{int(user_num)}/", regex = False)
|
97 |
if len(sd) == 1:
|
@@ -118,6 +134,10 @@ def search_index(search_text, sd, ks, sort_by, max_results, user_num, search_tit
|
|
118 |
output_df2 = output_df2.sort_values(by = ['title'], ascending = True)
|
119 |
|
120 |
total_returned = 'No. of Results to Return (Total: ' + str(output_df2.shape[0]) + ')'
|
|
|
|
|
|
|
|
|
121 |
if max_results != 'All':
|
122 |
output_df2 = output_df2.head(int(max_results))
|
123 |
output_df2 = output_df2[['url']].reset_index(drop = True)
|
@@ -139,12 +159,13 @@ def search_index(search_text, sd, ks, sort_by, max_results, user_num, search_tit
|
|
139 |
|
140 |
if final_df.shape[0] == 0 :
|
141 |
final_df = pd.DataFrame(['<h3>No Results Found :(</h3>'])
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
return('<center>' +
|
146 |
final_df.to_html(escape = False, render_links = True, index = False, header = False) +
|
147 |
-
'</center>',
|
|
|
|
|
148 |
|
149 |
|
150 |
def search_logging(x: str, request: gr.Request):
|
@@ -157,7 +178,6 @@ back_to_top_btn_html = '''
|
|
157 |
</button>
|
158 |
'''
|
159 |
|
160 |
-
|
161 |
style = '''
|
162 |
footer{
|
163 |
display: none !important;
|
@@ -188,6 +208,32 @@ td img{
|
|
188 |
padding: .5px;
|
189 |
border-radius: 4px;
|
190 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
'''
|
192 |
|
193 |
with gr.Blocks(css=style) as app:
|
@@ -206,21 +252,21 @@ with gr.Blocks(css=style) as app:
|
|
206 |
sort_by = gr.Dropdown(choices = ['Relevance', 'Date Created', 'A-Z'], value = 'Relevance', multiselect = False, label = 'Sort By')
|
207 |
max_return = gr.Dropdown(choices = ['10', '25', '50', '75', '100', '250', '500', '1000', '5000', '10000', 'All'], value = '50', multiselect = False, label = 'No. of Results to Return (Total: 0)')
|
208 |
with gr.Row():
|
209 |
-
search_button = gr.Button(value="Search!")
|
210 |
with gr.Row():
|
211 |
-
output_df = gr.HTML()
|
212 |
back_top_btn = gr.HTML(back_to_top_btn_html)
|
213 |
-
|
214 |
-
|
|
|
215 |
search_button.click(search_logging, inputs=[search_prompt], outputs=None)
|
216 |
search_prompt.submit(search_logging, inputs=[search_prompt], outputs=None)
|
217 |
-
|
218 |
app.load(_js = logging_js)
|
219 |
|
220 |
app.auth = (same_auth)
|
221 |
app.auth_message = ''
|
222 |
|
223 |
-
|
224 |
fapi = FastAPI()
|
225 |
|
226 |
fapi.add_middleware(SessionMiddleware, secret_key=os.environ['session_key'])
|
@@ -243,10 +289,8 @@ async def track(url: str, q: str, request: Request):
|
|
243 |
logger.flag([url, q, request.cookies['access-token'], str(datetime.now())])
|
244 |
return {"message": "ok"}
|
245 |
|
246 |
-
|
247 |
# mount Gradio app to FastAPI app
|
248 |
app2 = gr.mount_gradio_app(fapi, app, path="/")
|
249 |
# serve the app
|
250 |
if __name__ == "__main__":
|
251 |
-
uvicorn.run(app2, host="0.0.0.0", port=7860)
|
252 |
-
|
|
|
1 |
+
import functions as funky # need to enable this for Hugging Face
|
2 |
import pandas as pd
|
3 |
import gradio as gr
|
4 |
import os
|
|
|
14 |
|
15 |
login(token = os.environ['HUB_TOKEN'])
|
16 |
|
|
|
17 |
logger = gr.HuggingFaceDatasetSaver(os.environ['HUB_TOKEN'], dataset_name='illustration_gdrive_logging_main', organization=None, private=True)
|
18 |
logger.setup([gr.Text(label="clicked_url"), gr.Text(label="seach_term"), gr.Text(label = 'sessionhash'), gr.Text(label = 'datetime')], './flagged_data_points')
|
19 |
|
20 |
+
|
21 |
logging_js = '''
|
22 |
function magicFunc(x){
|
23 |
let script = document.createElement('script');
|
24 |
+
script.src = "file/all_js_functions.js"
|
25 |
document.head.appendChild(script);
|
26 |
}
|
27 |
'''
|
|
|
29 |
dataset = load_dataset("bradley6597/illustration-test", data_files = 'data.csv')
|
30 |
df = pd.DataFrame(dataset['train']).drop_duplicates()
|
31 |
|
32 |
+
dataset_ai = load_dataset("bradley6597/illustration-test", data_files = 'ai_captions_data.csv')
|
33 |
+
ai_captions = pd.DataFrame(dataset_ai['train']).drop_duplicates()
|
34 |
+
|
35 |
+
df = df.merge(ai_captions, how = 'left', on = 'clean_link')
|
36 |
+
df['ai_description'] = df['ai_description'].fillna('')
|
37 |
+
|
38 |
ill_links = df.copy()
|
39 |
ill_links = ill_links[ill_links['Description'] != 'Moved'].copy()
|
40 |
ill_links['code'] = ill_links['link'].str.replace("https://drive.google.com/file/d/", "", regex = False)
|
|
|
42 |
ill_links['filename'] = ill_links['file'].str.replace(".*\\/", "", regex = True)
|
43 |
# ill_links['image_code'] = 'https://lh3.google.com/u/0/d/' + ill_links['code'] + '=k'
|
44 |
ill_links['image_code'] = 'https://lh3.google.com/u/0/d/' + ill_links['code'] + '=w320-h304'
|
45 |
+
ill_links['image_code'] = '<center><a href="' + ill_links['link'] + '" target="_blank" onclick="magicFunc(\'' + ill_links['code'] + '\')"><img src="' + ill_links['image_code'] + '" style="max-height:400px; max-width:200px"> ' + ill_links['filename'] + '</a><a href="https://drive.google.com/u/0/uc?id=' + ill_links['code'] + '&export=download"><img src="/file/download_icon.png"></a><button class="submit-btn" onclick="mdFunc(this.parentNode)">Make Draggable</button></center>'
|
46 |
ill_links['shared_drive'] = ill_links['file'].str.replace("/content/drive/Shareddrives/", "", regex = False)
|
47 |
ill_links['shared_drive'] = ill_links['shared_drive'].str.replace("(.*?)\\/.*", "\\1", regex = True)
|
48 |
ill_links['Description'] = ill_links['Description'].str.replace("No Description", "", regex = False)
|
49 |
|
|
|
|
|
50 |
ill_links['ID'] = ill_links.index
|
|
|
51 |
ill_links['title'] = ill_links['filename']
|
|
|
52 |
ill_links['url'] = ill_links['image_code']
|
|
|
|
|
|
|
53 |
ill_links['filepath'] = ill_links['file']
|
|
|
54 |
ill_links['post_filepath'] = ill_links['filepath'].str.replace(".*?\\/KS1 EYFS\\/", "", regex = True)
|
55 |
+
|
56 |
+
ill_links_title = ill_links.copy()
|
57 |
+
ill_links_ai = ill_links.copy()
|
58 |
+
|
59 |
+
ill_links['abstract'] = ill_links['filename'].str.replace("\\-|\\_", " ", regex = True) + ' ' + ill_links['Description'].str.replace(",", " ", regex = False).astype(str)
|
60 |
+
ill_links_title['abstract'] = ill_links_title['filename'].str.replace('\\-|\\_', " ", regex = True)
|
61 |
+
ill_links_ai['abstract'] = ill_links_title['ai_description']
|
62 |
|
63 |
ill_check_lst = []
|
64 |
for i in range(0, 5):
|
65 |
+
tmp_links = f'https://lh3.google.com/u/{i}/d/' + ill_links['code'].iloc[0] + '=w320-h304'
|
66 |
+
tmp_links = '<img onmousedown="mdFunc(this)" src="' + tmp_links + '" style="max-height:400px; max-width:25%">'
|
|
|
|
|
|
|
67 |
tmp_links = f'<p>{i}</p>' + tmp_links
|
68 |
ill_check_lst.append(tmp_links)
|
69 |
ill_check_df = pd.DataFrame(ill_check_lst).T
|
70 |
ill_check_html = ill_check_df.to_html(escape = False, render_links = True, index = False, header = False)
|
71 |
+
|
72 |
+
ill_links = ill_links[['ID', 'title', 'url', 'abstract', 'filepath', 'Date Created', 'post_filepath']]
|
73 |
+
ill_links_title = ill_links_title[['ID', 'title', 'url', 'abstract', 'filepath', 'Date Created', 'Description', 'post_filepath']]
|
74 |
+
ill_links_ai = ill_links_ai[['ID', 'title', 'url', 'abstract', 'filepath', 'Date Created', 'Description', 'post_filepath']]
|
75 |
+
|
76 |
ind_main, doc_main, tf_main = funky.index_documents(ill_links)
|
77 |
+
del ill_links
|
78 |
ind_title, doc_title, tf_title = funky.index_documents(ill_links_title)
|
79 |
+
del ill_links_title
|
80 |
+
ind_ai, doc_ai, tf_ai = funky.index_documents(ill_links_ai)
|
81 |
+
del ill_links_ai
|
82 |
|
83 |
def same_auth(username, password):
    """Return True when the supplied credentials match the configured ones.

    The expected values are read from the ``username`` and ``password``
    environment variables on every call.

    Args:
        username: User name submitted through the login form.
        password: Password submitted through the login form.

    Returns:
        bool: True only if both values match; False otherwise, including
        when either environment variable is unset or a non-string value
        is supplied (fail closed instead of raising).
    """
    # Function-scope import keeps this block self-contained.
    import hmac

    expected_user = os.environ.get('username')
    expected_pass = os.environ.get('password')
    if expected_user is None or expected_pass is None:
        # Credentials are not configured: deny access rather than crash.
        return False
    if not isinstance(username, str) or not isinstance(password, str):
        return False

    # compare_digest only accepts ASCII str, so compare UTF-8 bytes instead.
    # Both comparisons are evaluated before combining so the time taken does
    # not reveal which of the two fields was wrong.
    user_ok = hmac.compare_digest(username.encode('utf-8'),
                                  expected_user.encode('utf-8'))
    pass_ok = hmac.compare_digest(password.encode('utf-8'),
                                  expected_pass.encode('utf-8'))
    return user_ok and pass_ok
|
85 |
|
86 |
|
87 |
+
|
88 |
+
def search_index(search_text, sd, ks, sort_by, max_results, user_num, search_title, increase = None):
|
89 |
+
max_results_list = ['10', '25', '50', '75', '100', '250', '500', '1000', '5000', '10000', 'All']
|
90 |
+
if increase:
    # "Load More" bumps the limit to the next dropdown choice. Clamp at the
    # last entry ('All') so a click while 'All' is already selected does not
    # index past the end of the list.
    next_idx = min(max_results_list.index(max_results) + 1,
                   len(max_results_list) - 1)
    max_results = max_results_list[next_idx]
|
92 |
if search_title:
|
93 |
+
output = funky.search(tf_title, doc_title, ind_title, search_text, search_type = 'AND', ranking = True)
|
94 |
else:
|
95 |
output = funky.search(tf_main, doc_main, ind_main, search_text, search_type='AND', ranking = True)
|
96 |
+
# Don't need to order by AI as the AI ranking numbers are much lower than the default numbers
|
97 |
+
output_ai = funky.search(tf_ai, doc_ai, ind_ai, search_text, search_type = 'AND', ranking = True)
|
98 |
+
output.extend(output_ai)
|
99 |
+
|
100 |
output = [x for o in output for x in o if type(x) is not float]
|
101 |
+
|
102 |
+
load_more_visible = False
|
103 |
|
104 |
if len(output) > 0:
|
105 |
|
106 |
output_df = (pd.DataFrame(output)
|
107 |
+
.groupby('url')
|
108 |
+
.first()
|
109 |
+
.reset_index()
|
110 |
+
.drop_duplicates())
|
111 |
|
112 |
output_df['url'] = output_df['url'].str.replace("/u/0/", f"/u/{int(user_num)}/", regex = False)
|
113 |
if len(sd) == 1:
|
|
|
134 |
output_df2 = output_df2.sort_values(by = ['title'], ascending = True)
|
135 |
|
136 |
total_returned = 'No. of Results to Return (Total: ' + str(output_df2.shape[0]) + ')'
|
137 |
+
|
138 |
+
# Offer "Load More" only when a numeric limit is hiding some results.
# 'All' is not a number (int('All') raises ValueError) and already shows
# every row, so the button stays hidden in that case.
if max_results != 'All' and output_df2.shape[0] > int(max_results):
    load_more_visible = True
|
140 |
+
|
141 |
if max_results != 'All':
|
142 |
output_df2 = output_df2.head(int(max_results))
|
143 |
output_df2 = output_df2[['url']].reset_index(drop = True)
|
|
|
159 |
|
160 |
if final_df.shape[0] == 0 :
|
161 |
final_df = pd.DataFrame(['<h3>No Results Found :(</h3>'])
|
162 |
+
|
163 |
+
|
|
|
164 |
return('<center>' +
|
165 |
final_df.to_html(escape = False, render_links = True, index = False, header = False) +
|
166 |
+
'</center>',
|
167 |
+
gr.update(label = total_returned, value = max_results),
|
168 |
+
gr.update(visible = load_more_visible))
|
169 |
|
170 |
|
171 |
def search_logging(x: str, request: gr.Request):
|
|
|
178 |
</button>
|
179 |
'''
|
180 |
|
|
|
181 |
style = '''
|
182 |
footer{
|
183 |
display: none !important;
|
|
|
208 |
padding: .5px;
|
209 |
border-radius: 4px;
|
210 |
}
|
211 |
+
|
212 |
+
.submit-btn{
|
213 |
+
display:inline-block !important;
|
214 |
+
padding:0.7em 1.4em !important;
|
215 |
+
margin:0 0.3em 0.3em 0 !important;
|
216 |
+
border-radius:0.15em !important;
|
217 |
+
box-sizing: border-box !important;
|
218 |
+
text-decoration:none !important;
|
219 |
+
font-family:'Roboto',sans-serif !important;
|
220 |
+
text-transform:uppercase !important;
|
221 |
+
font-weight:400 !important;
|
222 |
+
color:#FFFFFF !important;
|
223 |
+
background-color:#3369ff !important;
|
224 |
+
box-shadow:inset 0 -0.6em 0 -0.35em rgba(0,0,0,0.17) !important;
|
225 |
+
text-align:center !important;
|
226 |
+
position:relative !important;
|
227 |
+
}
|
228 |
+
.submit-btn:active{
|
229 |
+
top:0.1em !important;
|
230 |
+
}
|
231 |
+
@media all and (max-width:30em){
|
232 |
+
.submit-btn{
|
233 |
+
display:block !important;
|
234 |
+
margin:0.4em auto !important;
|
235 |
+
}
|
236 |
+
}
|
237 |
'''
|
238 |
|
239 |
with gr.Blocks(css=style) as app:
|
|
|
252 |
sort_by = gr.Dropdown(choices = ['Relevance', 'Date Created', 'A-Z'], value = 'Relevance', multiselect = False, label = 'Sort By')
|
253 |
max_return = gr.Dropdown(choices = ['10', '25', '50', '75', '100', '250', '500', '1000', '5000', '10000', 'All'], value = '50', multiselect = False, label = 'No. of Results to Return (Total: 0)')
|
254 |
with gr.Row():
|
255 |
+
search_button = gr.Button(value="Search!", interactive = True)
|
256 |
with gr.Row():
|
257 |
+
output_df = gr.HTML()
|
258 |
back_top_btn = gr.HTML(back_to_top_btn_html)
|
259 |
+
load_more_results_btn = gr.Button(value = 'Load More Results', interactive = True, visible = False)
|
260 |
+
search_button.click(search_index, inputs=[search_prompt, shared_drive, key_stage, sort_by, max_return, user_num, title_search], outputs=[output_df, max_return, load_more_results_btn])
|
261 |
+
search_prompt.submit(search_index, inputs=[search_prompt, shared_drive, key_stage, sort_by, max_return, user_num, title_search], outputs=[output_df, max_return, load_more_results_btn])
|
262 |
search_button.click(search_logging, inputs=[search_prompt], outputs=None)
|
263 |
search_prompt.submit(search_logging, inputs=[search_prompt], outputs=None)
|
264 |
+
load_more_results_btn.click(search_index, inputs=[search_prompt, shared_drive, key_stage, sort_by, max_return, user_num, title_search, load_more_results_btn], outputs=[output_df, max_return, load_more_results_btn])
|
265 |
app.load(_js = logging_js)
|
266 |
|
267 |
app.auth = (same_auth)
|
268 |
app.auth_message = ''
|
269 |
|
|
|
270 |
fapi = FastAPI()
|
271 |
|
272 |
fapi.add_middleware(SessionMiddleware, secret_key=os.environ['session_key'])
|
|
|
289 |
logger.flag([url, q, request.cookies['access-token'], str(datetime.now())])
|
290 |
return {"message": "ok"}
|
291 |
|
|
|
292 |
# mount Gradio app to FastAPI app
|
293 |
app2 = gr.mount_gradio_app(fapi, app, path="/")
|
294 |
# serve the app
|
295 |
if __name__ == "__main__":
|
296 |
+
uvicorn.run(app2, host="0.0.0.0", port=7860)
|
|