bradley6597 committed on
Commit
fb7f2e1
1 Parent(s): a3af591

Move from Dev environment

Browse files
Files changed (1) hide show
  1. app.py +86 -42
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import functions as funky
2
  import pandas as pd
3
  import gradio as gr
4
  import os
@@ -14,14 +14,14 @@ import re
14
 
15
  login(token = os.environ['HUB_TOKEN'])
16
 
17
-
18
  logger = gr.HuggingFaceDatasetSaver(os.environ['HUB_TOKEN'], dataset_name='illustration_gdrive_logging_main', organization=None, private=True)
19
  logger.setup([gr.Text(label="clicked_url"), gr.Text(label="seach_term"), gr.Text(label = 'sessionhash'), gr.Text(label = 'datetime')], './flagged_data_points')
20
 
 
21
  logging_js = '''
22
  function magicFunc(x){
23
  let script = document.createElement('script');
24
- script.innerHTML = "async function magicFunc(x){let z = document.getElementById('search_term').getElementsByTagName('textarea')[0].value; await fetch('/track?url=' + x + '&q=' + z)}";
25
  document.head.appendChild(script);
26
  }
27
  '''
@@ -29,6 +29,12 @@ function magicFunc(x){
29
  dataset = load_dataset("bradley6597/illustration-test", data_files = 'data.csv')
30
  df = pd.DataFrame(dataset['train']).drop_duplicates()
31
 
 
 
 
 
 
 
32
  ill_links = df.copy()
33
  ill_links = ill_links[ill_links['Description'] != 'Moved'].copy()
34
  ill_links['code'] = ill_links['link'].str.replace("https://drive.google.com/file/d/", "", regex = False)
@@ -36,62 +42,72 @@ ill_links['code'] = ill_links['code'].str.replace("/view?usp=drivesdk", "", rege
36
  ill_links['filename'] = ill_links['file'].str.replace(".*\\/", "", regex = True)
37
  # ill_links['image_code'] = 'https://lh3.google.com/u/0/d/' + ill_links['code'] + '=k'
38
  ill_links['image_code'] = 'https://lh3.google.com/u/0/d/' + ill_links['code'] + '=w320-h304'
39
- ill_links['image_code'] = '<center><a href="' + ill_links['link'] + '" target="_blank" onclick="magicFunc(\'' + ill_links['code'] + '\')"><img src="' + ill_links['image_code'] + '" style="max-height:400px; max-width:200px"> ' + ill_links['filename'] + '</a></center>'
40
  ill_links['shared_drive'] = ill_links['file'].str.replace("/content/drive/Shareddrives/", "", regex = False)
41
  ill_links['shared_drive'] = ill_links['shared_drive'].str.replace("(.*?)\\/.*", "\\1", regex = True)
42
  ill_links['Description'] = ill_links['Description'].str.replace("No Description", "", regex = False)
43
 
44
- ill_links_title = ill_links.copy()
45
-
46
  ill_links['ID'] = ill_links.index
47
- ill_links_title['ID'] = ill_links_title.index
48
  ill_links['title'] = ill_links['filename']
49
- ill_links_title['title'] = ill_links_title['filename']
50
  ill_links['url'] = ill_links['image_code']
51
- ill_links_title['url'] = ill_links_title['image_code']
52
- ill_links['abstract'] = ill_links['filename'].str.replace("\\-|\\_", " ", regex = True) + ' ' + ill_links['Description'].str.replace(",", " ", regex = False).astype(str)
53
- ill_links_title['abstract'] = ill_links_title['filename'].str.replace('\\-|\\_', " ", regex = True)
54
  ill_links['filepath'] = ill_links['file']
55
- ill_links_title['filepath'] = ill_links_title['file']
56
  ill_links['post_filepath'] = ill_links['filepath'].str.replace(".*?\\/KS1 EYFS\\/", "", regex = True)
57
- ill_links_title['post_filepath'] = ill_links_title['filepath'].str.replace(".*?\\/KS1 EYFS\\/", "", regex = True)
58
- ill_links = ill_links[['ID', 'title', 'url', 'abstract', 'filepath', 'Date Created', 'post_filepath']]
59
- ill_links_title = ill_links_title[['ID', 'title', 'url', 'abstract', 'filepath', 'Date Created', 'Description', 'post_filepath']]
 
 
 
 
60
 
61
  ill_check_lst = []
62
  for i in range(0, 5):
63
- tmp_links = ill_links['url'].iloc[0].replace("/u/0/", f"/u/{i}/")
64
- tmp_links = tmp_links.replace('max-width:200px', 'max-width:25%')
65
- tmp_links = re.sub("(.*)>.*?<\\/a>", "\\1></a>", tmp_links)
66
- tmp_links = tmp_links.replace("<center>", "")
67
- tmp_links = tmp_links.replace("</center>", "")
68
  tmp_links = f'<p>{i}</p>' + tmp_links
69
  ill_check_lst.append(tmp_links)
70
  ill_check_df = pd.DataFrame(ill_check_lst).T
71
  ill_check_html = ill_check_df.to_html(escape = False, render_links = True, index = False, header = False)
72
-
 
 
 
 
73
  ind_main, doc_main, tf_main = funky.index_documents(ill_links)
 
74
  ind_title, doc_title, tf_title = funky.index_documents(ill_links_title)
75
-
 
 
76
 
77
  def same_auth(username, password):
78
  return(username == os.environ['username']) & (password == os.environ['password'])
79
 
80
 
81
- def search_index(search_text, sd, ks, sort_by, max_results, user_num, search_title):
 
 
 
 
82
  if search_title:
83
- output = funky.search(tf_title, doc_title, ind_title, search_text, search_type = 'AND', ranking = True)
84
  else:
85
  output = funky.search(tf_main, doc_main, ind_main, search_text, search_type='AND', ranking = True)
 
 
 
 
86
  output = [x for o in output for x in o if type(x) is not float]
 
 
87
 
88
  if len(output) > 0:
89
 
90
  output_df = (pd.DataFrame(output)
91
- .groupby('url')
92
- .first()
93
- .reset_index()
94
- .drop_duplicates())
95
 
96
  output_df['url'] = output_df['url'].str.replace("/u/0/", f"/u/{int(user_num)}/", regex = False)
97
  if len(sd) == 1:
@@ -118,6 +134,10 @@ def search_index(search_text, sd, ks, sort_by, max_results, user_num, search_tit
118
  output_df2 = output_df2.sort_values(by = ['title'], ascending = True)
119
 
120
  total_returned = 'No. of Results to Return (Total: ' + str(output_df2.shape[0]) + ')'
 
 
 
 
121
  if max_results != 'All':
122
  output_df2 = output_df2.head(int(max_results))
123
  output_df2 = output_df2[['url']].reset_index(drop = True)
@@ -139,12 +159,13 @@ def search_index(search_text, sd, ks, sort_by, max_results, user_num, search_tit
139
 
140
  if final_df.shape[0] == 0 :
141
  final_df = pd.DataFrame(['<h3>No Results Found :(</h3>'])
142
- total_returned = 'No. of Results to Return (Total: 0)'
143
-
144
-
145
  return('<center>' +
146
  final_df.to_html(escape = False, render_links = True, index = False, header = False) +
147
- '</center>', gr.update(label = total_returned))
 
 
148
 
149
 
150
  def search_logging(x: str, request: gr.Request):
@@ -157,7 +178,6 @@ back_to_top_btn_html = '''
157
  </button>
158
  '''
159
 
160
-
161
  style = '''
162
  footer{
163
  display: none !important;
@@ -188,6 +208,32 @@ td img{
188
  padding: .5px;
189
  border-radius: 4px;
190
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  '''
192
 
193
  with gr.Blocks(css=style) as app:
@@ -206,21 +252,21 @@ with gr.Blocks(css=style) as app:
206
  sort_by = gr.Dropdown(choices = ['Relevance', 'Date Created', 'A-Z'], value = 'Relevance', multiselect = False, label = 'Sort By')
207
  max_return = gr.Dropdown(choices = ['10', '25', '50', '75', '100', '250', '500', '1000', '5000', '10000', 'All'], value = '50', multiselect = False, label = 'No. of Results to Return (Total: 0)')
208
  with gr.Row():
209
- search_button = gr.Button(value="Search!")
210
  with gr.Row():
211
- output_df = gr.HTML()
212
  back_top_btn = gr.HTML(back_to_top_btn_html)
213
- search_button.click(search_index, inputs=[search_prompt, shared_drive, key_stage, sort_by, max_return, user_num, title_search], outputs=[output_df, max_return])
214
- search_prompt.submit(search_index, inputs=[search_prompt, shared_drive, key_stage, sort_by, max_return, user_num, title_search], outputs=[output_df, max_return])
 
215
  search_button.click(search_logging, inputs=[search_prompt], outputs=None)
216
  search_prompt.submit(search_logging, inputs=[search_prompt], outputs=None)
217
-
218
  app.load(_js = logging_js)
219
 
220
  app.auth = (same_auth)
221
  app.auth_message = ''
222
 
223
-
224
  fapi = FastAPI()
225
 
226
  fapi.add_middleware(SessionMiddleware, secret_key=os.environ['session_key'])
@@ -243,10 +289,8 @@ async def track(url: str, q: str, request: Request):
243
  logger.flag([url, q, request.cookies['access-token'], str(datetime.now())])
244
  return {"message": "ok"}
245
 
246
-
247
  # mount Gradio app to FastAPI app
248
  app2 = gr.mount_gradio_app(fapi, app, path="/")
249
  # serve the app
250
  if __name__ == "__main__":
251
- uvicorn.run(app2, host="0.0.0.0", port=7860)
252
-
 
1
+ import functions as funky # need to enable this for Hugging Face
2
  import pandas as pd
3
  import gradio as gr
4
  import os
 
14
 
15
  login(token = os.environ['HUB_TOKEN'])
16
 
 
17
  logger = gr.HuggingFaceDatasetSaver(os.environ['HUB_TOKEN'], dataset_name='illustration_gdrive_logging_main', organization=None, private=True)
18
  logger.setup([gr.Text(label="clicked_url"), gr.Text(label="seach_term"), gr.Text(label = 'sessionhash'), gr.Text(label = 'datetime')], './flagged_data_points')
19
 
20
+
21
  logging_js = '''
22
  function magicFunc(x){
23
  let script = document.createElement('script');
24
+ script.src = "file/all_js_functions.js"
25
  document.head.appendChild(script);
26
  }
27
  '''
 
29
  dataset = load_dataset("bradley6597/illustration-test", data_files = 'data.csv')
30
  df = pd.DataFrame(dataset['train']).drop_duplicates()
31
 
32
+ dataset_ai = load_dataset("bradley6597/illustration-test", data_files = 'ai_captions_data.csv')
33
+ ai_captions = pd.DataFrame(dataset_ai['train']).drop_duplicates()
34
+
35
+ df = df.merge(ai_captions, how = 'left', on = 'clean_link')
36
+ df['ai_description'] = df['ai_description'].fillna('')
37
+
38
  ill_links = df.copy()
39
  ill_links = ill_links[ill_links['Description'] != 'Moved'].copy()
40
  ill_links['code'] = ill_links['link'].str.replace("https://drive.google.com/file/d/", "", regex = False)
 
42
  ill_links['filename'] = ill_links['file'].str.replace(".*\\/", "", regex = True)
43
  # ill_links['image_code'] = 'https://lh3.google.com/u/0/d/' + ill_links['code'] + '=k'
44
  ill_links['image_code'] = 'https://lh3.google.com/u/0/d/' + ill_links['code'] + '=w320-h304'
45
+ ill_links['image_code'] = '<center><a href="' + ill_links['link'] + '" target="_blank" onclick="magicFunc(\'' + ill_links['code'] + '\')"><img src="' + ill_links['image_code'] + '" style="max-height:400px; max-width:200px"> ' + ill_links['filename'] + '</a><a href="https://drive.google.com/u/0/uc?id=' + ill_links['code'] + '&export=download"><img src="/file/download_icon.png"></a><button class="submit-btn" onclick="mdFunc(this.parentNode)">Make Draggable</button></center>'
46
  ill_links['shared_drive'] = ill_links['file'].str.replace("/content/drive/Shareddrives/", "", regex = False)
47
  ill_links['shared_drive'] = ill_links['shared_drive'].str.replace("(.*?)\\/.*", "\\1", regex = True)
48
  ill_links['Description'] = ill_links['Description'].str.replace("No Description", "", regex = False)
49
 
 
 
50
  ill_links['ID'] = ill_links.index
 
51
  ill_links['title'] = ill_links['filename']
 
52
  ill_links['url'] = ill_links['image_code']
 
 
 
53
  ill_links['filepath'] = ill_links['file']
 
54
  ill_links['post_filepath'] = ill_links['filepath'].str.replace(".*?\\/KS1 EYFS\\/", "", regex = True)
55
+
56
+ ill_links_title = ill_links.copy()
57
+ ill_links_ai = ill_links.copy()
58
+
59
+ ill_links['abstract'] = ill_links['filename'].str.replace("\\-|\\_", " ", regex = True) + ' ' + ill_links['Description'].str.replace(",", " ", regex = False).astype(str)
60
+ ill_links_title['abstract'] = ill_links_title['filename'].str.replace('\\-|\\_', " ", regex = True)
61
+ ill_links_ai['abstract'] = ill_links_title['ai_description']
62
 
63
  ill_check_lst = []
64
  for i in range(0, 5):
65
+ tmp_links = f'https://lh3.google.com/u/{i}/d/' + ill_links['code'].iloc[0] + '=w320-h304'
66
+ tmp_links = '<img onmousedown="mdFunc(this)" src="' + tmp_links + '" style="max-height:400px; max-width:25%">'
 
 
 
67
  tmp_links = f'<p>{i}</p>' + tmp_links
68
  ill_check_lst.append(tmp_links)
69
  ill_check_df = pd.DataFrame(ill_check_lst).T
70
  ill_check_html = ill_check_df.to_html(escape = False, render_links = True, index = False, header = False)
71
+
72
+ ill_links = ill_links[['ID', 'title', 'url', 'abstract', 'filepath', 'Date Created', 'post_filepath']]
73
+ ill_links_title = ill_links_title[['ID', 'title', 'url', 'abstract', 'filepath', 'Date Created', 'Description', 'post_filepath']]
74
+ ill_links_ai = ill_links_ai[['ID', 'title', 'url', 'abstract', 'filepath', 'Date Created', 'Description', 'post_filepath']]
75
+
76
  ind_main, doc_main, tf_main = funky.index_documents(ill_links)
77
+ del ill_links
78
  ind_title, doc_title, tf_title = funky.index_documents(ill_links_title)
79
+ del ill_links_title
80
+ ind_ai, doc_ai, tf_ai = funky.index_documents(ill_links_ai)
81
+ del ill_links_ai
82
 
83
  def same_auth(username, password):
84
  return(username == os.environ['username']) & (password == os.environ['password'])
85
 
86
 
87
+
88
+ def search_index(search_text, sd, ks, sort_by, max_results, user_num, search_title, increase = None):
89
+ max_results_list = ['10', '25', '50', '75', '100', '250', '500', '1000', '5000', '10000', 'All']
90
+ if increase:
91
+ max_results = max_results_list[max_results_list.index(max_results) + 1]
92
  if search_title:
93
+ output = funky.search(tf_title, doc_title, ind_title, search_text, search_type = 'AND', ranking = True)
94
  else:
95
  output = funky.search(tf_main, doc_main, ind_main, search_text, search_type='AND', ranking = True)
96
+ # Don't need to order by AI as the AI ranking numbers are much lower than the default numbers
97
+ output_ai = funky.search(tf_ai, doc_ai, ind_ai, search_text, search_type = 'AND', ranking = True)
98
+ output.extend(output_ai)
99
+
100
  output = [x for o in output for x in o if type(x) is not float]
101
+
102
+ load_more_visible = False
103
 
104
  if len(output) > 0:
105
 
106
  output_df = (pd.DataFrame(output)
107
+ .groupby('url')
108
+ .first()
109
+ .reset_index()
110
+ .drop_duplicates())
111
 
112
  output_df['url'] = output_df['url'].str.replace("/u/0/", f"/u/{int(user_num)}/", regex = False)
113
  if len(sd) == 1:
 
134
  output_df2 = output_df2.sort_values(by = ['title'], ascending = True)
135
 
136
  total_returned = 'No. of Results to Return (Total: ' + str(output_df2.shape[0]) + ')'
137
+
138
+ if output_df2.shape[0] > int(max_results):
139
+ load_more_visible = True
140
+
141
  if max_results != 'All':
142
  output_df2 = output_df2.head(int(max_results))
143
  output_df2 = output_df2[['url']].reset_index(drop = True)
 
159
 
160
  if final_df.shape[0] == 0 :
161
  final_df = pd.DataFrame(['<h3>No Results Found :(</h3>'])
162
+
163
+
 
164
  return('<center>' +
165
  final_df.to_html(escape = False, render_links = True, index = False, header = False) +
166
+ '</center>',
167
+ gr.update(label = total_returned, value = max_results),
168
+ gr.update(visible = load_more_visible))
169
 
170
 
171
  def search_logging(x: str, request: gr.Request):
 
178
  </button>
179
  '''
180
 
 
181
  style = '''
182
  footer{
183
  display: none !important;
 
208
  padding: .5px;
209
  border-radius: 4px;
210
  }
211
+
212
+ .submit-btn{
213
+ display:inline-block !important;
214
+ padding:0.7em 1.4em !important;
215
+ margin:0 0.3em 0.3em 0 !important;
216
+ border-radius:0.15em !important;
217
+ box-sizing: border-box !important;
218
+ text-decoration:none !important;
219
+ font-family:'Roboto',sans-serif !important;
220
+ text-transform:uppercase !important;
221
+ font-weight:400 !important;
222
+ color:#FFFFFF !important;
223
+ background-color:#3369ff !important;
224
+ box-shadow:inset 0 -0.6em 0 -0.35em rgba(0,0,0,0.17) !important;
225
+ text-align:center !important;
226
+ position:relative !important;
227
+ }
228
+ .submit-btn:active{
229
+ top:0.1em !important;
230
+ }
231
+ @media all and (max-width:30em){
232
+ .submit-btn{
233
+ display:block !important;
234
+ margin:0.4em auto !important;
235
+ }
236
+ }
237
  '''
238
 
239
  with gr.Blocks(css=style) as app:
 
252
  sort_by = gr.Dropdown(choices = ['Relevance', 'Date Created', 'A-Z'], value = 'Relevance', multiselect = False, label = 'Sort By')
253
  max_return = gr.Dropdown(choices = ['10', '25', '50', '75', '100', '250', '500', '1000', '5000', '10000', 'All'], value = '50', multiselect = False, label = 'No. of Results to Return (Total: 0)')
254
  with gr.Row():
255
+ search_button = gr.Button(value="Search!", interactive = True)
256
  with gr.Row():
257
+ output_df = gr.HTML()
258
  back_top_btn = gr.HTML(back_to_top_btn_html)
259
+ load_more_results_btn = gr.Button(value = 'Load More Results', interactive = True, visible = False)
260
+ search_button.click(search_index, inputs=[search_prompt, shared_drive, key_stage, sort_by, max_return, user_num, title_search], outputs=[output_df, max_return, load_more_results_btn])
261
+ search_prompt.submit(search_index, inputs=[search_prompt, shared_drive, key_stage, sort_by, max_return, user_num, title_search], outputs=[output_df, max_return, load_more_results_btn])
262
  search_button.click(search_logging, inputs=[search_prompt], outputs=None)
263
  search_prompt.submit(search_logging, inputs=[search_prompt], outputs=None)
264
+ load_more_results_btn.click(search_index, inputs=[search_prompt, shared_drive, key_stage, sort_by, max_return, user_num, title_search, load_more_results_btn], outputs=[output_df, max_return, load_more_results_btn])
265
  app.load(_js = logging_js)
266
 
267
  app.auth = (same_auth)
268
  app.auth_message = ''
269
 
 
270
  fapi = FastAPI()
271
 
272
  fapi.add_middleware(SessionMiddleware, secret_key=os.environ['session_key'])
 
289
  logger.flag([url, q, request.cookies['access-token'], str(datetime.now())])
290
  return {"message": "ok"}
291
 
 
292
  # mount Gradio app to FastAPI app
293
  app2 = gr.mount_gradio_app(fapi, app, path="/")
294
  # serve the app
295
  if __name__ == "__main__":
296
+ uvicorn.run(app2, host="0.0.0.0", port=7860)