import functions as funky |
import pandas as pd |
import gradio as gr |
import os |
from datasets import load_dataset |
from huggingface_hub import login |
import numpy as np |
from fastapi import FastAPI, Request |
import uvicorn |
from starlette.middleware.sessions import SessionMiddleware |
import fastapi |
from datetime import datetime |
import re |
# Authenticate with the Hugging Face Hub; the token is read from the HUB_TOKEN
# environment variable (KeyError if it is not set).
login(token = os.environ['HUB_TOKEN'])

# JavaScript passed to gr.Blocks(js=...). It defines magicFunc, which appends a
# <script> element pointing at file/js_functions.js to the document head.
logging_js = '''
function magicFunc(x){
let script = document.createElement('script');
script.src = "file/js_functions.js"
document.head.appendChild(script);
}
'''

# ---------------------------------------------------------------------------
# Load the illustration catalogue and the AI captions, then join them.
# ---------------------------------------------------------------------------
dataset = load_dataset("bradley6597/illustration-test", data_files = 'data.csv')
dataset_ai = load_dataset("bradley6597/illustration-test", data_files = 'ai_captions_data.csv')
ai_captions = pd.DataFrame(dataset_ai['train']).drop_duplicates()
df = (pd.DataFrame(dataset['train'])
      .drop_duplicates()
      .merge(ai_captions, how = 'left', on = 'clean_link'))
# Rows without an AI caption get an empty string instead of NaN.
df['ai_description'] = df['ai_description'].fillna('')

# ---------------------------------------------------------------------------
# Derive the per-illustration columns used for display and indexing.
# ---------------------------------------------------------------------------
# Rows whose Description is exactly 'Moved' are excluded.
ill_links = df[df['Description'] != 'Moved'].copy()

# Strip the fixed prefix and suffix from the Drive link, leaving the file code.
ill_links['code'] = (ill_links['link']
                     .str.replace("https://drive.google.com/file/d/", "", regex = False)
                     .str.replace("/view?usp=drivesdk", "", regex = False))
# Everything up to and including the last '/' is removed to get the file name.
ill_links['filename'] = ill_links['file'].str.replace(".*\\/", "", regex = True)

# Thumbnail URL for account index 0; search_index later swaps the '/u/0/' part.
thumbnail_url = 'https://lh3.google.com/u/0/d/' + ill_links['code'] + '=w320-h304'

# Both card variants share everything up to the folder icon link; only files
# ending in '.png' additionally get the 'Make Draggable' button.
card_common = ('<center><a href="' + ill_links['link']
               + '" target="_blank" onclick="magicFunc(\'' + ill_links['code']
               + '\')"><img src="' + thumbnail_url
               + '" style="max-height:400px; max-width:200px"> ' + ill_links['filename']
               + '</a><br><a class="icon" href="https://drive.google.com/u/0/uc?id=' + ill_links['code']
               + '&export=download"><img src="/file/file-download.png" width="30" height="30"></a><a class="icon" href="https://drive.google.com/drive/u/0/folders/' + ill_links['parent_id']
               + '" target="_blank"><img src="/file/folder-small.png" width="30" height="30"></a>')
is_png = ill_links['file'].str.contains("\\.png$", regex = True)
ill_links['image_code'] = np.where(
    is_png,
    card_common + '<button class="submit-btn" onclick="mdFunc(this.parentNode)">Make Draggable</button></center>',
    card_common + '</center>',
)

# First path component after the Shareddrives root.
ill_links['shared_drive'] = (ill_links['file']
                             .str.replace("/content/drive/Shareddrives/", "", regex = False)
                             .str.replace("(.*?)\\/.*", "\\1", regex = True))
ill_links['Description'] = ill_links['Description'].str.replace("No Description", "", regex = False)
ill_links['ID'] = ill_links.index
ill_links['title'] = ill_links['filename']
ill_links['url'] = ill_links['image_code']
ill_links['filepath'] = ill_links['file']
ill_links['post_filepath'] = ill_links['filepath'].str.replace(".*?\\/KS1 EYFS\\/", "", regex = True)

# ---------------------------------------------------------------------------
# Three views of the same rows that differ only in their 'abstract' text:
#   main  -> file name (with '-' and '_' as spaces) plus the description
#   title -> file name only
#   ai    -> the AI caption
# ---------------------------------------------------------------------------
ill_links_title = ill_links.copy()
ill_links_ai = ill_links.copy()
spaced_filename = ill_links['filename'].str.replace("\\-|\\_", " ", regex = True)
ill_links['abstract'] = spaced_filename + ' ' + ill_links['Description'].str.replace(",", " ", regex = False).astype(str)
ill_links_title['abstract'] = ill_links_title['filename'].str.replace('\\-|\\_', " ", regex = True)
ill_links_ai['abstract'] = ill_links_ai['ai_description']

# ---------------------------------------------------------------------------
# One-row HTML table showing the first illustration's thumbnail requested
# under account indexes 0-4, each labelled with its index.
# ---------------------------------------------------------------------------
ill_check_lst = []
first_code = ill_links['code'].iloc[0]
for i in range(0, 5):
    tmp_links = f'https://lh3.google.com/u/{i}/d/' + first_code + '=w320-h304'
    tmp_links = (f'<p>{i}</p>'
                 + '<img onmousedown="mdFunc(this)" src="' + tmp_links
                 + '" style="max-height:400px; max-width:25%">')
    ill_check_lst.append(tmp_links)
ill_check_df = pd.DataFrame(ill_check_lst).T
ill_check_html = ill_check_df.to_html(escape = False, render_links = True, index = False, header = False)

# ---------------------------------------------------------------------------
# Keep only the columns handed to the indexer, build the three indexes, and
# drop each frame as soon as it has been indexed.
# ---------------------------------------------------------------------------
main_columns = ['ID', 'title', 'url', 'abstract', 'filepath', 'Date Created', 'post_filepath', 'parent_id']
described_columns = ['ID', 'title', 'url', 'abstract', 'filepath', 'Date Created', 'Description', 'post_filepath', 'parent_id']
ill_links = ill_links[main_columns]
ill_links_title = ill_links_title[described_columns]
ill_links_ai = ill_links_ai[described_columns]

ind_main, doc_main, tf_main = funky.index_documents(ill_links)
del ill_links
ind_title, doc_title, tf_title = funky.index_documents(ill_links_title)
del ill_links_title
ind_ai, doc_ai, tf_ai = funky.index_documents(ill_links_ai)
del ill_links_ai
def same_auth(username, password):
    """Check submitted credentials against the configured ones.

    The expected values are read from the ``username`` and ``password``
    environment variables on every call.

    Args:
        username: User name submitted on the login form.
        password: Password submitted on the login form.

    Returns:
        True only when both values match the configured credentials.

    Raises:
        KeyError: If either environment variable is not set.
    """
    import hmac

    expected_username = os.environ['username']
    expected_password = os.environ['password']

    # Anything that is not text can never match a configured credential.
    if not isinstance(username, str) or not isinstance(password, str):
        return False

    # compare_digest avoids leaking how many leading characters matched through
    # response timing. Comparing bytes also allows non-ASCII input, which
    # compare_digest rejects for str arguments.
    username_ok = hmac.compare_digest(username.encode('utf-8'), expected_username.encode('utf-8'))
    password_ok = hmac.compare_digest(password.encode('utf-8'), expected_password.encode('utf-8'))
    # Both comparisons are always evaluated before being combined.
    return username_ok and password_ok
def search_index(search_text, sd, ks, sort_by, max_results, user_num, search_title, image_type, do_not_use, increase = None):
    """Search the illustration indexes and render the matches as an HTML grid.

    Args:
        search_text: Query string entered by the user.
        sd: Selected shared-drive names. A result is kept when its ``filepath``
            contains any of them; an empty selection applies no drive filter.
        ks: Selected key stages. When non-empty, results are filtered on the
            lower-cased ``abstract`` and ``post_filepath`` columns.
        sort_by: ``'Relevance'``, ``'Date Created'`` or ``'A-Z'``.
        max_results: Page size, one of the strings in ``max_results_list``.
        user_num: Account index substituted for ``/u/0/`` in result URLs.
            ``None`` is treated as 0.
        search_title: When truthy only the title index is searched; otherwise
            the main index and the AI-caption index are both searched.
        image_type: Selected image extensions, matched against the end of
            ``filepath`` (``jpeg`` is matched as ``jpg``).
        do_not_use: When truthy, results whose path marks them as "do not use"
            are removed.
        increase: When truthy, ``max_results`` is first stepped to the next
            larger entry of ``max_results_list``.

    Returns:
        A 3-tuple of: the HTML string for the result grid, a ``gr.update`` for
        the page-size dropdown (new label and value), and a ``gr.update``
        setting the visibility of the "load more" button.
    """
    max_results_list = ['10', '25', '50', '75', '100', '250', '500', '1000', '5000', '10000', 'All']
    map_notice = '<div id="mapBorder"><strong>If real-world maps are needed please check they are from the folder: Illustrations Now > Maps - Using the New Guidance (2024)</strong></div><br>'

    if increase:
        # Step to the next page size but never past the final 'All' entry;
        # an unclamped +1 raises IndexError when 'All' is already selected.
        next_position = min(max_results_list.index(max_results) + 1, len(max_results_list) - 1)
        max_results = max_results_list[next_position]

    if search_title:
        output = funky.search(tf_title, doc_title, ind_title, search_text, search_type = 'AND', ranking = True)
    else:
        output = funky.search(tf_main, doc_main, ind_main, search_text, search_type='AND', ranking = True)
        output_ai = funky.search(tf_ai, doc_ai, ind_ai, search_text, search_type = 'AND', ranking = True)
        output.extend(output_ai)
    # Flatten one level and drop float members.
    # NOTE(review): presumably each hit is a (document, score) pair and this
    # strips the scores - confirm against functions.search.
    output = [x for o in output for x in o if type(x) is not float]

    load_more_visible = False
    extra_info = ''
    if 'map' in search_text:
        extra_info = map_notice

    if len(output) > 0:
        # sort=False keeps groups in first-seen order. The default sorts them
        # by URL, which threw away the order the search returned and made the
        # index-based 'Relevance' sort below effectively alphabetical by URL.
        output_df = (pd.DataFrame(output)
                     .groupby('url', sort = False)
                     .first()
                     .reset_index()
                     .drop_duplicates())
        output_df['Date Created'] = pd.to_datetime(output_df['Date Created'], format = 'mixed')
        if do_not_use:
            # .copy() so the column assignments below act on an independent frame.
            output_df = output_df[~output_df['filepath'].str.lower().str.contains("do.*not.*use|not.*general|don\\'t.*use|do.*no.*use", regex = True)].copy()

        # Show the map notice as well when any returned title mentions a map.
        map_df = output_df[output_df['title'].str.contains('map|Map', regex = True)]
        if map_df.shape[0] > 0:
            extra_info = map_notice

        # A cleared number box yields None; fall back to account index 0
        # instead of raising TypeError from int(None).
        account_index = int(user_num) if user_num is not None else 0
        output_df['url'] = output_df['url'].str.replace("/u/0/", f"/u/{account_index}/", regex = False)

        output_df_temp = pd.DataFrame()
        if len(sd) > 0:
            for shared in sd:
                temp_df = output_df[(output_df['filepath'].str.contains(str(shared), regex = False))]
                output_df_temp = pd.concat([output_df_temp, temp_df])
            # NOTE(review): the source's indentation was lost; this assignment
            # is taken to belong to the `if`, since applying it unconditionally
            # would discard every result when no drive is selected.
            output_df = output_df_temp.sort_index()

        if len(ks) > 0:
            keystage_filter = '|'.join(ks).lower()
            if search_title:
                output_df['abstract'] = output_df['abstract'] + ' ' + output_df['Description']
            output_df['abstract'] = output_df['abstract'].str.lower()
            output_df['post_filepath'] = output_df['post_filepath'].str.lower()
            # 1 when the abstract names no key stage at all, otherwise 0.
            output_df['missing_desc'] = np.where(output_df['abstract'].str.contains('eyfs|ks1|ks2|ks3', regex = True), 0, 1)
            # Keep rows whose abstract names a selected key stage or names none,
            # then require the path below 'KS1 EYFS' to match as well.
            output_df2 = output_df[(output_df['abstract'].str.contains(keystage_filter, regex = True) | (output_df['missing_desc'] == 1))].copy()
            output_df2 = output_df2[(output_df2['post_filepath'].str.contains(keystage_filter, regex = True))]
            if output_df2.shape[0] == 0:
                # Nothing survived both filters: fall back to the path filter alone.
                output_df2 = output_df[(output_df['post_filepath'].str.contains(keystage_filter, regex = True))]
        else:
            output_df['abstract'] = output_df['abstract'].str.lower()
            output_df['post_filepath'] = output_df['post_filepath'].str.lower()
            output_df['missing_desc'] = np.where(output_df['abstract'].str.contains('eyfs|ks1|ks2|ks3', regex = True), 0, 1)
            output_df2 = output_df

        # Independent frame before adding columns to what may be a filtered view.
        output_df2 = output_df2.copy()
        # The row index is used as the rank; each row also receives the best
        # (lowest) rank found among rows sharing its parent_id.
        output_df2['ind'] = output_df2.index
        min_parent_score = output_df2.groupby('parent_id')['ind'].min().reset_index()
        min_parent_score.columns = ['parent_id', 'min_parent_ind']
        output_df2 = output_df2.merge(min_parent_score, how = 'left', on = 'parent_id')

        if sort_by == 'Relevance':
            output_df2 = output_df2.sort_values(by = ['missing_desc', 'min_parent_ind'], ascending = [True, True])
        elif sort_by == 'Date Created':
            output_df2 = output_df2.sort_values(by = ['Date Created'], ascending = False)
        elif sort_by == 'A-Z':
            output_df2 = output_df2.sort_values(by = ['title'], ascending = True)

        # e.g. ['PNG', 'JPEG'] -> 'png$|jpg$'
        image_type_filter = '$|'.join(image_type).lower().replace("jpeg", "jpg") + '$'
        output_df2 = output_df2[output_df2['filepath'].str.contains(image_type_filter, regex = True)].reset_index(drop = True)

        total_returned = 'No. of Results to Return (Total: ' + str(output_df2.shape[0]) + ')'
        if max_results != 'All':
            if output_df2.shape[0] > int(max_results):
                load_more_visible = True
            output_df2 = output_df2.head(int(max_results))

        # Lay the result cells out in a grid: cell k goes to column k % max_cols.
        output_df2 = output_df2[['url']].reset_index(drop = True)
        max_cols = 5
        output_df2['row'] = output_df2.index % max_cols
        grid_columns = [
            output_df2[output_df2['row'] == x].reset_index(drop = True)[['url']]
            for x in range(0, max_cols)
        ]
        final_df = pd.concat(grid_columns, axis = 1).fillna('')
    else:
        final_df = pd.DataFrame(['<h3>No Results Found :(</h3>'])
        total_returned = 'No. of Results to Return (Total: 0)'

    # The filters above can also leave an empty grid.
    if final_df.shape[0] == 0 :
        final_df = pd.DataFrame(['<h3>No Results Found :(</h3>'])

    return('<center>' +
           extra_info +
           final_df.to_html(escape = False, render_links = True, index = False, header = False) +
           '</center>',
           gr.update(label = total_returned, value = max_results),
           gr.update(visible = load_more_visible))
def search_logging(x: str, request: gr.Request):
    """Placeholder event handler that performs no work.

    It is registered on the search button click and the search box submit
    with ``outputs=None``. Neither argument is used.

    Args:
        x: Value of the search text box.
        request: The ``gr.Request`` for the triggering event.

    Returns:
        None.
    """
    return None
# HTML for the "Back to Top!" button. Its onclick scrolls via
# window.parentIFrame.scrollTo when 'parentIFrame' exists on window, and via
# window.scrollTo otherwise.
back_to_top_btn_html = '''
<button id="toTopBtn" onclick="'parentIFrame' in window ? window.parentIFrame.scrollTo({top: 0, behavior:'smooth'}) : window.scrollTo({ top: 0 })">
<a style="color:white; text-decoration:none;">Back to Top!</a>
</button>
'''
# CSS passed to gr.Blocks(css=...). Rules, in order:
#   footer      - hidden
#   td img      - checkerboard background behind images inside table cells
#   #toTopBtn   - fixed-position styling for the back-to-top button
#   .submit-btn - styling for the 'Make Draggable' button (plus :active state
#                 and a narrow-screen override)
#   #mapBorder  - rounded orange border used by the map notice
#   .icon       - half-width, left-floated icon links
style = '''
footer{
display: none !important;
}
td img{
background-image:
linear-gradient(45deg, lightgrey 25%, transparent 25%),
linear-gradient(135deg, lightgrey 25%, transparent 25%),
linear-gradient(45deg, transparent 75%, lightgrey 75%),
linear-gradient(135deg, transparent 75%, lightgrey 75%);
background-size: 20px 20px;
background-position: 0 0, 10px 0, 10px -10px, 0px 10px;
}
#toTopBtn {
position: fixed;
bottom: 10px;
float: right;
right: 18.5%;
left: 77.25%;
height: 30px;
max-width: 100px;
width: 100%;
font-size: 12px;
border-color: rgba(217,24,120, .5);
background-color: rgba(35,153,249,.5);
padding: .5px;
border-radius: 4px;
}
.submit-btn{
display:inline-block !important;
padding:0.7em 1.4em !important;
margin:0 0.3em 0.3em 0 !important;
border-radius:0.15em !important;
box-sizing: border-box !important;
text-decoration:none !important;
font-family:'Roboto',sans-serif !important;
text-transform:uppercase !important;
font-weight:400 !important;
color:#FFFFFF !important;
background-color:#3369ff !important;
box-shadow:inset 0 -0.6em 0 -0.35em rgba(0,0,0,0.17) !important;
text-align:center !important;
position:relative !important;
}
.submit-btn:active{
top:0.1em !important;
}
@media all and (max-width:30em){
.submit-btn{
display:block !important;
margin:0.4em auto !important;
}
}
#mapBorder {
border-radius: 25px;
border: 2px solid orange;
}
.icon {
width:50%;
float: left;
}
'''
# Gradio UI: intro and account-check row, the search controls, the search
# button, the results area, and the event wiring.
# NOTE(review): the original indentation was lost, so the exact nesting of the
# Row/Column containers below is reconstructed - confirm against the deployed
# layout.
drive_choices = ['Illustrations - 01-10 to 07-22', 'Illustrations - Now', 'Shutter Stock Images', 'Beyond - Illustrations', 'DO NOT USE IN GENERAL RESOURCES - South Africa', 'Australia - Rhino Readers Illustrations', 'Aus and Nz - Phonics Illustrations', 'Twinkl Art Gallery']
page_size_choices = ['10', '25', '50', '75', '100', '250', '500', '1000', '5000', '10000', 'All']

with gr.Blocks(css=style, js = logging_js) as app:
    # Intro text, the five account-check thumbnails and the account number box.
    with gr.Row():
        with gr.Column(min_width = 10):
            with gr.Row():
                gr.HTML("<center><p>If you can't see the images please make sure you are signed in to your Twinkl account on Google & you have access to the Shared Drives you are searching :)</p><p>To drag images click 'Make Draggable' button and wait until it says 'Drag It!'. After this you can drag the image into a folder on your computer</p></center>")
                gr.HTML(ill_check_html)
                user_num = gr.Number(value = 0, label = 'Put lowest number of the alarm clock you can see')

    # Search box and filters.
    with gr.Row():
        with gr.Column(min_width = 0):
            search_prompt = gr.Textbox(placeholder = 'search for an illustration', label = 'Search', elem_id = 'search_term')
            title_search = gr.Checkbox(label = 'Search title only')
            do_not_use = gr.Checkbox(label = 'Remove Do Not Use Images', value = True)
        with gr.Column(min_width = 0):
            shared_drive = gr.Dropdown(choices = drive_choices, multiselect = True, label = 'Shared Drive', value = ['Illustrations - 01-10 to 07-22', 'Illustrations - Now'])
        with gr.Column(min_width = 0):
            key_stage = gr.Dropdown(choices = ['EYFS', 'KS1', 'KS2', 'KS3'], multiselect = True, label = 'Key Stage', value = ['EYFS', 'KS1', 'KS2', 'KS3'])
        with gr.Column(min_width = 0):
            image_type = gr.Dropdown(choices = ['JPEG', 'PNG', 'TIF', 'TIFF'], multiselect = True, label = 'Image Type', value = ['PNG', 'JPEG', 'TIF', 'TIFF'])
        with gr.Column(min_width = 0):
            sort_by = gr.Dropdown(choices = ['Relevance', 'Date Created', 'A-Z'], value = 'Relevance', multiselect = False, label = 'Sort By')
            max_return = gr.Dropdown(choices = page_size_choices, value = '50', multiselect = False, label = 'No. of Results to Return (Total: 0)')

    with gr.Row():
        search_button = gr.Button(value="Search!", interactive = True)

    # Results grid, back-to-top button and the initially hidden load-more button.
    with gr.Row():
        output_df = gr.HTML()
        back_top_btn = gr.HTML(back_to_top_btn_html)
        load_more_results_btn = gr.Button(value = 'Load More Results', interactive = True, visible = False)

    # Inputs are listed in search_index's positional parameter order.
    search_inputs = [search_prompt, shared_drive, key_stage, sort_by, max_return, user_num, title_search, image_type, do_not_use]
    search_outputs = [output_df, max_return, load_more_results_btn]

    # Clicking the button and pressing Enter in the search box both run the
    # search, then both run the logging hook (same registration order as before).
    for register in (search_button.click, search_prompt.submit):
        register(search_index, inputs=search_inputs, outputs=search_outputs)
    for register in (search_button.click, search_prompt.submit):
        register(search_logging, inputs=[search_prompt], outputs=None)

    # The button itself is passed as the extra input that fills search_index's
    # `increase` parameter.
    load_more_results_btn.click(search_index, inputs=search_inputs + [load_more_results_btn], outputs=search_outputs)
    app.load()

app.auth = same_auth
app.auth_message = ''
# FastAPI application that hosts the Gradio app. Session middleware is added
# with its secret read from the `session_key` environment variable (KeyError
# if it is not set).
fapi = FastAPI()
session_secret = os.environ['session_key']
fapi.add_middleware(SessionMiddleware, secret_key=session_secret)
@fapi.middleware("http")
async def add_session_hash(request: Request, call_next):
    """Copy the request's ``session`` cookie onto the response.

    The request is first passed down the chain; if it carried a non-empty
    ``session`` cookie, the same value is set on the response as an
    HTTP-only cookie.

    Args:
        request: The incoming request.
        call_next: Callable that forwards the request and returns the response.

    Returns:
        The downstream response, with the cookie added when applicable.
    """
    response = await call_next(request)
    session_cookie = request.cookies.get('session')
    if not session_cookie:
        return response
    response.set_cookie(key='session', value=session_cookie, httponly=True)
    return response
@fapi.get("/track")
async def track(url: str, q: str, request: Request):
    """Handle GET /track and always answer ``{"message": "ok"}``.

    Args:
        url: ``url`` parameter of the request (unused).
        q: ``q`` parameter of the request; ``None`` is normalised to ``''``
            but the value is not used afterwards.
        request: The incoming request (unused).

    Returns:
        The fixed dictionary ``{"message": "ok"}``.
    """
    q = '' if q is None else q
    return {"message": "ok"}
# Mount the Gradio app at the root of the FastAPI app, guarded by same_auth.
# NOTE(review): allowed_paths=["."] lets Gradio serve files from the current
# working directory - confirm nothing sensitive is stored there.
app2 = gr.mount_gradio_app(
    app=fapi,
    blocks=app,
    path="/",
    allowed_paths=["."],
    auth=same_auth,
)

if __name__ == "__main__":
    # Run the combined application with uvicorn on port 7860.
    uvicorn.run(app2, host="", port=7860)