|
import os
import re
import time

import gradio as gr
from langchain.document_loaders import PyPDFLoader
|
|
|
ABS_PATH = os.path.dirname(os.path.abspath(__file__)) |
|
DB_DIR = os.path.join(ABS_PATH, "db") |
|
|
|
# JavaScript source held as a string: an async arrow function whose body calls
# console.log("HELLO").
# NOTE(review): nothing in the visible code references `on_load` -- confirm
# whether it is still meant to be attached to an event.
on_load="""
async()=>{
console.log("HELLO");
}
"""
|
|
|
def get_documents():
    """Load the documents of every bundled PDF.

    ``PyPDFLoader`` wraps a single file, so one loader is created per PDF and
    the results are concatenated in the order the files are listed below.
    Paths are relative, so they are resolved against the current working
    directory.

    Returns:
        list: the documents returned by ``PyPDFLoader(path).load()`` for each
        file, in file order.
    """
    pdf_paths = (
        "AI Guide for Government - AI CoE.pdf",
        "Ethics_of_Artificial_Intelligence-2.pdf",
        "IPOL_BRI(2016)571380_EN.pdf",
    )
    loaded = []
    for pdf_path in pdf_paths:
        # One loader per file: extra positional arguments are NOT extra paths.
        loaded.extend(PyPDFLoader(pdf_path).load())
    return loaded
|
|
|
|
|
def extract_pdfs(x, request: gr.Request, progress=gr.Progress()):
    """Recreate ``DB_DIR`` empty, then load and whitespace-normalize the PDFs.

    Args:
        x: Not used by this function.
        request: Request object for the call; only printed for debugging.
        progress: Gradio progress tracker; wraps the document loop so progress
            is reported per document.

    Returns:
        tuple: ``(documents, all_text)`` -- the list of loaded documents with
        their ``page_content`` cleaned in place, and the concatenation of every
        cleaned ``page_content``.
    """
    progress(0, desc="Test", unit="Files")
    print("request", request)

    # Drop any previous DB_DIR, then make sure the directory exists.
    # makedirs(exist_ok=True) tolerates a partially failed rmtree (errors are
    # ignored there) and also covers the first run, when nothing exists yet.
    if os.path.exists(DB_DIR):
        import shutil

        shutil.rmtree(DB_DIR, ignore_errors=True)
    os.makedirs(DB_DIR, exist_ok=True)

    documents = []
    text_parts = []
    for num, doc in enumerate(progress.tqdm(get_documents())):
        print(f" {num} DocPg : ", doc.page_content)
        doc.page_content = replace_newlines_and_spaces(doc.page_content)
        documents.append(doc)
        text_parts.append(doc.page_content)
        # Short pause per document -- presumably to keep the progress bar
        # visible; confirm before removing.
        time.sleep(0.1)

    # Join once instead of growing a string inside the loop.
    return documents, "".join(text_parts)
|
|
|
def replace_newlines_and_spaces(text):
    """Collapse every run of whitespace in *text* into a single space.

    Newlines, tabs and repeated spaces are all covered by the whitespace
    pattern, so a single substitution is enough. Leading and trailing
    whitespace is reduced to one space, not stripped.

    Args:
        text: The string to normalize.

    Returns:
        str: *text* with each whitespace run replaced by one space.
    """
    return re.sub(r"\s+", " ", text)
|
|
|
|
|
def test(x, request: gr.Request, progress=gr.Progress()):
    """Step the progress tracker over a fixed string and return that string.

    Args:
        x: Not used by this function.
        request: Request object for the call; only printed for debugging.
        progress: Gradio progress tracker driven once per character.

    Returns:
        str: the fixed string ``"abcdefghijklmnopqrstuv"``.
    """
    progress(0, desc="Test", unit="Files")
    print("request", request)

    letters = "abcdefghijklmnopqrstuv"
    tracked = progress.tqdm(letters, desc="TEST", unit="Files")
    for _ in tracked:
        # One short pause per character so each step is observable.
        time.sleep(0.1)
    return letters
|
|
|
# Build the page: a read-only one-column table, a status banner and a button.
with gr.Blocks() as demo:
    # Non-interactive table with a single fixed column titled "Selected Files".
    selected = gr.Dataframe(
        interactive=False,
        col_count=(1, "fixed"),
        headers=["Selected Files"],
    )
    # Status banner; the heading is closed with a matching </h3> tag.
    prog = gr.HTML(
        value="<h3 style='text-align: center'> Processing...</h3>"
    )

    b = gr.Button()

    # Clicking the button runs `test` with the table as input and writes the
    # returned value into the banner.
    b.click(test, selected, prog)

    # Run `extract_pdfs` when the page loads; its two return values are routed
    # to `prog` and `selected`, in that order.
    # NOTE(review): `extract_pdfs` declares a positional `x` but no input
    # component is wired here, and it returns (documents, all_text) into an
    # HTML and a Dataframe component -- confirm this wiring is intended.
    demo.load(extract_pdfs, inputs=None, outputs=[prog, selected])

demo.launch()