File size: 2,092 Bytes
6ba7703 ebe59c5 6ba7703 a21ca8d 6ba7703 ec9752e 9a9a041 84ac6a5 1415cc2 ec9752e 1415cc2 6ba7703 1415cc2 6ba7703 fdf645d 6ba7703 d9e6de6 4f90b25 769400f 6ba7703 caa9d5e d9e6de6 1415cc2 6ba7703 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import os
import re
import time

import gradio as gr
from langchain.document_loaders import PyPDFLoader
# Absolute path of the directory that contains this script.
ABS_PATH = os.path.dirname(os.path.abspath(__file__))
# "db" directory next to this script; extract_pdfs() deletes and recreates it.
DB_DIR = os.path.join(ABS_PATH, "db")
# JavaScript snippet that logs "HELLO" to the browser console. Its only
# visible reference is a commented-out `_js=on_load` argument on demo.load().
on_load="""
async()=>{
console.log("HELLO");
}
"""
def get_documents():
    """Load every bundled PDF and return the documents as one flat list.

    ``PyPDFLoader`` is constructed with a single file path, so each PDF gets
    its own loader and the per-file results are concatenated in the order
    the files are listed below.

    Returns:
        A list with the documents produced by ``PyPDFLoader.load()`` for
        each file, in file order.
    """
    # Paths are relative to the current working directory.
    pdf_paths = (
        "AI Guide for Government - AI CoE.pdf",
        "Ethics_of_Artificial_Intelligence-2.pdf",
        "IPOL_BRI(2016)571380_EN.pdf",
    )
    documents = []
    for pdf_path in pdf_paths:
        documents.extend(PyPDFLoader(pdf_path).load())
    return documents
#17357182991031590738file.pdf
def extract_pdfs(x, request: gr.Request, progress=gr.Progress()):
    """Load the bundled PDFs, normalise their whitespace and report progress.

    Before loading, the ``DB_DIR`` directory is removed (if present) and
    recreated empty.

    Args:
        x: Unused by this function.
        request: Request object passed in by the caller; only printed.
        progress: Gradio progress tracker used to show per-document progress.

    Returns:
        A ``(documents, all_text)`` tuple: the list of loaded documents, each
        with its ``page_content`` whitespace-normalised in place, and the
        concatenation of all normalised ``page_content`` strings.
    """
    import shutil

    progress(0, desc="Test", unit="Files")
    print("request", request)

    # Delete any existing index directory and recreate it empty.
    if os.path.exists(DB_DIR):
        shutil.rmtree(DB_DIR, ignore_errors=True)
    # rmtree() above swallows errors, so the directory may still exist;
    # exist_ok avoids a FileExistsError in that case.
    os.makedirs(DB_DIR, exist_ok=True)

    documents = []
    page_texts = []
    for num, doc in enumerate(progress.tqdm(get_documents())):
        print(f" {num} DocPg : ", doc.page_content)
        doc.page_content = replace_newlines_and_spaces(doc.page_content)
        documents.append(doc)
        page_texts.append(doc.page_content)
        # Short pause per document, presumably so the progress bar is visible.
        time.sleep(0.1)
    return documents, "".join(page_texts)
def replace_newlines_and_spaces(text: str) -> str:
    """Collapse every run of whitespace in *text* into a single space.

    Newlines, tabs and repeated spaces are all matched by ``\\s+``, so one
    substitution covers both the newline replacement and the space
    squeezing. Leading and trailing whitespace is reduced to one space, not
    stripped.

    Args:
        text: The string to normalise.

    Returns:
        *text* with each whitespace run replaced by one space character.
    """
    return re.sub(r"\s+", " ", text)
def test(x, request: gr.Request, progress=gr.Progress()):
    """Progress-bar demo: step through a fixed string with a short delay.

    Args:
        x: Unused by this function.
        request: Request object passed in by the caller; only printed.
        progress: Gradio progress tracker driving the progress display.

    Returns:
        The fixed string that was iterated over.
    """
    progress(0, desc="Test", unit="Files")
    print("request", request)
    letters = "abcdefghijklmnopqrstuv"
    tracked = progress.tqdm(letters, desc="TEST", unit="Files")
    for _ in tracked:
        # One tick per character, 0.1 s apart.
        time.sleep(0.1)
    return letters
# Build the UI: a read-only one-column table, an HTML status banner and a
# button that runs the progress demo.
with gr.Blocks() as demo:
    selected = gr.Dataframe(
        interactive=False,
        col_count=(1, "fixed"),
        headers=["Selected Files"],
    )
    prog = gr.HTML(
        value="<h3 style='text-align: center'> Processing...</h3>"
    )
    #gr.Interface(test, inputs=[selected])
    b = gr.Button()
    # Clicking the button runs test() with the table value and writes the
    # result into the HTML banner.
    b.click(test, selected, prog)
    # NOTE(review): extract_pdfs returns (documents, all_text), so the document
    # list is sent to the HTML banner and the text to the table; confirm this
    # output order is intended. It is also registered with inputs=None although
    # it declares a positional parameter `x`; verify how Gradio fills it.
    demo.load(extract_pdfs, inputs=None, outputs=[prog, selected])  # , _js=on_load)
demo.launch()