models / app1.py
nightfury's picture
Update app1.py
9a9a041 verified
raw
history blame
2.09 kB
import os
import re
import shutil
import time

import gradio as gr
from langchain.document_loaders import PyPDFLoader
# Directory that contains this source file.
ABS_PATH = os.path.split(os.path.abspath(__file__))[0]

# Location of the on-disk index directory, kept next to this file.
DB_DIR = os.path.join(ABS_PATH, "db")

# JavaScript snippet that logs "HELLO" to the browser console; only referenced
# from a commented-out `_js=` argument on the `demo.load` call.
on_load = (
    "\n"
    "async()=>{\n"
    'console.log("HELLO");\n'
    "}\n"
)
def get_documents():
    """Load every page of the bundled PDF files.

    ``PyPDFLoader`` handles a single file per instance, so each PDF gets its
    own loader and the resulting pages are concatenated in file order.

    Returns:
        A list with the loaded page documents of all PDFs.
    """
    pdf_paths = (
        "AI Guide for Government - AI CoE.pdf",
        "Ethics_of_Artificial_Intelligence-2.pdf",
        "IPOL_BRI(2016)571380_EN.pdf",
    )
    pages = []
    for pdf_path in pdf_paths:
        # One loader per file: extra positional arguments to PyPDFLoader are
        # NOT additional file paths.
        pages.extend(PyPDFLoader(pdf_path).load())
    return pages
#17357182991031590738file.pdf
def extract_pdfs(x, request: gr.Request, progress=gr.Progress()):
    """Reset the index directory, then load and normalise all PDF pages.

    Args:
        x: Unused by this function.
        request: The Gradio request object; only printed for debugging.
        progress: Gradio progress tracker used to report per-page progress.

    Returns:
        A ``(documents, all_text)`` tuple: the list of page documents with
        whitespace-normalised ``page_content``, and the concatenation of all
        normalised page texts.
    """
    progress(0, desc="Test", unit="Files")
    print("request", request)

    # Start from an empty index directory. rmtree(ignore_errors=True) is a
    # no-op when the directory is missing, and makedirs(exist_ok=True) does
    # not fail if removal was incomplete.
    shutil.rmtree(DB_DIR, ignore_errors=True)
    os.makedirs(DB_DIR, exist_ok=True)

    documents = []
    for num, doc in enumerate(progress.tqdm(get_documents())):
        print(f" {num} DocPg : ", doc.page_content)
        doc.page_content = replace_newlines_and_spaces(doc.page_content)
        documents.append(doc)
        # NOTE(review): fixed per-page delay kept from the original,
        # presumably so the progress bar is visible — confirm it is wanted.
        time.sleep(0.1)

    # Join once instead of growing a string inside the loop.
    all_text = "".join(doc.page_content for doc in documents)
    return documents, all_text
def replace_newlines_and_spaces(text):
    """Collapse every run of whitespace in *text* into a single space.

    Newlines, tabs and repeated spaces are all matched by ``\\s+``, so a
    separate newline replacement is unnecessary. Leading and trailing
    whitespace is reduced to one space, not stripped.

    Args:
        text: The string to normalise.

    Returns:
        The normalised string.
    """
    return re.sub(r"\s+", " ", text)
def test(x, request: gr.Request, progress=gr.Progress()):
    """Demo handler that steps a progress bar over a fixed string.

    Args:
        x: Unused by this function.
        request: The Gradio request object; only printed for debugging.
        progress: Gradio progress tracker driven once per character.

    Returns:
        The fixed string ``"abcdefghijklmnopqrstuv"``.
    """
    progress(0, desc="Test", unit="Files")
    print("request", request)
    alphabet = "abcdefghijklmnopqrstuv"
    tracked = progress.tqdm(alphabet, desc="TEST", unit="Files")
    for _ in tracked:
        # Short pause per step between progress updates.
        time.sleep(0.1)
    return alphabet
# Build the UI: a read-only one-column table, a status banner, and a button
# that runs the `test` progress demo.
with gr.Blocks() as demo:
    selected = gr.Dataframe(
        interactive=False,
        col_count=(1, "fixed"),
        headers=["Selected Files"],
    )
    # The heading is closed with a matching </h3> tag.
    prog = gr.HTML(
        value="<h3 style='text-align: center'> Processing...</h3>"
    )
    #gr.Interface(test, inputs=[selected])
    b = gr.Button()
    b.click(test, selected, prog)
    # NOTE(review): extract_pdfs returns (documents, all_text) and declares a
    # positional parameter `x`, but it is wired here with inputs=None and
    # outputs=[prog, selected] — confirm the values map onto an HTML and a
    # Dataframe component as intended.
    demo.load(extract_pdfs, inputs=None, outputs=[prog, selected]) #, _js=on_load)

demo.launch()