File size: 2,092 Bytes
6ba7703
ebe59c5
6ba7703
a21ca8d
6ba7703
ec9752e
 
9a9a041
84ac6a5
 
 
 
 
 
1415cc2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec9752e
1415cc2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ba7703
 
1415cc2
 
6ba7703
 
fdf645d
6ba7703
 
d9e6de6
4f90b25
 
 
 
 
 
 
 
769400f
6ba7703
 
caa9d5e
d9e6de6
1415cc2
6ba7703
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import os
import re
import shutil
import time

import gradio as gr
from langchain.document_loaders import PyPDFLoader

# Absolute path of the directory that contains this file.
ABS_PATH = os.path.dirname(os.path.abspath(__file__))
# Path of the "db" directory located next to this file.
DB_DIR = os.path.join(ABS_PATH, "db")

# JavaScript source (an async arrow function that logs "HELLO" to the console).
# NOTE(review): the only mention of this name elsewhere in the file is inside
# a trailing comment on the demo.load call, so it appears to be unused.
on_load="""
async()=>{
    console.log("HELLO");
}
"""

# PDF files loaded when get_documents() is called without arguments.
# The names are relative, so they are resolved against the working directory.
_DEFAULT_PDF_FILES = (
    "AI Guide for Government - AI CoE.pdf",
    "Ethics_of_Artificial_Intelligence-2.pdf",
    "IPOL_BRI(2016)571380_EN.pdf",
)


def get_documents(file_paths=None):
    """Load the documents of several PDF files into one flat list.

    ``PyPDFLoader`` handles a single file per instance, so a separate loader
    is created for every path instead of passing all paths to one
    constructor call (where the extra positional arguments are not treated
    as additional files).

    Args:
        file_paths: Optional iterable of PDF paths. When ``None``, the
            default files listed in ``_DEFAULT_PDF_FILES`` are used.

    Returns:
        A list containing everything returned by each loader's ``load()``,
        in the order the paths were given.
    """
    if file_paths is None:
        file_paths = _DEFAULT_PDF_FILES

    documents = []
    for path in file_paths:
        documents.extend(PyPDFLoader(path).load())
    return documents

def extract_pdfs(x, request: gr.Request, progress=gr.Progress()):
    """Load the PDF documents, normalise their whitespace and track progress.

    Args:
        x: Not used by this function.
        request: Gradio request object; it is only printed.
        progress: Gradio progress tracker; its ``tqdm`` wrapper is used to
            report progress while iterating over the loaded documents.

    Returns:
        A ``(documents, all_text)`` tuple: the list of loaded documents whose
        ``page_content`` has been cleaned, and the concatenation of all the
        cleaned ``page_content`` strings.
    """
    progress(0, desc="Test", unit = "Files")
    print("request", request)

    # Start from an empty index directory. rmtree is best-effort
    # (ignore_errors=True), so makedirs must tolerate a directory that is
    # still present, and must also create it when it did not exist before.
    shutil.rmtree(DB_DIR, ignore_errors=True)
    os.makedirs(DB_DIR, exist_ok=True)

    documents = []
    for num, doc in enumerate(progress.tqdm(get_documents())):
        print(f" {num} DocPg : ", doc.page_content)
        doc.page_content = replace_newlines_and_spaces(doc.page_content)
        documents.append(doc)
        # NOTE(review): presumably a deliberate delay so the progress bar is
        # visible in the UI -- confirm before removing.
        time.sleep(0.1)

    all_text = "".join(doc.page_content for doc in documents)
    return documents, all_text

# Matches any run of whitespace; newlines are whitespace, so they are covered.
_WHITESPACE_RUN = re.compile(r"\s+")


def replace_newlines_and_spaces(text: str) -> str:
    """Collapse every run of whitespace (including newlines) to one space.

    Leading and trailing whitespace is collapsed to a single space as well;
    it is not stripped.

    Args:
        text: The string to normalise.

    Returns:
        ``text`` with each whitespace run replaced by a single space.
    """
    return _WHITESPACE_RUN.sub(" ", text)


def test(x, request: gr.Request, progress=gr.Progress()):
    """Step through a fixed string with a progress bar and return the string.

    Args:
        x: Not used by this function.
        request: Gradio request object; it is only printed.
        progress: Gradio progress tracker driving the progress bar.

    Returns:
        The fixed string ``"abcdefghijklmnopqrstuv"``.
    """
    progress(0, desc="Test", unit = "Files")
    print("request", request)

    alphabet = "abcdefghijklmnopqrstuv"
    tracked_letters = progress.tqdm(alphabet, desc = "TEST", unit = "Files")
    # One short pause per character; the characters themselves are not used.
    for _ in tracked_letters:
        time.sleep(0.1)

    return alphabet

# Build the UI: a read-only table, an HTML status area and a button.
with gr.Blocks() as demo:

    # Non-editable table with a single fixed column.
    selected = gr.Dataframe(
        interactive=False,
        col_count=(1, "fixed"),
        headers=["Selected Files"],
    )
    # Initial status message; the heading is closed with a matching </h3> tag.
    prog = gr.HTML(
        value="<h3 style='text-align: center'> Processing...</h3>"
    )
    b = gr.Button()

    # Clicking the button runs `test` with the table value and writes the
    # returned string into the HTML area.
    b.click(test, selected, prog)

    # Run extract_pdfs when the page loads.
    # NOTE(review): extract_pdfs declares a positional parameter `x` but no
    # inputs are supplied here, and it returns (documents, all_text) while the
    # outputs are (HTML, Dataframe) -- confirm the intended wiring.
    demo.load(extract_pdfs, inputs=None, outputs=[prog, selected])  # _js=on_load is not passed

demo.launch()