File size: 1,076 Bytes
e98eb43
 
 
 
 
 
 
 
 
 
 
 
db7b23d
 
e98eb43
 
 
 
 
 
 
 
 
 
e974628
e98eb43
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import gradio as gr
from PyPDF2 import PdfReader

def read_pdf(*inps):
    """Extract text from a page range of a PDF, applying optional rewrite rules.

    The four inputs arrive positionally and are unpacked in this order:

    Args:
        pdf_file: The PDF source, handed unchanged to ``PdfReader``. ``None``
            (nothing selected yet) yields an empty string.
        start_index: 0-based index of the first page to read; coerced with
            ``int()``.
        end_index: 0-based index of the last page to read, inclusive; coerced
            with ``int()``.
        text_rule: ``;``-separated Python expressions. For every page each
            expression is evaluated in order with the page's text bound to
            ``sub``, and its result becomes the new ``sub``. Blank segments
            are ignored. Because the split is textual, an expression cannot
            itself contain a ``;``.

    Returns:
        The (rewritten) text of the selected pages, concatenated in page order.

    Raises:
        ValueError: If the call does not supply exactly four positional
            arguments.
        Exception: Whatever a rule expression raises when evaluated.
    """
    pdf_file, start_index, end_index, text_rule = inps
    if pdf_file is None:
        # Nothing to read; avoid handing None to PdfReader.
        return ""

    # Parse the rules once rather than once per page, and drop blank segments
    # (e.g. the one after a trailing ";") that eval() would reject.
    rules = [rule for rule in (text_rule or "").split(";") if rule.strip()]

    reader = PdfReader(pdf_file)
    pages = reader.pages
    text = ""
    for page in pages[int(start_index):int(end_index) + 1]:
        sub = page.extract_text()
        for rule in rules:
            # SECURITY: eval() executes arbitrary Python taken from the caller.
            # Only expose this function to trusted users. The expression sees
            # this function's locals (notably ``sub``), so the local names
            # here are part of the de facto rule contract.
            sub = eval(rule)
        text += sub
    return text

# UI: one row with the input controls in a left-hand column and the
# extracted text in a box beside it.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            pdf_file = gr.File(label="PDF", interactive=True)
            # read_pdf treats both bounds as 0-based and the end page as
            # inclusive, so the 0/0 defaults select the first page only.
            with gr.Row():
                start_index = gr.Number(label="start_page", value=0)
                end_index = gr.Number(label="end_page", value=0)
            # ";"-separated expressions applied to each page's text (`sub`).
            text_rule = gr.Textbox(
                label="rule",
                value='sub.replace("  ", " ");',
            )
            submit = gr.Button(value="submit")
        text_output = gr.Textbox(interactive=True)

    # Order must match the positional unpacking inside read_pdf.
    inputs = [pdf_file, start_index, end_index, text_rule]
    submit.click(
        fn=read_pdf,
        inputs=inputs,
        outputs=text_output,
    )

demo.launch()