# Spaces: Sleeping
import gradio as gr | |
from PyPDF2 import PdfReader | |
def read_pdf(*inps):
    """Extract text from a range of PDF pages, applying optional rewrite rules.

    Args:
        *inps: Exactly four positional values, in this order:

            pdf_file: The PDF source handed to ``PdfReader``. ``None`` means
                no file was supplied.
            start_index: Index of the first page to read (converted with
                ``int``).
            end_index: Index of the last page to read, inclusive (converted
                with ``int``).
            text_rule: ``;``-separated Python expressions. Each expression is
                evaluated with the current page's text bound to ``sub``, and
                its result becomes the new ``sub``. A falsy value means "no
                rules". Because the string is split on every ``;``, a rule
                cannot itself contain a semicolon.

    Returns:
        The concatenated (rule-processed) text of the selected pages, or an
        empty string when no PDF was supplied.

    Raises:
        ValueError: If ``inps`` does not hold exactly four values.
    """
    pdf_file, start_index, end_index, text_rule = inps

    # Nothing uploaded yet: return empty text instead of failing in PdfReader.
    if pdf_file is None:
        return ""

    # Parse the rule list once (it does not depend on the page) and drop
    # empty / whitespace-only fragments such as the one after a trailing ";".
    rules = []
    if text_rule:
        rules = [rule for rule in text_rule.split(";") if rule.strip()]

    reader = PdfReader(pdf_file)
    pages = reader.pages
    text = ""
    for page in pages[int(start_index):int(end_index) + 1]:
        sub = page.extract_text()
        for rule in rules:
            # SECURITY: eval() executes arbitrary Python taken from the rule
            # text. Anyone who can set `text_rule` can run code in this
            # process. Only expose this to trusted users, or replace it with
            # a restricted rule syntax.
            # The eval stays directly in this function body so that rules
            # keep seeing the local name `sub` (and the other locals).
            sub = eval(rule)
        text += sub
    return text
# Build the UI: PDF upload, page range and rule inputs, plus a text box that
# receives the extracted text.
# NOTE(review): the original indentation was lost; the nesting below is
# reconstructed from the statement order -- confirm against the intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        # Input controls.
        with gr.Column():
            pdf_file = gr.File(interactive=True, label="PDF")
            with gr.Row():
                start_index = gr.Number(value=0, label="start_page")
                end_index = gr.Number(value=0, label="end_page")
            text_rule = gr.Textbox(
                value='sub.replace(" ", " ");',
                label="rule",
            )
            submit = gr.Button(value="submit")
        # Extraction result; editable by the user.
        text_output = gr.Textbox(interactive=True)

    # The order of this list must match the unpacking order in read_pdf.
    inputs = [pdf_file, start_index, end_index, text_rule]
    submit.click(
        fn=read_pdf,
        inputs=inputs,
        outputs=text_output,
    )

demo.launch()