# IndexingAlpha / app.py
# Uploaded by ewgewgewg
# Commit: add custom insert syntax (073a71c)
# GNU
import gradio as gr
from generate import generate
# Top-level Blocks container; the components and event handlers are attached
# to it inside the `with demo:` section further down, and it is launched at the
# end of the file.
demo = gr.Blocks()
def attempted_items_changer(attempted_items_input):
    """Parse the "Attempted Generated Items" text into the ``attempted_items`` state.

    Args:
        attempted_items_input: Raw text of the textbox. ``None`` is treated
            like an empty string.

    Returns:
        A single-entry dict keyed by the ``attempted_items`` state component.
        The value is the parsed non-negative integer, or 50 when the text is
        not a plain run of decimal digits (empty, signed, fractional, ...).
    """
    text = attempted_items_input or ""
    # str.isdecimal() only accepts characters that int() can parse, whereas
    # str.isdigit() also lets through e.g. superscript digits ("²"), for
    # which int() raises ValueError.
    if not text.isdecimal():
        return {attempted_items: 50}
    # A digit-only string can never be negative, so no clamping is needed.
    return {attempted_items: int(text)}
def offset_changer(offset_input):
    """Parse the "Page Offset" text into the ``offset`` state.

    Args:
        offset_input: Raw text of the textbox. ``None`` is treated like an
            empty string.

    Returns:
        A single-entry dict keyed by the ``offset`` state component. The
        value is the parsed integer (one optional leading ``-`` is allowed),
        or 0 when the text is not a valid integer, including when the
        textbox is empty.
    """
    text = offset_input or ""
    # Drop one optional leading minus sign; the remainder must be all digits.
    # startswith() is used instead of indexing text[0] so that a cleared
    # textbox (empty string) falls through to the default rather than
    # raising IndexError.
    magnitude = text[1:] if text.startswith("-") else text
    # isdecimal() (not isdigit()) so that only characters int() can parse
    # are accepted.
    if not magnitude.isdecimal():
        return {offset: 0}
    return {offset: int(text)}
def custom_changer(custom_input):
    """Store the Custom textbox text, unmodified, in the ``custom`` state.

    Returns a single-entry dict keyed by the ``custom`` state component.
    """
    state_update = {}
    state_update[custom] = custom_input
    return state_update
# Build the UI inside the Blocks container and wire up the event handlers.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# whose indentation had been lost -- confirm the row/column layout.
with demo:
    # State holders (initial values 50, 0 and ""). They are updated by the
    # *_changer callbacks through the .change() events below and passed to
    # generate() when the button is clicked.
    attempted_items = gr.State(50)
    offset = gr.State(0)
    custom = gr.State("")
    gr.Markdown("# PDF to Index")
    with gr.Column():
        gr.Markdown("### Load Inputs")
        # NOTE(review): accepted values for `type` depend on the installed
        # Gradio version -- confirm "file" is still supported.
        uploaded_file = gr.File(
            label="Upload a PDF file",
            file_count="single",
            type="file"
        )
        with gr.Row():
            attempted_items_input = gr.Textbox(value="50", show_label=True, label="Attempted Generated Items")
            offset_input = gr.Textbox(value="0", show_label=True, label="Page Offset")
            # Each textbox change is validated by its callback and the result
            # is written to the matching State component.
            attempted_items_input.change(attempted_items_changer, [attempted_items_input], [attempted_items])
            offset_input.change(offset_changer, [offset_input], [offset])
        gr.HTML("<p><em>Attempted Generated Items is the number of terms intended to be automatically generated for index (output may be slightly lower), while Page Offset is a value added to each page number found in the file. In the case of invalid values, Attempted Items will default to 50 and Page Offset will default to 0. If the fields do not produce expected values, you may be clicking too quickly -- please adjust the field, wait, and try again.</em></p>")
        with gr.Row():
            custom_input = gr.Textbox(value="", show_label=True, label="Custom")
            # The raw Custom text is copied into the `custom` State as-is.
            custom_input.change(custom_changer, [custom_input], [custom])
        gr.HTML("<p><em>You can add semicolon-separated values in Custom to add custom fields to index. Optionally, you can comma-separate terms between semicolons if you want multiple terms to contribute to a single index entry -- the first term will be the label for the index entry. If Custom does not produce expected values, you may be clicking too quickly -- please adjust the field, wait, and try again.</em></p>")
    gr.Markdown("---")
    with gr.Column():
        gr.Markdown("### Index From PDF")
        convert_button = gr.Button("Generate Index From PDF", variant="primary")
        # Static caption; the name is not referenced again in this section.
        out_placeholder = gr.HTML('<p><em>Output will appear below, with <a href="https://pypi.org/project/PyPDF2/">PyPDF2</a> for preprocessing and <a href="https://www.sciencedirect.com/science/article/abs/pii/S0020025519308588?via%3Dihub">yake</a> for processing:</em></p>')
        gr.Markdown("### Index")
        index = gr.Textbox(
            label="Index", placeholder="The index will appear here"
        )
        # Clicking the button calls generate() with the uploaded file and the
        # three State values; its result is shown in the Index textbox.
        convert_button.click(
            fn=generate,
            inputs=[uploaded_file, attempted_items, offset, custom],
            outputs=[index],
        )
# Start the app.
demo.launch()