File size: 3,225 Bytes
d380e2b
 
073a71c
d380e2b
 
 
3e9b30e
 
 
3717d6e
3e9b30e
 
073a71c
3e9b30e
 
 
 
 
 
 
 
 
 
d380e2b
32ba37a
 
 
 
 
d380e2b
 
3e9b30e
 
32ba37a
3e9b30e
d380e2b
 
 
 
3e9b30e
d380e2b
 
 
 
 
 
 
3e9b30e
c6fbe5d
3e9b30e
 
 
 
c6fbe5d
32ba37a
 
 
 
 
073a71c
32ba37a
3e9b30e
d380e2b
 
 
3e9b30e
 
 
 
d380e2b
3e9b30e
d380e2b
 
 
 
32ba37a
d380e2b
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# GNU
import gradio as gr
from generate import generate

# Top-level Gradio Blocks container; the UI is declared inside the
# `with demo:` context further down in this file.
demo = gr.Blocks()

def attempted_items_changer(attempted_items_input):
    """Parse the "Attempted Generated Items" textbox into a state update.

    Args:
        attempted_items_input: Raw textbox content.

    Returns:
        A one-entry dict mapping the module-level ``attempted_items`` state
        component to the parsed non-negative integer, or to the default of
        50 when the text is not a plain run of decimal digits.
    """
    # Anything that is not a string (e.g. None) is treated as invalid input.
    text = attempted_items_input if isinstance(attempted_items_input, str) else ""
    # str.isdecimal() is used instead of str.isdigit(): isdigit() also accepts
    # characters such as a superscript two that int() cannot convert, which
    # would raise ValueError. Empty or signed text is not decimal either, so
    # it falls back to the default.
    parsed = int(text) if text.isdecimal() else 50
    return {attempted_items: parsed}

def offset_changer(offset_input):
    """Parse the "Page Offset" textbox into a state update.

    Args:
        offset_input: Raw textbox content.

    Returns:
        A one-entry dict mapping the module-level ``offset`` state component
        to the parsed integer (a single optional leading "-" is allowed), or
        to the default of 0 for any other text.
    """
    # Anything that is not a string (e.g. None) is treated as invalid input.
    text = offset_input if isinstance(offset_input, str) else ""
    # Strip at most one leading minus sign. startswith() is safe on an empty
    # string, whereas indexing text[0] raises IndexError once the field has
    # been cleared.
    magnitude = text[1:] if text.startswith("-") else text
    # isdecimal() rather than isdigit(): isdigit() accepts characters (such as
    # superscript digits) that int() rejects with ValueError.
    parsed = int(text) if magnitude.isdecimal() else 0
    return {offset: parsed}

def custom_changer(custom_input):
    """Build the state update for the "Custom" textbox.

    Args:
        custom_input: Raw textbox content, passed through unchanged.

    Returns:
        A one-entry dict mapping the module-level ``custom`` state component
        to ``custom_input``.
    """
    state_update = {}
    state_update[custom] = custom_input
    return state_update

# Declare the UI. Components are created in display order inside the Blocks
# context, so the statement order below is significant.
with demo:

    # State components holding the parsed form values that are handed to
    # `generate` when the button is clicked.
    attempted_items = gr.State(50)
    offset = gr.State(0)
    custom = gr.State("")

    gr.Markdown("# PDF to Index")

    # --- Input section ---
    with gr.Column():

        gr.Markdown("### Load Inputs")

        uploaded_file = gr.File(label="Upload a PDF file", file_count="single", type="file")

        with gr.Row():
            attempted_items_input = gr.Textbox(
                value="50",
                show_label=True,
                label="Attempted Generated Items",
            )
            offset_input = gr.Textbox(
                value="0",
                show_label=True,
                label="Page Offset",
            )
            # Each textbox edit is parsed and copied into its State component.
            attempted_items_input.change(
                fn=attempted_items_changer,
                inputs=[attempted_items_input],
                outputs=[attempted_items],
            )
            offset_input.change(
                fn=offset_changer,
                inputs=[offset_input],
                outputs=[offset],
            )

        gr.HTML("<p><em>Attempted Generated Items is the number of terms intended to be automatically generated for index (output may be slightly lower), while Page Offset is a value added to each page number found in the file. In the case of invalid values, Attempted Items will default to 50 and Page Offset will default to 0. If the fields do not produce expected values, you may be clicking too quickly -- please adjust the field, wait, and try again.</em></p>")

        with gr.Row():
            custom_input = gr.Textbox(
                value="",
                show_label=True,
                label="Custom",
            )
            custom_input.change(
                fn=custom_changer,
                inputs=[custom_input],
                outputs=[custom],
            )

        gr.HTML("<p><em>You can add semicolon-separated values in Custom to add custom fields to index. Optionally, you can comma-separate terms between semicolons if you want multiple terms to contribute to a single index entry -- the first term will be the label for the index entry. If Custom does not produce expected values, you may be clicking too quickly -- please adjust the field, wait, and try again.</em></p>")

        gr.Markdown("---")

    # --- Output section ---
    with gr.Column():
        gr.Markdown("### Index From PDF")
        convert_button = gr.Button("Generate Index From PDF", variant="primary")
        out_placeholder = gr.HTML('<p><em>Output will appear below, with <a href="https://pypi.org/project/PyPDF2/">PyPDF2</a> for preprocessing and <a href="https://www.sciencedirect.com/science/article/abs/pii/S0020025519308588?via%3Dihub">yake</a> for processing:</em></p>')
        gr.Markdown("### Index")
        index = gr.Textbox(label="Index", placeholder="The index will appear here")

    # Clicking the button calls `generate` with the uploaded file and the
    # three State values, and writes its result into the Index textbox.
    convert_button.click(
        fn=generate,
        inputs=[uploaded_file, attempted_items, offset, custom],
        outputs=[index],
    )

demo.launch()