File size: 4,341 Bytes
52d559e
 
 
6d97912
52d559e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6f86b8b
52d559e
 
 
 
 
 
 
83b518c
 
 
 
 
6f86b8b
 
 
 
 
52d559e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import gradio as gr
from transformers import pipeline
import fitz
import spaces

# Build the shared summarization pipeline once, when the module is loaded
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)

# Function to summarize text
def summarize_text(text, model, max_length=1024):
    """Summarize ``text`` piece by piece and join the partial summaries.

    The text is cut into consecutive slices of at most ``max_length``
    characters (characters, not tokens). Each non-blank slice is passed to
    ``model`` and the resulting summaries are joined with single spaces.

    Args:
        text: The text to summarize. ``None`` is treated like an empty string.
        model: Callable invoked as
            ``model(chunk, max_length=150, min_length=40, do_sample=False)``
            and expected to return a sequence whose first item is a mapping
            with a ``'summary_text'`` key.
        max_length: Maximum number of characters per slice; must be >= 1.

    Returns:
        The space-joined summaries, or ``""`` when there is nothing to
        summarize (``None``, empty, or whitespace-only input).

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")

    # Nothing to summarize: avoid calling the model at all.
    if not text or not text.strip():
        return ""

    chunks = (
        text[start:start + max_length]
        for start in range(0, len(text), max_length)
    )

    # Whitespace-only slices carry no content, so they are skipped rather
    # than being sent to the model.
    return ' '.join(
        model(chunk, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
        for chunk in chunks
        if chunk.strip()
    )

# Function to read PDF and summarize
def summarize_pdf(pdf_file, model):
    """Extract the text of every page of a PDF and summarize it.

    Args:
        pdf_file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            object whose ``name`` attribute holds the path to the PDF.
        model: Summarization callable forwarded to ``summarize_text``.

    Returns:
        The summary string returned by ``summarize_text``.

    Raises:
        ValueError: If ``pdf_file`` is ``None`` (no file was supplied).
    """
    import os

    if pdf_file is None:
        raise ValueError("No PDF file was provided.")

    # Plain paths are used as-is; anything else is expected to expose the
    # path through ``.name`` (the only form the original code accepted).
    if isinstance(pdf_file, (str, os.PathLike)):
        path = os.fspath(pdf_file)
    else:
        path = pdf_file.name

    with fitz.open(path) as doc:
        text = "".join(page.get_text() for page in doc)
    return summarize_text(text, model)

@spaces.GPU(duration=60)
def summarize(input_type, input_text, uploaded_file):
    """Summarize either the typed text or the uploaded PDF.

    Args:
        input_type: ``"Text"`` selects ``input_text``; any other value
            selects ``uploaded_file``.
        input_text: Raw text to summarize when ``input_type`` is ``"Text"``.
        uploaded_file: Uploaded PDF, passed to ``summarize_pdf`` otherwise.

    Returns:
        The summary on success, or a user-facing error message string when
        summarization fails. Exceptions are never propagated to the caller.
    """
    generic_error = "There was a problem summarizing the text. Please try again later."
    try:
        if input_type == "Text":
            return summarize_text(input_text, summarizer)
        return summarize_pdf(uploaded_file, summarizer)
    except RuntimeError as e:
        if "CUDA out of memory" in str(e):
            return "There was a problem summarizing the text due to insufficient GPU memory. Please try again with a smaller input."
        return generic_error
    # The module is imported as ``gr``; the previous ``gradio.exceptions.Error``
    # raised NameError while matching this handler, so non-RuntimeError
    # failures escaped instead of reaching the generic fallback.
    # NOTE(review): if "GPU task aborted" is raised by the ``spaces.GPU``
    # wrapper rather than inside this body, this handler cannot see it — confirm.
    except gr.Error as e:
        if "GPU task aborted" in str(e):
            return "The summarization process took too long and was aborted. Please try again with a smaller input."
        return generic_error
    except Exception:
        return generic_error

# HTML footer shown at the bottom of the app (author links and credit line).
# The heart is written as the real emoji; it had been stored as mojibake.
footer = """
<div style="text-align: center; margin-top: 20px;">
    <a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
    <a href="https://github.com/arad1367" target="_blank">GitHub</a> |
    <a href="https://arad1367.pythonanywhere.com/" target="_blank">Live demo of my PhD defense</a>
    <br>
    Made with 💖 by Pejman Ebrahimi
</div>
"""

# Stand-alone input components: a mode selector plus the text box and file
# picker, both created hidden.
inputs = [
    gr.Radio(choices=["Text", "PDF"], label="Input Type"),
    gr.Textbox(label="Enter Text to Summarize", lines=10, visible=False),
    gr.File(label="Upload PDF file", visible=False),
]
# Stand-alone output components: the summary box and the HTML footer.
outputs = [gr.Textbox(label="Summary"), gr.HTML(value=footer)]

# Stand-alone submit button
submit_btn = gr.Button(value="Submit")

# Build the Gradio Blocks UI: input-type selector, the two alternative inputs,
# a submit button, the summary box and the footer.
with gr.Blocks(theme='gradio/soft') as app:
    gr.Markdown("# Text and PDF Summarization App")
    # The note says "characters" because summarize_text slices the input by
    # character count, not by token count.
    gr.Markdown("Note: This model can handle a maximum of 1024 tokens. A token is a unit of text that the model can process at a time. When summarizing text, the input text is split into smaller chunks of up to 1024 characters each, and each chunk is summarized separately. The summaries are then concatenated into a single summary.")
    with gr.Row():
        input_type = gr.Radio(["Text", "PDF"], label="Input Type")
    with gr.Row():
        # Both inputs start hidden; the radio selection reveals one of them.
        input_text = gr.Textbox(lines=10, label="Enter Text to Summarize", visible=False)
        uploaded_file = gr.File(label="Upload PDF file", visible=False)
    with gr.Row():
        submit_btn = gr.Button("Submit")
    with gr.Row():
        summary = gr.Textbox(label="Summary")
    with gr.Row():
        # Rebinds ``footer`` from the HTML string to the rendered component.
        footer = gr.HTML(footer)

    def input_type_change(selected):
        """Show the text box for "Text" and the file picker for anything else."""
        show_text = selected == "Text"
        return {
            input_text: gr.Textbox(visible=show_text),
            uploaded_file: gr.File(visible=not show_text),
        }

    # Toggle input visibility whenever the radio selection changes
    input_type.change(fn=input_type_change, inputs=[input_type], outputs=[input_text, uploaded_file])

    # Run the summarization when the submit button is clicked
    submit_btn.click(fn=summarize, inputs=[input_type, input_text, uploaded_file], outputs=[summary])

# Launch the Gradio interface only when executed as a script
if __name__ == "__main__":
    app.launch()