import gradio as gr
from transformers import pipeline
import fitz  # PyMuPDF, used to extract text from uploaded PDF files
import spaces

# Initialize the summarization pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
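# Note: facebook/bart-large-cnn accepts at most 1024 input tokens per call,
# which is why the helper functions below split long inputs into chunks.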
# Function to summarize text
def summarize_text(text, model, max_length=1024):
    # Split the input text into smaller chunks. This splits by characters,
    # as a rough proxy for the model's 1024-token input limit.
    chunks = [text[i:i + max_length] for i in range(0, len(text), max_length)]
    # Summarize each chunk separately
    summaries = []
    for chunk in chunks:
        summary = model(chunk, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
        summaries.append(summary)
    # Concatenate the chunk summaries into a single string
    summary = ' '.join(summaries)
    return summary
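# A possible refinement (sketch, not used above): chunk by tokens instead of
# characters using the pipeline's own tokenizer, so each chunk stays within
# the model's input limit. Assumes `model` is a transformers pipeline that
# exposes a `tokenizer` attribute.
#
#   def chunk_by_tokens(text, model, max_tokens=1024):
#       ids = model.tokenizer.encode(text, add_special_tokens=False)
#       return [model.tokenizer.decode(ids[i:i + max_tokens])
#               for i in range(0, len(ids), max_tokens)]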
# Function to read a PDF and summarize its text
def summarize_pdf(pdf_file, model):
    # gr.File may hand back either a plain file path or an object with a
    # .name attribute, depending on the Gradio version, so handle both
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    with fitz.open(pdf_path) as doc:
        text = ""
        for page in doc:
            text += page.get_text()
    return summarize_text(text, model)
@spaces.GPU(duration=60)
def summarize(input_type, input_text, uploaded_file):
    try:
        if input_type == "Text":
            summary = summarize_text(input_text, summarizer)
        else:
            summary = summarize_pdf(uploaded_file, summarizer)
        return summary
    except RuntimeError as e:
        if "CUDA out of memory" in str(e):
            return "There was a problem summarizing the text due to insufficient GPU memory. Please try again with a smaller input."
        else:
            return "There was a problem summarizing the text. Please try again later."
    except gr.Error as e:
        # gradio is imported as gr, so its Error class is reached via gr.Error
        if "GPU task aborted" in str(e):
            return "The summarization process took too long and was aborted. Please try again with a smaller input."
        else:
            return "There was a problem summarizing the text. Please try again later."
    except Exception:
        return "There was a problem summarizing the text. Please try again later."
# Define the footer
footer = """
<div style="text-align: center; margin-top: 20px;">
<a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
<a href="https://github.com/arad1367" target="_blank">GitHub</a> |
<a href="https://arad1367.pythonanywhere.com/" target="_blank">Live demo of my PhD defense</a>
<br>
Made with 💖 by Pejman Ebrahimi
</div>
"""
# Define the inputs and outputs
inputs = [
gr.Radio(["Text", "PDF"], label="Input Type"),
gr.Textbox(lines=10, label="Enter Text to Summarize", visible=False),
gr.File(label="Upload PDF file", visible=False)
]
outputs = [
gr.Textbox(label="Summary"),
gr.HTML(footer)
]
# Define the submit button
submit_btn = gr.Button("Submit")
# Define the Gradio interface
with gr.Blocks(theme='gradio/soft') as app:
gr.Markdown("# Text and PDF Summarization App")
gr.Markdown("Note: This model can handle a maximum of 1024 tokens. A token is a unit of text that the model can process at a time. When summarizing text, the input text is split into smaller chunks of up to 1024 tokens each, and each chunk is summarized separately. The summaries are then concatenated into a single summary.")
with gr.Row():
input_type = gr.Radio(["Text", "PDF"], label="Input Type")
with gr.Row():
input_text = gr.Textbox(lines=10, label="Enter Text to Summarize", visible=False)
uploaded_file = gr.File(label="Upload PDF file", visible=False)
with gr.Row():
submit_btn = gr.Button("Submit")
with gr.Row():
summary = gr.Textbox(label="Summary")
with gr.Row():
footer = gr.HTML(footer)
    # Define the change event handler for the input type radio buttons
    def input_type_change(input_type):
        if input_type == "Text":
            return {input_text: gr.Textbox(visible=True), uploaded_file: gr.File(visible=False)}
        else:
            return {input_text: gr.Textbox(visible=False), uploaded_file: gr.File(visible=True)}
    input_type.change(fn=input_type_change, inputs=[input_type], outputs=[input_text, uploaded_file])
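    # Side note (assumption about older installs): on Gradio 3.x the same
    # visibility toggle would typically be written with gr.update, e.g.
    #   return {input_text: gr.update(visible=True), uploaded_file: gr.update(visible=False)}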
    # Define the click event handler for the submit button
    submit_btn.click(fn=summarize, inputs=[input_type, input_text, uploaded_file], outputs=[summary])

# Launch the Gradio interface
if __name__ == "__main__":
    app.launch()