Spaces:
Runtime error
Runtime error
File size: 4,341 Bytes
52d559e 6d97912 52d559e 6f86b8b 52d559e 83b518c 6f86b8b 52d559e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import gradio as gr
from transformers import pipeline
import fitz
import spaces
# Build the summarization pipeline once at import time; the request handler
# below passes this single instance to the summarization helpers.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
# Function to summarize text
def summarize_text(text, model, max_length=1024):
    """Summarize ``text`` chunk by chunk and join the partial summaries.

    The text is cut into consecutive slices of at most ``max_length``
    *characters* (not tokens). Each non-blank slice is passed to ``model``
    and the resulting summaries are joined with single spaces.

    Args:
        text: The text to summarize. ``None`` or an empty string yields ``""``.
        model: A callable invoked as
            ``model(chunk, max_length=150, min_length=40, do_sample=False)``
            that returns a sequence whose first item is a mapping with a
            ``'summary_text'`` key.
        max_length: Maximum number of characters per chunk; must be positive.

    Returns:
        The chunk summaries joined by spaces, or ``""`` when there is
        nothing to summarize.

    Raises:
        ValueError: If ``max_length`` is not positive.
    """
    if max_length <= 0:
        # A zero step would make range() fail and a negative one would
        # silently produce no chunks; reject both explicitly.
        raise ValueError("max_length must be a positive integer")
    if not text:
        return ""

    # Split the input text into fixed-size character slices.
    chunks = (text[i:i + max_length] for i in range(0, len(text), max_length))

    # Summarize each chunk separately, skipping slices that contain only
    # whitespace since there is nothing in them to summarize.
    summaries = [
        model(chunk, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
        for chunk in chunks
        if chunk.strip()
    ]

    # Concatenate the summaries into a single string.
    return ' '.join(summaries)
# Function to read PDF and summarize
def summarize_pdf(pdf_file, model):
    """Extract the text of a PDF and summarize it with ``summarize_text``.

    Args:
        pdf_file: The uploaded PDF, either an object exposing the file path
            as ``.name`` or a plain path string.
        model: Summarization callable forwarded to ``summarize_text``.

    Returns:
        The summary string produced by ``summarize_text`` for the
        concatenated text of all pages.

    Raises:
        ValueError: If no file was provided.
    """
    if pdf_file is None:
        # Fail with a clear message instead of an AttributeError on None.name.
        raise ValueError("No PDF file was provided.")

    # Accept both file wrappers (path stored in ``.name``) and plain paths.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    with fitz.open(pdf_path) as doc:
        # Join the page texts once rather than growing a string page by page.
        text = "".join(page.get_text() for page in doc)
    return summarize_text(text, model)
@spaces.GPU(duration=60)
def summarize(input_type, input_text, uploaded_file):
    """Summarize either pasted text or an uploaded PDF for the UI.

    Args:
        input_type: ``"Text"`` to summarize ``input_text``; any other value
            selects the PDF path.
        input_text: Text entered by the user (used when ``input_type`` is
            ``"Text"``).
        uploaded_file: Uploaded PDF file (used otherwise).

    Returns:
        The summary on success. On failure, a user-facing error message is
        returned instead of raising, so the message appears in the output box.
    """
    # NOTE(review): `duration=60` on the decorator is presumably the GPU time
    # budget in seconds — confirm against the `spaces` documentation.
    try:
        if input_type == "Text":
            return summarize_text(input_text, summarizer)
        return summarize_pdf(uploaded_file, summarizer)
    except RuntimeError as e:
        if "CUDA out of memory" in str(e):
            return "There was a problem summarizing the text due to insufficient GPU memory. Please try again with a smaller input."
        return "There was a problem summarizing the text. Please try again later."
    # The module is imported as `gr`; the bare name `gradio` is unbound, so
    # referencing it here would raise NameError whenever a non-RuntimeError
    # exception reached this clause.
    except gr.Error as e:
        if "GPU task aborted" in str(e):
            return "The summarization process took too long and was aborted. Please try again with a smaller input."
        return "There was a problem summarizing the text. Please try again later."
    except Exception:
        # Top-level UI boundary: always show a friendly message.
        return "There was a problem summarizing the text. Please try again later."
# Define the footer
# HTML snippet with the author's links and credit line; it is rendered through
# gr.HTML in the last row of the Blocks layout.
# NOTE(review): the "π" in the credit line may be a mis-encoded emoji or
# symbol — confirm the intended character before changing the string.
footer = """
<div style="text-align: center; margin-top: 20px;">
<a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
<a href="https://github.com/arad1367" target="_blank">GitHub</a> |
<a href="https://arad1367.pythonanywhere.com/" target="_blank">Live demo of my PhD defense</a>
<br>
Made with π by Pejman Ebrahimi
</div>
"""
# Define the inputs and outputs
# NOTE(review): `inputs`, `outputs` and this `submit_btn` are not referenced by
# any other code visible in this file; the Blocks layout below creates its own
# components and rebinds `submit_btn`. Presumably leftovers from an earlier
# gr.Interface-based version — confirm before removing.
inputs = [
    gr.Radio(["Text", "PDF"], label="Input Type"),
    gr.Textbox(lines=10, label="Enter Text to Summarize", visible=False),
    gr.File(label="Upload PDF file", visible=False)
]
outputs = [
    gr.Textbox(label="Summary"),
    gr.HTML(footer)
]
# Define the submit button
submit_btn = gr.Button("Submit")
# Define the Gradio interface
with gr.Blocks(theme='gradio/soft') as app:
    gr.Markdown("# Text and PDF Summarization App")
    # The note matches what summarize_text actually does: it slices the input
    # into fixed-size runs of characters, not token-based chunks.
    gr.Markdown("Note: The underlying model can process at most 1024 tokens at a time. A token is a unit of text that the model can process. To stay within that limit, the input text is split into smaller chunks of up to 1024 characters each, and each chunk is summarized separately. The summaries are then concatenated into a single summary.")
    with gr.Row():
        input_type = gr.Radio(["Text", "PDF"], label="Input Type")
    with gr.Row():
        # Both inputs start hidden; choosing an input type reveals the
        # matching one (see input_type_change below).
        input_text = gr.Textbox(lines=10, label="Enter Text to Summarize", visible=False)
        uploaded_file = gr.File(label="Upload PDF file", visible=False)
    with gr.Row():
        submit_btn = gr.Button("Submit")
    with gr.Row():
        summary = gr.Textbox(label="Summary")
    with gr.Row():
        # Rebinds `footer` from the HTML string to the rendered component.
        footer = gr.HTML(footer)

    # Define the change event handler for the input type radio buttons
    def input_type_change(input_type):
        """Show the text box for "Text" and the file upload otherwise.

        Args:
            input_type: The currently selected radio value.

        Returns:
            A mapping from each input component to its visibility update.
        """
        show_text = input_type == "Text"
        return {
            input_text: gr.Textbox(visible=show_text),
            uploaded_file: gr.File(visible=not show_text),
        }

    input_type.change(fn=input_type_change, inputs=[input_type], outputs=[input_text, uploaded_file])

    # Define the click event handler for the submit button
    submit_btn.click(fn=summarize, inputs=[input_type, input_text, uploaded_file], outputs=[summary])
# Launch the Gradio interface
# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    app.launch()
|