# Hugging Face Spaces status at the time of capture: Runtime error
# Source: https://huggingface.co/spaces/gianb/PDF_Summarized_TTS
# Here are the imports
import gradio as gr | |
import PyPDF2 | |
from transformers import pipeline, AutoProcessor, AutoModel, AutoTokenizer | |
from PyPDF2 import PdfReader | |
import torch | |
import soundfile as sf | |
from IPython.display import Audio | |
from datasets import load_dataset | |
from pdfminer.high_level import extract_pages, extract_text | |
from io import BytesIO | |
# Here is the code
# Both models are instantiated once at module load so that every request
# handled by the interface reuses the same pipeline objects.
summarization = pipeline(
    task="summarization",
    model="pszemraj/long-t5-tglobal-base-16384-book-summary",
)
synthesiser = pipeline(task="text-to-speech", model="facebook/mms-tts-eng")
def abstract_extract(uploaded_file):
    """Return the abstract section of a PDF as plain text.

    Pages are scanned in order. On the first page whose text contains the
    word "abstract" (case-insensitive), the text from that word up to the
    next occurrence of "introduction" on the same page is returned. When no
    "introduction" follows on that page, the remainder of the page is
    returned instead.

    Args:
        uploaded_file: Raw bytes of the PDF document.

    Returns:
        The extracted abstract text, or an empty string when no page
        contains the word "abstract".
    """
    pdf_reader = PyPDF2.PdfReader(BytesIO(uploaded_file))
    for page in pdf_reader.pages:
        # Fall back to "" so .lower() is always called on a string, even if
        # the page yields no extractable text.
        text = page.extract_text() or ""
        lowered = text.lower()

        start_index = lowered.find("abstract")
        if start_index == -1:
            continue

        # Look for the end marker only *after* the abstract heading: an
        # earlier "introduction" (e.g. in a header) would otherwise produce
        # an empty slice.
        end_index = lowered.find("introduction", start_index)
        if end_index == -1:
            # find() signals "not found" with -1; used as a slice bound that
            # would silently drop the page's last character.
            end_index = len(text)
        return text[start_index:end_index]
    return ""
def summarize_and_speech(pdf_file):
    """Summarize a paper's abstract and synthesize the summary as speech.

    Args:
        pdf_file: Raw bytes of the uploaded PDF, or None/empty when no file
            is currently selected.

    Returns:
        A ``(summary, audio)`` pair. ``summary`` is the short summary text
        (or a notice when no abstract could be located). ``audio`` is a
        ``(sampling_rate, waveform)`` tuple for the audio output component,
        or None when there is nothing to speak.
    """
    # Nothing uploaded (or the upload was cleared): return empty outputs
    # instead of handing an empty stream to the PDF parser.
    if not pdf_file:
        return "", None

    abstract_text = abstract_extract(pdf_file)
    if not abstract_text.strip():
        return "No abstract section was found in the uploaded PDF.", None

    summary = summarization(abstract_text, max_length=15, min_length=10)[0]['summary_text']

    tts_output = synthesiser(summary)
    # For a single input string the text-to-speech pipeline returns one dict
    # rather than a list of dicts; accept either shape.
    if isinstance(tts_output, list):
        tts_output = tts_output[0]

    # The pipeline documents the waveform as (nb_channels, audio_length);
    # drop the singleton channel axis for mono, and put samples first if
    # several channels remain, which is the layout the audio output expects.
    waveform = tts_output["audio"].squeeze()
    if waveform.ndim == 2:
        waveform = waveform.T
    return summary, (tts_output["sampling_rate"], waveform)
# Input component: the uploaded PDF is passed to the callback as raw bytes.
pdf_upload = gr.File(label="Upload PDF", type="binary")

# Output components: the text summary and its spoken rendition.
summary_box = gr.Textbox(label="Abstract Summary:")
speech_player = gr.Audio(type="filepath", label="Summary Speech")

# live=True re-runs the callback whenever the input changes, without
# requiring a submit click.
iface = gr.Interface(
    fn=summarize_and_speech,
    inputs=pdf_upload,
    outputs=[summary_box, speech_player],
    title="Abstract Research Paper Summarizer",
    description="Upload a Research Paper PDF File. The model will generate a one line summary of the Abstract section and a speech audio.",
    live=True,
)

iface.launch()