Spaces:

gianb
/

PDF_Summarizer_and_TTS

Runtime error

App Files Files Community

PDF_Summarizer_and_TTS / app.py

gianb

Update app.py

815bf2f 11 months ago

raw

history blame

1.84 kB

	#https://huggingface.co/spaces/gianb/PDF_Summarized_TTS

	# Here are the imports

	import gradio as gr
	import PyPDF2
	from transformers import pipeline, AutoProcessor, AutoModel, AutoTokenizer
	from PyPDF2 import PdfReader
	import torch
	import soundfile as sf
	from IPython.display import Audio
	from datasets import load_dataset
	from pdfminer.high_level import extract_pages, extract_text
	from io import BytesIO

	#Here is the code

	# Hugging Face model checkpoints used by the two pipelines below.
	SUMMARIZATION_MODEL = "pszemraj/long-t5-tglobal-base-16384-book-summary"
	TTS_MODEL = "facebook/mms-tts-eng"

	# Both pipelines are created once, at module load, as module-level objects.
	summarization = pipeline(task="summarization", model=SUMMARIZATION_MODEL)
	synthesiser = pipeline(task="text-to-speech", model=TTS_MODEL)

	def abstract_extract(uploaded_file):
	    """Return the abstract section of a PDF as plain text.

	    The pages are scanned in order. On the first page whose text contains
	    the word "abstract" (case-insensitive), the text from that word up to
	    the next occurrence of "introduction" is returned. If "introduction"
	    does not follow on that page, the rest of the page is returned.

	    Args:
	        uploaded_file: The PDF content as a bytes-like object. ``None`` or
	            empty content is accepted and yields an empty result.

	    Returns:
	        The extracted abstract text, or ``""`` when no page mentions
	        "abstract" or no content was supplied.
	    """
	    # Nothing uploaded (or the upload was cleared): do not hand an empty
	    # stream to the PDF parser.
	    if not uploaded_file:
	        return ""

	    pdf_reader = PyPDF2.PdfReader(BytesIO(uploaded_file))

	    for page in pdf_reader.pages:
	        # Treat a page without extractable text as empty.
	        text = page.extract_text() or ""
	        lowered = text.lower()

	        start_index = lowered.find("abstract")
	        if start_index == -1:
	            continue

	        # Only look for the section end *after* the abstract heading, so an
	        # earlier "introduction" (e.g. in a header) cannot produce an empty
	        # slice.
	        end_index = lowered.find("introduction", start_index)
	        if end_index == -1:
	            # str.find signals "not found" with -1; slicing with it would
	            # silently drop the last character, so take the rest of the page.
	            end_index = len(text)

	        return text[start_index:end_index]

	    return ""

	def summarize_and_speech(pdf_file):
	    """Summarize the abstract of an uploaded PDF and synthesize it as speech.

	    Args:
	        pdf_file: The uploaded PDF content as bytes, or ``None`` when no
	            file is present (for example after the upload was cleared).

	    Returns:
	        A ``(summary, audio)`` pair. ``summary`` is the generated summary
	        text, an empty string when no file was given, or a short notice
	        when no abstract could be found. ``audio`` is a
	        ``(sampling_rate, samples)`` tuple, or ``None`` when there is
	        nothing to speak.
	    """
	    if pdf_file is None:
	        return "", None

	    abstract_text = abstract_extract(pdf_file)
	    if not abstract_text or not abstract_text.strip():
	        # Do not run the models on empty input.
	        return "No abstract section was found in the uploaded PDF.", None

	    summary = summarization(abstract_text, max_length=15, min_length=10)[0]['summary_text']

	    tts_output = synthesiser(summary)
	    # For a single string the text-to-speech pipeline yields one dict;
	    # accept a one-element list as well so either shape works.
	    if isinstance(tts_output, list):
	        tts_output = tts_output[0]

	    samples = tts_output["audio"]
	    # NOTE(review): the waveform may carry a leading batch axis of size 1;
	    # squeeze it so the samples are one-dimensional -- confirm against the
	    # installed transformers version.
	    if hasattr(samples, "squeeze"):
	        samples = samples.squeeze()

	    # An audio output needs the sampling rate together with the samples.
	    return summary, (tts_output["sampling_rate"], samples)

	# Input widget: a file upload configured with type="binary".
	pdf_upload = gr.File(label="Upload PDF", type="binary")

	# Output widgets, listed in the order summarize_and_speech returns its values.
	summary_box = gr.Textbox(label="Abstract Summary:")
	speech_player = gr.Audio(type="filepath", label="Summary Speech")

	iface = gr.Interface(
	    fn=summarize_and_speech,
	    inputs=pdf_upload,
	    outputs=[summary_box, speech_player],
	    title="Abstract Research Paper Summarizer",
	    description="Upload a Research Paper PDF File. The model will generate a one line summary of the Abstract section and a speech audio.",
	    live=True,
	)

	# Launch the app.
	iface.launch()