Spaces:

Xuratron
/

abstract-speech-summarizer

Running

App Files Files Community

abstract-speech-summarizer / app.py

Xuratron

Update app.py

f7c9274 8 months ago

raw

history blame contribute delete

No virus

4.3 kB

	# Here are the imports
	import PyPDF2
	import re
	import torch
	from transformers import pipeline
	from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
	from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
	import gradio as gr
	import io
	import numpy as np
	import soundfile as sf
	import tempfile

	# Here is the code

	# Function to extract and clean abstract from PDF
	def extract_and_clean_abstract(uploaded_file):
	    """Extract the abstract from an uploaded PDF and flatten it to one line.

	    Args:
	        uploaded_file: The upload to read. Either an object whose ``name``
	            attribute holds the path of the file on disk, or that path
	            itself. ``None`` means nothing was uploaded.

	    Returns:
	        The cleaned abstract text, or one of the plain messages
	        ``"No file uploaded."`` / ``"Abstract not found."`` when there is
	        nothing to extract.
	    """
	    if uploaded_file is None:
	        return "No file uploaded."

	    # Accept both a file wrapper (path stored in ``.name``) and a bare path.
	    pdf_path = getattr(uploaded_file, "name", uploaded_file)

	    with open(pdf_path, 'rb') as file:
	        reader = PyPDF2.PdfReader(file)
	        # Guard against pages that yield no text so the join cannot fail.
	        full_text = "".join(page.extract_text() or "" for page in reader.pages)

	    # The alternatives must be separated by an unescaped ``|``; an escaped
	    # ``\|`` would only match a literal pipe character in the text.
	    # NOTE(review): the bare ``1`` alternative (meant as the number of the
	    # first section) ends the abstract at the first digit 1 it contains.
	    pattern = (
	        r"(Abstract|ABSTRACT|abstract)"
	        r"(.*?)"
	        r"(Introduction|INTRODUCTION|introduction|1|Keywords|KEYWORDS|keywords)"
	    )
	    match = re.search(pattern, full_text, re.DOTALL)
	    if match is None:
	        return "Abstract not found."

	    abstract = match.group(2).strip()

	    # Join the lines, then drop the "- " left behind by words that were
	    # hyphenated across a line break.
	    return abstract.replace('\n', ' ').replace('- ', '')

	# Function to summarize text
	# Summarization pipeline shared by every call; created on first use so the
	# model is loaded once instead of once per request.
	_SUMMARIZER = None


	def _get_summarizer():
	    """Return the shared summarization pipeline, creating it on first use."""
	    global _SUMMARIZER
	    if _SUMMARIZER is None:
	        _SUMMARIZER = pipeline(
	            "summarization",
	            "pszemraj/led-base-book-summary",
	            device=0 if torch.cuda.is_available() else -1,
	        )
	    return _SUMMARIZER


	def summarize_text(text):
	    """Summarize ``text`` and return the first sentence of the summary.

	    Args:
	        text: The text to summarize.

	    Returns:
	        The summary up to (and including) its first ``.``, ``:``, ``;``,
	        ``!`` or ``?`` that is followed by whitespace, or the whole summary
	        if there is no such boundary.
	    """
	    summarizer = _get_summarizer()

	    # Beam search with strong repetition penalties, capped at 25 tokens.
	    result = summarizer(
	        text,
	        min_length=8,
	        max_length=25,
	        no_repeat_ngram_size=3,
	        encoder_no_repeat_ngram_size=3,
	        repetition_penalty=3.5,
	        num_beams=4,
	        do_sample=False,
	        early_stopping=True,
	    )

	    # Only the first sentence is needed, so stop splitting after one cut.
	    summary = result[0]['summary_text']
	    return re.split(r'(?<=[.:;!?])\s', summary, maxsplit=1)[0]

	# Function for text-to-speech
	# (task, model, generator, device) shared by every call; loaded on first use
	# so the TTS checkpoint is not fetched and rebuilt for each request.
	_TTS_BUNDLE = None


	def _get_tts_bundle():
	    """Return the shared ``(task, model, generator, device)``, loading it once."""
	    global _TTS_BUNDLE
	    if _TTS_BUNDLE is None:
	        # Use the GPU when one is available, otherwise fall back to the CPU.
	        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	        models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
	            "facebook/fastspeech2-en-ljspeech",  # Or another TTS model of your choice
	            arg_overrides={"vocoder": "hifigan", "fp16": False},
	        )
	        model = models[0].to(device)

	        # The generator is built from the config, so merge the task's data
	        # config into it first.
	        TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
	        generator = task.build_generator([model], cfg)

	        _TTS_BUNDLE = (task, model, generator, device)
	    return _TTS_BUNDLE


	def text_to_speech(text):
	    """Synthesize ``text`` to speech and write it to a temporary WAV file.

	    Args:
	        text: The text to speak.

	    Returns:
	        Path of the ``.wav`` file that was written. The file is created with
	        ``delete=False``, so it stays on disk after this function returns.
	    """
	    task, model, generator, device = _get_tts_bundle()

	    # Build the model input and move its tensors next to the model.
	    sample = TTSHubInterface.get_model_input(task, text)
	    net_input = sample["net_input"]
	    net_input["src_tokens"] = net_input["src_tokens"].to(device)
	    net_input["src_lengths"] = net_input["src_lengths"].to(device)

	    wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)

	    # NumPy needs a CPU tensor that is detached from any autograd graph.
	    samples = wav.detach().cpu().numpy()

	    # Reserve a path and close the handle before writing, so the file is not
	    # written by name while it is still open here.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
	        wav_path = tmp_file.name
	    sf.write(wav_path, samples, rate)
	    return wav_path

	def process_pdf(uploaded_file):
	    """Turn the abstract of an uploaded PDF into a spoken one-sentence summary.

	    Args:
	        uploaded_file: The uploaded PDF, as passed to
	            ``extract_and_clean_abstract``.

	    Returns:
	        Path of the audio file produced by ``text_to_speech``.

	    Raises:
	        gr.Error: If no file was uploaded or no abstract could be found.
	    """
	    abstract = extract_and_clean_abstract(uploaded_file)

	    # The extractor reports failures as plain messages. Surface them as an
	    # error instead of summarizing and speaking them as if they were the
	    # abstract.
	    if abstract in ("No file uploaded.", "Abstract not found."):
	        raise gr.Error(abstract)

	    summary = summarize_text(abstract)
	    return text_to_speech(summary)

	# Create Gradio interface
	# Bundled sample papers offered as one-click examples (one input per row).
	_example_rows = [
	    [pdf_name]
	    for pdf_name in (
	        "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf",
	        "Article 7 Efficient Estimation of Word Representations in Vector Space.pdf",
	        "Article 6 BloombergGPT_ A Large Language Model for Finance.pdf",
	    )
	]

	# Input and output widgets, created in the same order as before.
	_pdf_input = gr.File(label="Upload PDF")
	_audio_output = gr.Audio(label="Audio Summary")

	# Wire the PDF -> audio pipeline into a single-function Gradio interface.
	iface = gr.Interface(
	    fn=process_pdf,
	    inputs=_pdf_input,
	    outputs=_audio_output,
	    title="PDF Abstract Summary to Speech",
	    description="Upload only a PDF file that has an abstract. The model will extract its abstract, summarize it, and converts the summary to speech.",
	    examples=_example_rows,
	)

	# Start serving the app.
	iface.launch()