# my space: https://huggingface.co/spaces/vividsd/practice

# I tried to reuse my previous code, with some adaptations so it works on any PDF that contains an abstract.

# imports
import PyPDF2
import torch
import gradio as gr
from datasets import load_dataset
from transformers import pipeline, SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan

# Now copying my code and adapting it for any PDF

def extract_abstract(pdf_file_path):
    with open(pdf_file_path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        text = reader.pages[0].extract_text()

    # In order to extract the exact part of the first page that is useful to me,
    # I rely on the pattern these papers follow: after the Abstract there is an
    # Introduction, so I cut the text right before the introduction.
    abstract_start_index = text.find('Abstract')
    introduction_start_index = text.find('Introduction')

    if abstract_start_index == -1 or introduction_start_index == -1:
        return ""  # Abstract or Introduction section not found

    abstract = text[abstract_start_index + len('Abstract'):introduction_start_index].strip()
    return abstract

# Models are loaded once at startup so they are not reloaded on every request
summarizer = pipeline("summarization", model="Falconsai/text_summarization")
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

# proceeding to the audio function
def audio(pdf_file):
    # gr.File may pass a tempfile object or a plain path, depending on the Gradio version
    pdf_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file

    abstract_text = extract_abstract(pdf_path)
    if not abstract_text:
        raise gr.Error("Could not find an Abstract followed by an Introduction in this PDF.")

    # Summarize the abstract in one short sentence
    output = summarizer(abstract_text, max_length=26, min_length=10, do_sample=False)
    summary = output[0]['summary_text']

    # Turn the summary into speech with SpeechT5 and the HiFi-GAN vocoder
    inputs = processor(text=summary, return_tensors="pt")
    with torch.no_grad():
        speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)

    # gr.Audio accepts a (sample_rate, waveform) tuple; SpeechT5 outputs 16 kHz audio
    return 16000, speech.numpy()

# Creating the Gradio app
input_component = gr.File(file_types=[".pdf"])
output_component = gr.Audio()

demo = gr.Interface(
    fn=audio,
    inputs=input_component,
    outputs=output_component,
    title="Reading your abstract summary out loud",
    description="Upload a PDF that contains an Abstract. Your abstract is summarized in one sentence and read out loud. Only PDFs with an Abstract section followed by an Introduction section are supported.",
)

demo.launch()
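
# A quick way to exercise the whole pipeline without the UI, as a minimal sketch:
# "example.pdf" is a hypothetical local file and soundfile is an extra dependency,
# neither of which is part of the Space. Comment out demo.launch() above, then run:
#
#   sr, wav = audio("example.pdf")    # audio() also accepts a plain file path
#   import soundfile as sf
#   sf.write("summary.wav", wav, sr)  # listen to the generated summary offline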