# my space: https://huggingface.co/spaces/vividsd/practice

# I tried to reuse my previous code, with some adaptations so it works on any PDF that contains an abstract.

# imports
import PyPDF2
import torch
import gradio as gr
from datasets import load_dataset
from transformers import pipeline, SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan

# Now copying my code and adapting it for any PDF

def extract_abstract(pdf_file_path):
    with open(pdf_file_path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        text = reader.pages[0].extract_text()

    # In order to extract the exact part of the first page that is useful to me,
    # I rely on the pattern these papers follow: after the Abstract there is an
    # Introduction, so I cut the text right before the introduction.
    abstract_start_index = text.find('Abstract')
    introduction_start_index = text.find('Introduction')

    if abstract_start_index == -1 or introduction_start_index == -1:
        return ""  # Abstract or Introduction section not found

    abstract = text[abstract_start_index + len('Abstract'):introduction_start_index].strip()
    return abstract

# Models are loaded once at startup so they are not reloaded on every request
summarizer = pipeline("summarization", model="Falconsai/text_summarization")
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

# proceeding to the audio function
def audio(pdf_file):
    # gr.File may pass a tempfile object or a plain path, depending on the Gradio version
    pdf_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file

    abstract_text = extract_abstract(pdf_path)
    if not abstract_text:
        raise gr.Error("Could not find an Abstract followed by an Introduction in this PDF.")

    # Summarize the abstract in one short sentence
    output = summarizer(abstract_text, max_length=26, min_length=10, do_sample=False)
    summary = output[0]['summary_text']

    # Turn the summary into speech with SpeechT5 and the HiFi-GAN vocoder
    inputs = processor(text=summary, return_tensors="pt")
    with torch.no_grad():
        speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)

    # gr.Audio accepts a (sample_rate, waveform) tuple; SpeechT5 outputs 16 kHz audio
    return 16000, speech.numpy()

# Creating the Gradio app
input_component = gr.File(file_types=[".pdf"])
output_component = gr.Audio()

demo = gr.Interface(
    fn=audio,
    inputs=input_component,
    outputs=output_component,
    title="Reading your abstract summary out loud",
    description="Upload a PDF that contains an Abstract. Your abstract is summarized in one sentence and read out loud. Only PDFs with an Abstract section followed by an Introduction section are supported.",
)

demo.launch()
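
# A quick way to exercise the whole pipeline without the UI, as a minimal sketch:
# "example.pdf" is a hypothetical local file and soundfile is an extra dependency,
# neither of which is part of the Space. Comment out demo.launch() above, then run:
#
#   sr, wav = audio("example.pdf")    # audio() also accepts a plain file path
#   import soundfile as sf
#   sf.write("summary.wav", wav, sr)  # listen to the generated summary offline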