# Here are the imports
import PyPDF2
import re
import torch
from transformers import pipeline
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
import gradio as gr
import io
import numpy as np
import soundfile as sf
import tempfile

# Here is the code

# Function to extract and clean the abstract from a PDF
def extract_and_clean_abstract(uploaded_file):
    if uploaded_file is None:
        return "No file uploaded."

    # Read the file using its temporary file path
    with open(uploaded_file.name, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        full_text = ""
        for page in reader.pages:
            full_text += page.extract_text()

    # Find the abstract: capture the text between the "Abstract" heading
    # and the next section marker (Introduction, Keywords, or a section number)
    pattern = r"(Abstract|ABSTRACT|abstract)(.*?)(Introduction|INTRODUCTION|introduction|1|Keywords|KEYWORDS|keywords)"
    match = re.search(pattern, full_text, re.DOTALL)
    if match:
        abstract = match.group(2).strip()
    else:
        return "Abstract not found."

    # Clean the abstract: remove line breaks and hyphenation artifacts
    cleaned_abstract = abstract.replace('\n', ' ').replace('- ', '')
    return cleaned_abstract

# Function to summarize text
def summarize_text(text):
    # Initialize the summarization pipeline with the summarization model
    summarizer = pipeline(
        "summarization",
        "pszemraj/led-base-book-summary",
        device=0 if torch.cuda.is_available() else -1,
    )

    # Generate the summary
    result = summarizer(
        text,
        min_length=8,
        max_length=25,
        no_repeat_ngram_size=3,
        encoder_no_repeat_ngram_size=3,
        repetition_penalty=3.5,
        num_beams=4,
        do_sample=False,
        early_stopping=True,
    )

    # Extract the first sentence from the summary
    first_sentence = re.split(r'(?<=[.:;!?])\s', result[0]['summary_text'])[0]
    return first_sentence

# Function for text-to-speech
def text_to_speech(text):
    # Check if CUDA is available and set the device accordingly
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the TTS model and task from the Hugging Face Hub
    models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
        "facebook/fastspeech2-en-ljspeech",  # Or another TTS model of your choice
        arg_overrides={"vocoder": "hifigan", "fp16": False}
    )

    # Ensure the model is on the correct device
    model = models[0].to(device)

    # Update the config with the data config from the task
    TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)

    # Build the generator
    generator = task.build_generator([model], cfg)

    # Get the model input from the text and move the tensors to the device
    sample = TTSHubInterface.get_model_input(task, text)
    sample["net_input"]["src_tokens"] = sample["net_input"]["src_tokens"].to(device)
    sample["net_input"]["src_lengths"] = sample["net_input"]["src_lengths"].to(device)

    # Generate the waveform
    wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)

    # Move the waveform to CPU if it's on GPU
    if wav.is_cuda:
        wav = wav.cpu()

    # Write the waveform to a temporary file and return the file path
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        sf.write(tmp_file.name, wav.numpy(), rate)
        return tmp_file.name

def process_pdf(uploaded_file):
    """
    Process the uploaded PDF file: extract the abstract, summarize it,
    and convert the summary to speech.
    """
    abstract = extract_and_clean_abstract(uploaded_file)
    summary = summarize_text(abstract)
    audio_output = text_to_speech(summary)
    return audio_output

# Create the Gradio interface
iface = gr.Interface(
    fn=process_pdf,
    inputs=gr.File(label="Upload PDF"),
    outputs=gr.Audio(label="Audio Summary"),
    title="PDF Abstract Summary to Speech",
    description="Upload a PDF file that contains an abstract. The app will extract the abstract, summarize it, and convert the summary to speech.",
    examples=[
        ["Article 11 Hidden Technical Debt in Machine Learning Systems.pdf"],
        ["Article 7 Efficient Estimation of Word Representations in Vector Space.pdf"],
        ["Article 6 BloombergGPT_ A Large Language Model for Finance.pdf"],
    ]
)

# Run the Gradio app
iface.launch()
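
# Note on dependencies: this script assumes PyPDF2, torch, transformers,
# fairseq, gradio, and soundfile are installed. The fairseq FastSpeech 2
# checkpoint also relies on a grapheme-to-phoneme frontend (the g2p_en
# package) for English input, so that dependency is assumed to be available
# as well. The example PDFs listed in the interface must exist locally for
# the Gradio examples to load.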