"""PDF2PPTX: convert an uploaded PDF into a PowerPoint deck, one LLM-summarized slide per group of text chunks."""

import os
from pathlib import Path

import fitz
import gradio as gr
from dotenv import find_dotenv, load_dotenv
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.llms import HuggingFaceEndpoint
from pptx import Presentation

# Load the HuggingFace token from a .env file; HuggingFaceEndpoint reads
# HUGGINGFACEHUB_API_TOKEN from the environment to authenticate.
_ = load_dotenv(find_dotenv())
hf_api = os.getenv("HUGGINGFACEHUB_API_TOKEN")

llm = HuggingFaceEndpoint(
    repo_id="Mistralai/Mistral-7B-Instruct-v0.2",
    temperature=0.1,
    max_new_tokens=1000,
)


def load_file(input_file):
    """Load a PDF and split its text into ~1000-character chunks.

    Args:
        input_file: Filesystem path to the PDF.

    Returns:
        List of LangChain Document chunks; truncated to the first 5 when
        the PDF has more than 5 pages.
    """
    loader = PyMuPDFLoader(input_file)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    pages = text_splitter.split_documents(documents)
    # FIX: the original re-opened the PDF without ever closing it (resource
    # leak) and created an unused fitz writer object; use a context manager
    # and drop the dead object.
    with fitz.open(input_file) as pdf_document:
        total_pages = pdf_document.page_count
    # NOTE(review): the condition checks the PDF *page* count while the
    # slice truncates the split *chunks* — presumably intended to bound the
    # number of LLM calls; confirm which unit was meant.
    if total_pages > 5:
        pages = pages[:5]
    return pages


def predict(text, num_bullets):
    """Ask the LLM for a slide title and a bullet-style summary of *text*.

    Args:
        text: Source text to summarize.
        num_bullets: Maximum number of summary lines to request.

    Returns:
        The raw LLM completion (expected to contain "Title:" / "Summary:"
        markers, parsed later by extract_title_and_summary).
    """
    # FIX: corrected the "presentatinos" typo in the prompt.
    prompt = (
        "You are an expert in making presentations with excellent titles and "
        "summarized content in lines. Give a title (max 5 words) and summary "
        f"containing a maximum of {num_bullets} lines in list format.\n"
        f"Do not append bullets to summary lines: {text}"
    )
    return llm.invoke(prompt)


def extract_title_and_summary(answer):
    """Parse an LLM answer into a (title, summary) pair.

    Scans for a "Title:" line and a "Summary:" line; any further non-empty
    line seen after the title is treated as a continuation of the summary.

    Args:
        answer: Raw LLM completion text.

    Returns:
        Tuple (title, summary): title may be None if no "Title:" line was
        found; summary is a newline-joined string (possibly empty).
    """
    title = None
    summary_lines = []
    for line in answer.strip().split('\n'):
        if "Title:" in line:
            title = line.split(":", 1)[1].strip()
        elif "Summary:" in line:
            # Restart the summary from the text after the marker.
            summary_lines = [line.split(":", 1)[1].strip()]
        elif title is not None and line.strip() != "":
            # Continuation line belonging to the summary.
            summary_lines.append(line.strip())
    return title, '\n'.join(summary_lines)


def generate_presentation(input_file, num_slides, num_bullets, progress=gr.Progress()):
    """Build a .pptx from a PDF: one LLM-summarized slide per chunk group.

    Args:
        input_file: Path of the uploaded PDF.
        num_slides: Desired number of slides (upper bound).
        num_bullets: Maximum bullets per slide, forwarded to the prompt.
        progress: Gradio progress tracker injected by the UI.

    Returns:
        Path of the saved .pptx file (same stem as the input PDF).
    """
    pages = load_file(input_file)
    # Chunks merged into each slide; at least 1 so short documents still
    # produce slides (replaces the int()-then-zero-check of the original).
    chunks_per_slide = max(1, len(pages) // num_slides)
    # Extract the raw text from the Document chunks.
    chunks = [item.page_content for item in pages]
    prs = Presentation()  # start from an empty presentation
    slide_no = 0
    for i in range(0, len(chunks), chunks_per_slide):
        slide_no += 1
        # FIX: the original reported (chunk_index + 1) / num_slides, which
        # overshoots 100% whenever chunks_per_slide > 1; report the actual
        # slide count instead.
        progress(slide_no / num_slides, desc=f"Generating slide: {slide_no}")
        # Concatenate this slide's chunks and summarize them with the LLM.
        concatenated_chunks = '\n\n'.join(chunks[i:i + chunks_per_slide])
        answer = predict(concatenated_chunks, num_bullets)
        title, summary = extract_title_and_summary(answer)
        # Layout 1 is the "Title and Content" layout of the default template.
        new_slide = prs.slides.add_slide(prs.slide_layouts[1])
        if title is not None:
            new_slide.shapes.title.text = title
        if summary is not None:
            new_slide.placeholders[1].text = summary
    # Save next to the working directory, named after the source PDF.
    pres_path = f'./{Path(input_file).stem}.pptx'
    prs.save(pres_path)
    return pres_path


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # PDF2PPTX
        """
    )
    with gr.Column():
        input_file = gr.File(
            label='Upload your PDF file...',
            file_count='single',
            file_types=['.pdf'],
        )
        num_slides = gr.Slider(
            label="Number of slides",
            interactive=True,
            minimum=1,
            maximum=50,
            value=5,
            step=1,
        )
        num_bullets = gr.Slider(
            label="Number of bullets per slides",
            interactive=True,
            minimum=1,
            maximum=10,
            value=5,
            step=1,
        )
        fileuploadbtn = gr.Button("Generate Presentation")
        presentation = gr.File(label="Your Presentation", interactive=False)
        gr.Markdown(
            """
            # Responsible AI Usage
            Your documents uploaded to the system or presentations generated are not saved.
            """
        )
    fileuploadbtn.click(
        fn=generate_presentation,
        inputs=[input_file, num_slides, num_bullets],
        outputs=[presentation],
    )

if __name__ == "__main__":
    demo.launch()