Spaces:

Qazi-Mudassar-Ilyas
/

PDF2PPTX

Runtime error

App Files Files Community

Qazi-Mudassar-Ilyas committed on Mar 31

Commit

72ed163

•

1 Parent(s): 32e4aa1

Create app.py

Browse files

Files changed (1) hide show

app.py +135 -0

app.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import os
+import fitz
+from dotenv import find_dotenv, load_dotenv
+import gradio as gr
+from pathlib import Path
+from langchain_community.document_loaders import PyMuPDFLoader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain_community.llms import HuggingFaceEndpoint
+from pptx import Presentation
+_=load_dotenv(find_dotenv())
+hf_api = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+llm=HuggingFaceEndpoint(repo_id="Mistralai/Mistral-7B-Instruct-v0.2", temperature=0.1, max_new_tokens=1000)
+def load_file (input_file):
+    pages=[]
+    loader = PyMuPDFLoader(input_file)
+    documents = loader.load()
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+    pages = text_splitter.split_documents(documents)
+    #limit to five pages if pages > 5
+    pdf_document = fitz.open(input_file)
+    pdf_writer = fitz.open()
+    total_pages = pdf_document.page_count
+    if total_pages > 5:
+        pages=pages[:5]
+    return pages
+def predict (text,num_bullets):
+   prompt= f"You are an expert in making presentatinos with excellent titles and summarized content in lines. Give a title (max 5 words) and summary containing a maximum of {num_bullets} lines in list format. Do not append bullets to summary lines: {text}"
+   answer = llm.invoke(prompt)
+   return answer
+def extract_title_and_summary(answer):
+    # Provided text
+    text = answer
+    # Splitting text into lines
+    lines = text.strip().split('\n')
+    # Initialize title and summary variables
+    title = None
+    summary = []
+    # Iterate through the lines
+    for line in lines:
+        # Check if the line contains "Title:"
+        if "Title:" in line:
+            # Extract title
+            title = line.split(":", 1)[1].strip()
+        # Check if the line contains "Summary:"
+        elif "Summary:" in line:
+            # Extract summary lines
+            summary = [line.split(":", 1)[1].strip()]
+        # If we've already found the title, and the line is not empty, add it to the summary
+        elif title is not None and line.strip() != "":
+            summary.append(line.strip())
+    # Join summary lines into a single string
+    summary = '\n'.join(summary)
+    return title, summary
+def generate_presentation(input_file, num_slides,num_bullets, progress=gr.Progress()):
+    pages=load_file (input_file)
+    cps =  len(pages) / num_slides
+    chunks_per_slide = int (cps)
+    if chunks_per_slide == 0:
+        chunks_per_slide=1
+    #extract page content from pdf pages splits
+    chunks = [item.page_content for item in pages]
+    prs = Presentation() # Generate and empty presentation
+    #the heart of the method that iterates through all chunks, concatenates them, calls LLM, and generates slides
+    for i in range(0, len(chunks), chunks_per_slide):
+        # Update progress on UI
+        description=f"Generating slide: {i+1}"
+        progress  ((i+1)/num_slides, desc= description)
+        #Concatenate chunks to map no. of pages with no. of slides required
+        concatenated_chunks= ""
+        group_of_chunks = chunks[i:i+chunks_per_slide]
+        concatenated_chunks = '\n\n'.join(group_of_chunks)
+        #call the LLM
+        answer=predict(concatenated_chunks,num_bullets)
+        title, summary = extract_title_and_summary(answer)
+        #add new slide
+        new_slide = prs.slides.add_slide(prs.slide_layouts[1])
+        title_1 = new_slide.shapes.title
+        if title is  not None:
+            title_1.text = title
+        content_1 = new_slide.placeholders[1]
+        if summary is not None:
+            content_1.text = summary
+    # save the presentation and return
+    input_file=Path(input_file)
+    pres_path=f'./{input_file.stem}.pptx'
+    prs.save(pres_path)
+    return pres_path
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """
+        # PDF2PPTX
+        """
+    )
+    with gr.Column():
+        input_file = gr.File(label='Upload your PDF file...', file_count='single', file_types=['.pdf'])
+        num_slides=gr.Slider (label= "Number of slides",  interactive=True,minimum=1, maximum=50, value=5, step=1)
+        num_bullets=gr.Slider(label= "Number of bullets per slides", interactive=True, minimum=1, maximum=10, value=5, step=1)
+        fileuploadbtn= gr.Button ("Generate Presentation")
+        presentation = gr.File(label="Your Presentation", interactive=False)
+    gr.Markdown(
+        """
+        # Responsible AI Usage
+        Your documents uploaded to the system or presentations generated are not saved.
+        """
+    )
+    fileuploadbtn.click(fn=generate_presentation, inputs=[input_file, num_slides,num_bullets], outputs=[presentation])
+if __name__ == "__main__":
+    demo.launch()