Spaces:
Runtime error
Runtime error
import os | |
import fitz | |
from dotenv import find_dotenv, load_dotenv | |
import gradio as gr | |
from pathlib import Path | |
from langchain_community.document_loaders import PyMuPDFLoader | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain_community.llms import HuggingFaceEndpoint | |
from pptx import Presentation | |
# Load environment variables from the .env file located by find_dotenv().
_ = load_dotenv(find_dotenv())

# Token value read from the environment; it is not passed to the endpoint
# below in this file.
hf_api = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Module-level LLM client, invoked through llm.invoke() when building slides.
llm = HuggingFaceEndpoint(
    repo_id="Mistralai/Mistral-7B-Instruct-v0.2",
    temperature=0.1,
    max_new_tokens=1000,
)
def load_file(input_file, max_pages=5):
    """Load a PDF and split the text of its leading pages into chunks.

    Args:
        input_file: Path of the PDF file to read.
        max_pages: Maximum number of PDF pages, counted from the start of
            the document, whose text is kept. Defaults to 5.

    Returns:
        The list of chunk documents produced by ``CharacterTextSplitter``
        (chunk size 1000, overlap 100) from the retained pages.
    """
    loader = PyMuPDFLoader(input_file)
    # In its default mode the loader returns one document per PDF page, so
    # slicing before splitting caps the number of *pages* processed. The
    # previous code sliced the chunk list instead and left two fitz handles
    # open just to count pages.
    documents = loader.load()[:max_pages]
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    return text_splitter.split_documents(documents)
def predict(text, num_bullets):
    """Ask the LLM for a slide title and summary lines describing ``text``.

    Args:
        text: Source text the slide should summarise.
        num_bullets: Maximum number of summary lines requested in the prompt.

    Returns:
        Whatever ``llm.invoke`` returns for the assembled prompt.
    """
    prompt = (
        "You are an expert in making presentations with excellent titles "
        "and summarized content in lines. Give a title (max 5 words) and "
        f"summary containing a maximum of {num_bullets} lines in list format. "
        f"Do not append bullets to summary lines: {text}"
    )
    return llm.invoke(prompt)
def extract_title_and_summary(answer):
    """Pull the title and the summary lines out of an LLM answer.

    The answer is scanned line by line:

    * a line containing ``"Title:"`` sets the title to the text after the
      first colon (a later such line replaces it);
    * a line containing ``"Summary:"`` restarts the summary with the text
      after the first colon, if there is any;
    * any other non-blank line is appended to the summary, but only once a
      title has been seen.

    Args:
        answer: Raw text returned by the LLM. ``None`` is treated as empty.

    Returns:
        A ``(title, summary)`` tuple. ``title`` is ``None`` when no title
        line was found; ``summary`` is the collected lines joined with
        newlines (an empty string when nothing was collected).
    """
    title = None
    summary_lines = []

    for raw_line in (answer or "").strip().split("\n"):
        stripped = raw_line.strip()
        if "Title:" in raw_line:
            title = raw_line.split(":", 1)[1].strip()
        elif "Summary:" in raw_line:
            # When the label sits on its own line there is nothing after the
            # colon; do not seed the summary with an empty entry, which
            # would render as a blank first line on the slide.
            first_entry = raw_line.split(":", 1)[1].strip()
            summary_lines = [first_entry] if first_entry else []
        elif title is not None and stripped:
            summary_lines.append(stripped)

    return title, "\n".join(summary_lines)
def _group_chunks_for_slides(chunks, num_slides):
    """Split ``chunks`` into at most ``num_slides`` contiguous, near-equal groups."""
    total = len(chunks)
    # Never create more slides than there are chunks, and never fewer than
    # one when there is any content at all.
    slide_count = min(max(1, int(num_slides)), total)
    return [
        chunks[k * total // slide_count:(k + 1) * total // slide_count]
        for k in range(slide_count)
    ]


# gr.Progress() as a default argument is how Gradio is told to track progress
# for this handler, so it is kept in the signature as-is.
def generate_presentation(input_file, num_slides, num_bullets, progress=gr.Progress()):
    """Build a .pptx presentation summarising the uploaded PDF.

    The PDF text chunks are grouped so that the deck has at most
    ``num_slides`` slides; each group is sent to the LLM and the returned
    title and summary are written to a new slide.

    Args:
        input_file: Path of the uploaded PDF.
        num_slides: Requested number of slides.
        num_bullets: Maximum number of summary lines per slide.
        progress: Gradio progress tracker updated once per slide.

    Returns:
        Path of the saved presentation, ``./<pdf stem>.pptx``.

    Raises:
        gr.Error: If no file was supplied.
    """
    if input_file is None:
        raise gr.Error("Please upload a PDF file first.")

    pages = load_file(input_file)
    chunks = [item.page_content for item in pages]
    slide_groups = _group_chunks_for_slides(chunks, num_slides)

    prs = Presentation()  # start from an empty presentation
    for slide_number, group_of_chunks in enumerate(slide_groups, start=1):
        # Report progress in slides (not chunk indices) so the fraction
        # never exceeds 1.0.
        progress(
            slide_number / len(slide_groups),
            desc=f"Generating slide: {slide_number}",
        )

        answer = predict("\n\n".join(group_of_chunks), num_bullets)
        title, summary = extract_title_and_summary(answer)

        # Layout 1 is used for its title shape and the content placeholder
        # at index 1.
        new_slide = prs.slides.add_slide(prs.slide_layouts[1])
        if title is not None:
            new_slide.shapes.title.text = title
        new_slide.placeholders[1].text = summary

    # Save next to the working directory, named after the source PDF.
    pres_path = f"./{Path(input_file).stem}.pptx"
    prs.save(pres_path)
    return pres_path
# Gradio UI: a PDF upload, two sliders, a trigger button and a file output
# wired to generate_presentation.
with gr.Blocks() as demo:
    gr.Markdown("\n# PDF2PPTX\n")

    with gr.Column():
        input_file = gr.File(
            label='Upload your PDF file...',
            file_count='single',
            file_types=['.pdf'],
        )
        num_slides = gr.Slider(
            label="Number of slides",
            interactive=True,
            minimum=1,
            maximum=50,
            value=5,
            step=1,
        )
        num_bullets = gr.Slider(
            label="Number of bullets per slides",
            interactive=True,
            minimum=1,
            maximum=10,
            value=5,
            step=1,
        )
        fileuploadbtn = gr.Button("Generate Presentation")
        presentation = gr.File(label="Your Presentation", interactive=False)

    gr.Markdown(
        "\n# Responsible AI Usage\n"
        "Your documents uploaded to the system or presentations generated are not saved.\n"
    )

    # Clicking the button runs the generator and shows the resulting file.
    fileuploadbtn.click(
        fn=generate_presentation,
        inputs=[input_file, num_slides, num_bullets],
        outputs=[presentation],
    )

if __name__ == "__main__":
    demo.launch()