Qazi-Mudassar-Ilyas committed on
Commit
72ed163
1 Parent(s): 32e4aa1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -0
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import fitz
3
+ from dotenv import find_dotenv, load_dotenv
4
+ import gradio as gr
5
+ from pathlib import Path
6
+ from langchain_community.document_loaders import PyMuPDFLoader
7
+
8
+ from langchain.text_splitter import CharacterTextSplitter
9
+ from langchain_community.llms import HuggingFaceEndpoint
10
+
11
+ from pptx import Presentation
12
+
13
# Load environment variables from the nearest .env file, if one can be found.
_ = load_dotenv(find_dotenv())

# NOTE(review): hf_api is read here but never passed to the endpoint below;
# presumably the client picks the token up from the environment itself —
# confirm against the HuggingFaceEndpoint documentation.
hf_api = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Module-level LLM client shared by every predict() call.
llm = HuggingFaceEndpoint(
    repo_id="Mistralai/Mistral-7B-Instruct-v0.2",
    temperature=0.1,
    max_new_tokens=1000,
)
16
+
17
def load_file(input_file, max_chunks=5):
    """Load a PDF and split its text into chunks ready for summarisation.

    Args:
        input_file: Path of the PDF file to read.
        max_chunks: Threshold and cap used to bound the work sent to the LLM.
            When the PDF has more than ``max_chunks`` pages, only the first
            ``max_chunks`` text chunks are returned. Defaults to 5.

    Returns:
        The list of split documents produced by ``CharacterTextSplitter``
        (chunk size 1000, overlap 100), possibly truncated as described above.
    """
    documents = PyMuPDFLoader(input_file).load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    pages = text_splitter.split_documents(documents)

    # The PDF is reopened only to read its page count; the context manager
    # guarantees the document handle is closed again.
    with fitz.open(input_file) as pdf_document:
        total_pages = pdf_document.page_count

    # NOTE(review): the condition is on PDF *pages* but the slice is applied
    # to text *chunks*; the two are not the same unit. Kept as-is to preserve
    # existing behaviour — confirm the intended limit.
    if total_pages > max_chunks:
        pages = pages[:max_chunks]
    return pages
31
+
32
def predict(text, num_bullets):
    """Ask the LLM for a slide title and a short summary of ``text``.

    Args:
        text: Source text to summarise (one or more concatenated chunks).
        num_bullets: Maximum number of summary lines requested in the prompt.

    Returns:
        Whatever ``llm.invoke`` returns for the prompt (the raw model answer).
    """
    # Spelling of "presentations" corrected; the rest of the prompt is unchanged.
    prompt = (
        "You are an expert in making presentations with excellent titles and "
        "summarized content in lines. "
        f"Give a title (max 5 words) and summary containing a maximum of {num_bullets} "
        f"lines in list format. Do not append bullets to summary lines: {text}"
    )
    return llm.invoke(prompt)
36
+
37
def _clean_field(value):
    """Strip surrounding whitespace and Markdown emphasis asterisks."""
    return value.strip().strip("*").strip()


def extract_title_and_summary(answer):
    """Parse an LLM answer into a slide title and a summary.

    The answer is scanned line by line:

    * a line containing ``"Title:"`` sets the title to the text after it;
    * a line containing ``"Summary:"`` (re)starts the summary with the text
      after it, if there is any;
    * once a title has been seen, every other non-empty line is appended to
      the summary.

    Args:
        answer: Raw text returned by the LLM. ``None`` or an empty string is
            treated as "nothing to parse".

    Returns:
        A ``(title, summary)`` tuple. ``title`` is ``None`` when no title line
        was found; ``summary`` is the summary lines joined with newlines
        (an empty string when there are none).
    """
    if not answer:
        return None, ""

    title = None
    summary_lines = []

    for raw_line in answer.strip().split('\n'):
        line = raw_line.strip()
        if "Title:" in line:
            # Split on the label itself so an earlier colon in the line
            # (e.g. a numbering prefix) cannot cut the title short.
            title = _clean_field(line.split("Title:", 1)[1])
        elif "Summary:" in line:
            first_line = _clean_field(line.split("Summary:", 1)[1])
            # A bare "Summary:" header must not contribute an empty first
            # line, which would otherwise render as a blank bullet.
            summary_lines = [first_line] if first_line else []
        elif title is not None and line:
            summary_lines.append(line)

    return title, '\n'.join(summary_lines)
65
+
66
def generate_presentation(input_file, num_slides, num_bullets, progress=gr.Progress()):
    """Build a PowerPoint presentation summarising a PDF.

    The PDF is split into text chunks, the chunks are grouped so that at most
    ``num_slides`` groups result, each group is summarised by the LLM, and one
    "title + content" slide is added per group.

    Args:
        input_file: Path of the uploaded PDF.
        num_slides: Requested (maximum) number of slides.
        num_bullets: Maximum number of summary lines per slide, forwarded to
            ``predict``.
        progress: Gradio progress tracker; the ``gr.Progress()`` default is
            how Gradio expects the tracker to be declared.

    Returns:
        Path of the saved ``.pptx`` file, named after the input file's stem
        and written to the current working directory.

    Raises:
        gr.Error: If no file was supplied.
    """
    if input_file is None:
        # Surface a readable message in the UI instead of an opaque loader error.
        raise gr.Error("Please upload a PDF file first.")

    pages = load_file(input_file)

    # Extract page content from the PDF splits.
    chunks = [item.page_content for item in pages]

    # Ceiling division: flooring here could yield MORE slides than requested
    # (e.g. 5 chunks / 2 slides -> 2 per slide -> 3 slides).
    num_slides = max(1, int(num_slides))
    chunks_per_slide = max(1, -(-len(chunks) // num_slides))
    total_slides = -(-len(chunks) // chunks_per_slide)

    prs = Presentation()  # Start from an empty presentation.

    starts = range(0, len(chunks), chunks_per_slide)
    for slide_number, start in enumerate(starts, start=1):
        # Report the fraction of slides already finished, so the value stays
        # within [0, 1) and the label shows the real slide number.
        progress(
            (slide_number - 1) / total_slides,
            desc=f"Generating slide: {slide_number}",
        )

        # Concatenate the chunks that feed this slide.
        concatenated_chunks = '\n\n'.join(chunks[start:start + chunks_per_slide])

        # Call the LLM and parse its answer.
        answer = predict(concatenated_chunks, num_bullets)
        title, summary = extract_title_and_summary(answer)

        # Layout index 1 is used for every slide; it must provide a title
        # shape and a placeholder at index 1 for the body text.
        new_slide = prs.slides.add_slide(prs.slide_layouts[1])
        if title is not None:
            new_slide.shapes.title.text = title
        if summary is not None:
            new_slide.placeholders[1].text = summary

    # Save the presentation next to the working directory and return its path.
    input_file = Path(input_file)
    pres_path = f'./{input_file.stem}.pptx'
    prs.save(pres_path)
    return pres_path
110
+
111
# Gradio UI: upload a PDF, choose slide/bullet counts, download the result.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # PDF2PPTX
        """
    )

    with gr.Column():
        input_file = gr.File(
            label='Upload your PDF file...',
            file_count='single',
            file_types=['.pdf'],
        )
        num_slides = gr.Slider(
            label="Number of slides",
            interactive=True,
            minimum=1,
            maximum=50,
            value=5,
            step=1,
        )
        num_bullets = gr.Slider(
            label="Number of bullets per slides",
            interactive=True,
            minimum=1,
            maximum=10,
            value=5,
            step=1,
        )
        fileuploadbtn = gr.Button("Generate Presentation")
        presentation = gr.File(label="Your Presentation", interactive=False)

    gr.Markdown(
        """
        # Responsible AI Usage
        Your documents uploaded to the system or presentations generated are not saved.
        """
    )

    # Event listeners have to be registered while the Blocks context is open.
    fileuploadbtn.click(
        fn=generate_presentation,
        inputs=[input_file, num_slides, num_bullets],
        outputs=[presentation],
    )

if __name__ == "__main__":
    demo.launch()
134
+
135
+