wuchangsheng951 committed on
Commit
a30e539
1 Parent(s): 6087436

Add application file

Browse files
Files changed (1) hide show
  1. app.py +313 -0
app.py ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import shutil
4
+ import requests
5
+ import zipfile
6
+ from PyPDF2 import PdfFileReader, PdfFileWriter
7
+ import PyPDF2
8
+ from io import BytesIO
9
+ from reportlab.lib.pagesizes import letter
10
+ from reportlab.platypus import SimpleDocTemplate,Preformatted
11
+ from reportlab.platypus import Image as RLImage
12
+ from reportlab.platypus import Paragraph, Spacer
13
+ from reportlab.lib.styles import getSampleStyleSheet
14
+ from reportlab.lib.utils import ImageReader
15
+ from PIL import Image
16
+ import os
17
+ from langchain.indexes.vectorstore import VectorstoreIndexCreator
18
+ from langchain.chains import VectorDBQA
19
+ from langchain import OpenAI
20
+ from langchain.document_loaders import UnstructuredPDFLoader
21
+ from langchain.vectorstores.faiss import FAISS
22
+ from langchain.embeddings.openai import OpenAIEmbeddings
23
+ from flask import send_file
24
+
25
+
26
class REPOGPT:
    """Answer questions about a GitHub repository with a retrieval QA chain.

    The repository is downloaded as a zip archive, each file in it is
    rendered to a PDF, the PDFs are merged into a single document, and that
    document is indexed in a FAISS vector store which backs a LangChain
    ``VectorDBQA`` chain.
    """

    def __init__(self) -> None:
        self.repo_link = None
        self.api_key = None
        # Set later by init_agent / create_repo_pdf. Defined here so reading
        # them before initialization yields None instead of AttributeError.
        self.load_vectorstore = None
        self.merged_pdf_path = None

    def init_agent(self, api_key, repo_link=None, load_vectorstore=None):
        """Store the settings, build the QA chain and reveal the question UI.

        Args:
            api_key: OpenAI API key; must be non-empty.
            repo_link: URL of the GitHub repository to index.
            load_vectorstore: When not ``None``, a previously saved FAISS
                index is loaded instead of indexing ``repo_link``.

        Returns:
            A ``(gr.update(visible=True), 'Initialize Finished')`` tuple for
            the Gradio outputs wired to this callback.

        Raises:
            ValueError: If ``api_key`` is ``None`` or empty.
        """
        self.repo_link = repo_link
        self.api_key = api_key
        self.load_vectorstore = load_vectorstore
        # Explicit check instead of ``assert``: asserts vanish under ``-O``
        # and an empty textbox yields "" (not None), which must be rejected.
        if not self.api_key:
            raise ValueError("You need to provide an API key")
        self.REPOGPT_Initialized()
        return gr.update(visible=True), 'Initialize Finished'

    def REPOGPT_Initialized(self,):
        """Build ``self.vectorstore`` and the ``self.qa`` chain.

        Exports the API key through ``OPENAI_API_KEY``, then either indexes
        the merged repository PDF or loads a saved FAISS index.
        """
        os.environ["OPENAI_API_KEY"] = self.api_key
        if self.load_vectorstore is None:
            loader = UnstructuredPDFLoader(self.create_repo_pdf(self.repo_link))
            self.index = VectorstoreIndexCreator(vectorstore_cls=FAISS).from_loaders([loader])
            self.vectorstore = self.index.vectorstore
        else:
            embeddings = OpenAIEmbeddings()
            # NOTE(review): the path is hard-coded and the value of
            # ``load_vectorstore`` is ignored; it also differs from the
            # default used by save_indexDB ('indexDB.json') -- confirm intent.
            self.vectorstore = FAISS.load_local('asd.json', embeddings=embeddings)

        self.qa = VectorDBQA.from_chain_type(
            llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo"),
            chain_type="stuff",
            vectorstore=self.vectorstore,
        )

    def download_repo_zip(self, link, output_folder="main.zip"):
        """Download the repository archive for ``link`` to ``output_folder``.

        The ``master`` branch is tried first and ``main`` is used as a
        fallback when ``master`` does not exist (HTTP 404).

        Args:
            link: Repository URL of the form
                ``https://github.com/<user>/<repo>``.
            output_folder: Path of the zip file to write (despite the name,
                this is a file path).

        Raises:
            ValueError: If ``link`` does not contain a user and repo segment.
            requests.HTTPError: If no branch archive could be downloaded.
        """
        parts = link.split('/') if link else []
        if len(parts) < 5 or not parts[3] or not parts[4]:
            raise ValueError(
                f"Expected a link like https://github.com/<user>/<repo>, got: {link!r}"
            )
        username, repo = parts[3], parts[4]
        if repo.endswith(".git"):
            # Clone URLs carry a ".git" suffix that archive URLs do not accept.
            repo = repo[:-len(".git")]

        for branch in ("master", "main"):
            zip_url = f"https://github.com/{username}/{repo}/archive/refs/heads/{branch}.zip"
            self.zip_url = zip_url
            response = requests.get(zip_url, timeout=60)
            if response.status_code != 404:
                response.raise_for_status()
                break
        else:
            # Every candidate branch returned 404: surface the last failure.
            response.raise_for_status()

        with open(output_folder, 'wb') as f:
            f.write(response.content)

    def extract_zip(self, zip_file, destination_folder):
        """Extract ``zip_file`` into ``destination_folder``.

        Returns:
            The first member name of the archive (for GitHub archives this is
            the top-level ``<repo>-<branch>/`` directory). The name is
            relative to ``destination_folder``.
        """
        with zipfile.ZipFile(zip_file) as zf:
            zf.extractall(destination_folder)
            folder_name = zf.namelist()[0]
        return folder_name

    @staticmethod
    def _heading_flowables(text):
        """Return the heading paragraph and spacer placed above a file."""
        # Local import keeps the file-level import block untouched.
        from xml.sax.saxutils import escape

        styles = getSampleStyleSheet()
        # Paragraph parses inline markup, so '&' and '<' in paths are escaped.
        return [Paragraph(escape(text), styles["Heading2"]), Spacer(1, 12)]

    def _pdf_to_pdf(self, input_path, output_path):
        """Write ``input_path`` to ``output_path`` behind a heading page."""
        buffer = BytesIO()
        SimpleDocTemplate(buffer, pagesize=letter).build(
            self._heading_flowables(f"File path: {input_path}")
        )
        buffer.seek(0)

        pdf_writer = PyPDF2.PdfWriter()
        for page in PyPDF2.PdfReader(buffer).pages:
            pdf_writer.add_page(page)

        # The source file stays open until the merged file is written, since
        # the reader may load page data lazily from the stream.
        with open(input_path, "rb") as src:
            for page in PyPDF2.PdfReader(src).pages:
                pdf_writer.add_page(page)
            with open(output_path, "wb") as dst:
                pdf_writer.write(dst)

    def _image_to_pdf(self, input_path, output_path):
        """Render the image at ``input_path`` into a one-image PDF."""
        with Image.open(input_path) as img:
            img_width, img_height = img.size

        # One inch of margin on every side of a letter page.
        # NOTE(review): platypus frames appear to add 6pt padding per side by
        # default, so 12pt of slack is left to keep a page-filling image from
        # overflowing the frame -- confirm against the ReportLab docs.
        frame_padding = 2 * 6
        max_pdf_width = letter[0] - 2 * 72 - frame_padding
        max_pdf_height = letter[1] - 2 * 72 - frame_padding

        # Shrink (never enlarge) while keeping the aspect ratio.
        scale = min(1.0, max_pdf_width / img_width, max_pdf_height / img_height)
        draw_width = max(1, int(img_width * scale))
        draw_height = max(1, int(img_height * scale))

        elements = self._heading_flowables(f" {input_path}")
        elements.append(
            RLImage(input_path, width=draw_width, height=draw_height, kind='proportional')
        )
        SimpleDocTemplate(output_path, pagesize=letter).build(elements)

    def _text_to_pdf(self, input_path, output_path):
        """Render a text file as preformatted (code-style) PDF text."""
        # Strict UTF-8: binary files raise UnicodeDecodeError and are skipped
        # by the caller rather than being indexed as garbage.
        with open(input_path, "r", encoding="utf-8") as f:
            content = f.read()

        elements = self._heading_flowables(f"{input_path}")
        elements.append(Preformatted(content, style=getSampleStyleSheet()["Code"]))
        SimpleDocTemplate(output_path, pagesize=letter).build(elements)

    def convert_to_pdf(self, input_path, output_path):
        """Convert one repository file to a PDF headed by its path.

        ``.pdf`` inputs are copied behind a heading page, common image
        formats are embedded scaled to the page, and everything else is
        treated as UTF-8 text.

        Raises:
            Exception: Whatever the underlying reader/renderer raises for
                unreadable input; callers decide whether to skip the file.
        """
        if input_path.endswith(".pdf"):
            self._pdf_to_pdf(input_path, output_path)
        elif input_path.lower().endswith((".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff")):
            self._image_to_pdf(input_path, output_path)
        else:
            self._text_to_pdf(input_path, output_path)

    def merge_pdfs(self, pdf_files, output_path):
        """Concatenate ``pdf_files`` in order into ``output_path``.

        Encrypted or unparseable PDFs are reported on stdout and skipped.
        """
        pdf_writer = PyPDF2.PdfWriter()
        for pdf_file in pdf_files:
            # Read into memory so the pages remain available after the file
            # handle is closed and until the writer serializes them.
            with open(pdf_file, "rb") as f:
                data = BytesIO(f.read())
            try:
                pdf_reader = PyPDF2.PdfReader(data)
                if pdf_reader.is_encrypted:
                    print(f"{pdf_file} is encrypted. Skipping.")
                    continue
            except Exception:
                print(f"{pdf_file} is not a valid PDF. Skipping.")
                continue

            for page in pdf_reader.pages:
                pdf_writer.add_page(page)

        with open(output_path, "wb") as f:
            pdf_writer.write(f)

    def get_pdf(self):
        """Return the merged PDF path (``None`` before ``create_repo_pdf``)."""
        return self.merged_pdf_path

    def save_indexDB(self, save_path='indexDB.json'):
        """Persist the current vector store to ``save_path``."""
        self.vectorstore.save_local(save_path)
        print("indexDB saved at: ", save_path)

    def create_repo_pdf(self, repo_link, merged_pdf="merged.pdf"):
        """Download ``repo_link`` and merge all of its files into one PDF.

        Files whose path contains an ignored fragment, or that fail to
        convert, are skipped. The downloaded archive, the extracted tree and
        the ``temp`` working folder are removed even when a step fails.

        Returns:
            ``merged_pdf``, the path of the merged document.
        """
        self.merged_pdf_path = merged_pdf
        ignore_list = ['__pycache__']
        pdf_files = []
        folder_name = None
        try:
            self.download_repo_zip(repo_link)
            folder_name = self.extract_zip('./main.zip', './')
            os.makedirs("temp", exist_ok=True)

            index = 0
            for root, dirs, files in os.walk(folder_name):
                dirs.sort()  # deterministic traversal order
                for file in sorted(files):
                    input_file = os.path.join(root, file)
                    if any(x in input_file for x in ignore_list):
                        continue
                    # Prefix with a running index: basenames repeat across
                    # directories (e.g. __init__.py) and would otherwise
                    # overwrite each other in the flat temp folder.
                    index += 1
                    stem = os.path.splitext(file)[0]
                    output_file = os.path.join("temp", f"{index:06d}_{stem}.pdf")

                    try:
                        self.convert_to_pdf(input_file, output_file)
                    except Exception:
                        # Best effort: unreadable/binary files are skipped.
                        print("Error converting file: ", input_file)
                        continue
                    pdf_files.append(output_file)

            self.merge_pdfs(pdf_files, self.merged_pdf_path)
        finally:
            if os.path.exists("main.zip"):
                os.remove("main.zip")
            if folder_name:
                shutil.rmtree(folder_name, ignore_errors=True)
            shutil.rmtree("temp", ignore_errors=True)

        return merged_pdf

    def Answer_quetsion(self, question):
        """Run ``question`` through the QA chain and return its answer."""
        return self.qa.run(question)
238
+
239
# Single shared agent instance used by the Gradio callbacks below.
repogpt = REPOGPT()
240
+
241
+
242
def call_output(string='REPOGPT Initializing'):
    """Return ``string`` unchanged.

    Used as a Gradio callback with no inputs, so the default status message
    is what gets written to the output box.
    """
    return string
244
+
245
def download_file(filename='merged.pdf'):
    """Return ``filename`` as an attachment response via ``send_file``.

    NOTE(review): ``send_file`` comes from Flask and normally needs a Flask
    application/request context; nothing in this file creates one and the
    only call site is commented out -- confirm before wiring this to the UI.
    An earlier draft took the path from ``repogpt.get_pdf()`` instead of the
    ``filename`` default.
    """
    return send_file(filename, as_attachment=True)
248
+
249
# Gradio UI: API key + repository link -> initialize the agent -> ask questions.
# NOTE(review): the original indentation was lost, so the nesting of rows and
# columns below is a reconstruction -- confirm against the intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("<h3><center>REPO ChatGPT</center></h3>")
        # NOTE(review): this description (and several example prompts and the
        # API-key placeholder below) still refer to Visual ChatGPT rather than
        # this repository QA app; left unchanged pending a decision on wording.
        gr.Markdown(
            """This is a demo to the work [Visual ChatGPT: Talking, Drawing and Editing with Visual Foundation Models](https://github.com/microsoft/visual-chatgpt).<br>
            This space connects ChatGPT and a series of Visual Foundation Models to enable sending and receiving images during chatting.<br>
            """
        )
    with gr.Row():
        apikey = gr.Textbox(
            placeholder="Paste your OpenAI API key here to start Visual ChatGPT(sk-...) and press Enter ↵️",
            show_label=True,
            label='OpenAI API key',
            lines=1,
            type="password",
        )
    with gr.Row():
        repo_link = gr.Textbox(
            placeholder="Paste your repo_link and press Enter ↵️",
            label='repo_link',
            show_label=True,
            lines=1,
        )

    with gr.Column(scale=0.7):
        Initialize = gr.Button("Initialize RepoGPT")

    # Shows both the initialization status and the answers to questions.
    output = gr.Textbox(label="Output Box")

    # Hidden until init_agent returns gr.update(visible=True).
    with gr.Row(visible=False) as input_raws:
        with gr.Column(scale=0.7):
            txt = gr.Textbox(show_label=False, placeholder="Enter your question").style(container=False)

        with gr.Column(scale=0.4):
            AQ = gr.Button("Ask a Question").style(container=False)

    # A "Download PDF" button was sketched here but is not enabled.

    gr.Examples(
        examples=[
            "Whats the name of this repo?",
            "Whats this repo for?",
            "How can I use this. Example code ? Step by step",
            "how can I use this Experiment trackers ? Step by step",
            "how can I Performing gradient accumulation with Accelerate? Step by step?",
            "Make it like water-color painting",
            "What is the background color",
            "Describe this image",
            "please detect the depth of this image",
            "Can you use this depth image to generate a cute dog",
        ],
        inputs=txt,
    )

    # Pressing Enter in the key box or clicking the button initializes the
    # agent; Enter additionally posts the "initializing" status message.
    apikey.submit(repogpt.init_agent, [apikey, repo_link], [input_raws, output])
    Initialize.click(repogpt.init_agent, [apikey, repo_link], [input_raws, output])
    apikey.submit(call_output, [], [output])
    txt.submit(repogpt.Answer_quetsion, [txt], [output])
    AQ.click(repogpt.Answer_quetsion, [txt], [output])

demo.launch()