alanwnl committed
Commit d01b1e8
1 Parent(s): e8c78f3

update the UI

Files changed (2)
  1. .DS_Store +0 -0
  2. app.py +105 -73
.DS_Store ADDED
Binary file (6.15 kB).
 
app.py CHANGED
@@ -2,6 +2,7 @@ import urllib.request
 import fitz
 import re
 import numpy as np
+import tensorflow as tf
 import tensorflow_hub as hub
 import openai
 import gradio as gr
@@ -36,7 +37,7 @@ def pdf_to_text(path, start_page=1, end_page=None):
     return text_list


-def text_to_chunks(texts, word_length=150, start_page=1):
+def text_to_chunks(texts, word_length=200, start_page=1):
     text_toks = [t.split(' ') for t in texts]
     page_nums = []
     chunks = []
@@ -98,20 +99,17 @@ def load_recommender(path, start_page=1):


 ####################
-def generate_text(openAI_key,
-                  openAI_base,
-                  openAI_API_version,
-                  prompt,
-                  engine="chatgpt"):
-    openai.api_type = "azure"
+def generate_text(api_type, engine, openAI_key, openAI_base,
+                  openAI_API_version, temperature, prompt):
+    openai.api_type = api_type
     openai.api_base = openAI_base
     openai.api_version = openAI_API_version
     openai.api_key = openAI_key
-    completions = openai.ChatCompletion.create(engine="chatgpt",
-                                               max_tokens=1024,
+    completions = openai.ChatCompletion.create(engine=engine,
+                                               max_tokens=2056,
                                                n=1,
                                                stop=None,
-                                               temperature=1.0,
+                                               temperature=temperature,
                                                messages=[{
                                                    "role": "user",
                                                    "content": prompt
@@ -121,47 +119,30 @@ def generate_text(openAI_key,
     return message


-def generate_answer(question, openAI_key, openAI_base, openAI_API_version):
+def generate_answer(api_type, engine, openAI_key, openAI_base,
+                    openAI_API_version, temperature, user_prompt, question):
     topn_chunks = recommender(question)
-    # print(len(topn_chunks))
-    # print(*topn_chunks, sep="\n")
     prompt = ""
     prompt += 'search results:\n\n'
     for c in topn_chunks:
         prompt += c + '\n\n'

-    # prompt += "Instructions: Compose a comprehensive reply to the query using the search results given. "\
-    #           "Cite each reference using [ Page Number] notation (every result has this number at the beginning). "\
-    #           "Citation should be done at the end of each sentence. "\
-    #           "If the search results mention multiple subjects with the same name, create separate answers for each. "\
-    #           "Make sure the answer is correct and don't output false content. "\
-    #           "Only answer what is asked. The answer should be in details."\
-    #           "\n\nQuery: {question}\nAnswer: "
-    prompt += "Instructions: Compose a comprehensive reply to the query using the search results given. "\
-              "Cite each reference using [ Page Number] notation (every result has this number at the beginning). "\
-              "Citation should be done at the end of each sentence. "\
-              "If the search results mention multiple subjects with the same name, create separate answers for each. "\
-              "Only include information found in the results and don't add any additional information."\
-              "Make sure the answer is correct and don't output false content. "\
-              "If the text does not relate to the query, simply state 'Found Nothing'. "\
-              "Ignore outlier search results which has nothing to do with the question."\
-              "Only answer what is asked. The answer should be short and concise."\
-              "\n\nQuery: {question}\nAnswer: "
-
-    prompt += f"Query: {question}\nAnswer:"
+    prompt += f"Instructions: {user_prompt}"\
+              f"\n\nQuery: {question}\nAnswer: "
     print(prompt)
-    answer = generate_text(openAI_key, openAI_base, openAI_API_version, prompt,
-                           "chatgpt")
+    answer = generate_text(api_type, engine, openAI_key, openAI_base,
+                           openAI_API_version, temperature, prompt)
     return answer


-def question_answer(url, file, question, openAI_key, openAI_base,
-                    openAI_API_version):
+def question_answer(api_type, engine, openAI_key, openAI_base,
+                    openAI_API_version, url, file, temperature, user_prompt,
+                    question):
     if openAI_key.strip() == '':
         return '[ERROR]: Please enter you Open AI Key. Get your key here : https://platform.openai.com/account/api-keys'
     if url.strip() == '' and file == None:
-        return '[ERROR]: Both URL and PDF is empty. Provide atleast one.'
+        return '[ERROR]: Both URL and PDF is empty. Provide at least one.'

     if url.strip() != '' and file != None:
         return '[ERROR]: Both URL and PDF is provided. Please provide only one (eiter URL or PDF).'
@@ -181,71 +162,122 @@ def question_answer(url, file, question, openAI_key, openAI_base,
     if question.strip() == '':
         return '[ERROR]: Question field is empty'

-    return generate_answer(question, openAI_key, openAI_base,
-                           openAI_API_version)
+    return generate_answer(api_type, engine, openAI_key, openAI_base,
+                           openAI_API_version, temperature, user_prompt,
+                           question)


-def chatbot_respond(question, chat_history, url, file, openAI_key, openAI_base,
-                    openAI_API_version):
+def chatbot_respond(api_type, engine, openAI_key, openAI_base,
+                    openAI_API_version, url, file, temperature, user_prompt,
+                    question, chatbot):

-    bot_message = question_answer(url, file, question, openAI_key, openAI_base,
-                                  openAI_API_version)
-    chat_history.append((question, bot_message))
-    return "", chat_history
+    bot_message = question_answer(api_type, engine, openAI_key, openAI_base,
+                                  openAI_API_version, url, file, temperature,
+                                  user_prompt, question)
+    chatbot.append((question, bot_message))
+    return "", chatbot

 recommender = SemanticSearch()

-title = 'PDF GPT Azure'
+title = 'HKU PDF GPT Azure'
 description = """
-PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Open AI.
-It gives hallucination free response than other tools as the embeddings are better than OpenAI.
-The returned response can even cite the page number in square brackets([]) where the information is located,
-adding credibility to the responses and helping to locate pertinent information quickly.
+PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Open AI. It gives hallucination free response than other tools as the embeddings are better than OpenAI. The returned response can even cite the page number in square brackets([]) where the information is located,adding credibility to the responses and helping to locate pertinent information quickly.
 """

+
+def save_settings():
+    return gr.Tabs.update(selected=1)
+
+
 with gr.Blocks() as demo:

     gr.Markdown(f'<center><h1>{title}</h1></center>')
     gr.Markdown(description)

-    with gr.Row():
-
-        with gr.Group():
-            gr.Markdown(
-                f'<p style="text-align:center">Get your Open AI API key <a href="https://platform.openai.com/account/api-keys">here</a></p>'
-            )
-            gr.Dropdown(label="API Type",
-                        choices=["azure", "OpenAI"],
-                        info="Azure or Open AI",
-                        value="azure"),
+    with gr.Tabs() as tabs:
+        with gr.TabItem("Setup", id=0):
+            with gr.Accordion("Detail Settings", open=False):
+                api_type = gr.Dropdown(label="API Type",
+                                       choices=["azure", "OpenAI"],
+                                       info="Azure or Open AI",
+                                       value="azure",
+                                       interactive=False)
+                openAI_base = gr.Textbox(label='api_base',
+                                         value="https://api.hku.hk",
+                                         interactive=True)
+                openAI_API_version = gr.Textbox(label='API version',
+                                                value="2023-03-15-preview",
+                                                interactive=True)
+                engine = gr.Dropdown(
+                    label="Engine",
+                    choices=["chatgpt", "chatgpt-4", "chatgpt-4-32k"],
+                    info="ChatGPT 3.5, ChatGPT 4 ,ChatGPT 4-32k",
+                    value="chatgpt",
+                    interactive=True)
             #####################
             ##
             ## REMEMBER to remove the key before public deploy
             ##
             #####################
             openAI_key = gr.Textbox(
-                label='Enter your Azure OpenAI API key here')
-            openAI_base = gr.Textbox(label='api_base',
-                                     value="https://api.hku.hk")
-            openAI_API_version = gr.Textbox(label='API version',
-                                            value="2023-03-15-preview")
+                label='Enter your HKU Azure OpenAI API key here',
+                value="561c52b8d3ec4733bab55ee9515e6deb")
+
             url = gr.Textbox(label='Enter PDF URL here')
             gr.Markdown("<center><h4>OR<h4></center>")
             file = gr.File(label='Upload your PDF/ Research Paper / Book here',
                            file_types=['.pdf'])
-        with gr.Group():
-            chatbot = gr.Chatbot()
+            btn = gr.Button(value="Save Settings")
+            btn.click(save_settings, None, tabs)
+        with gr.TabItem("Chat", id=1):
+            with gr.Accordion("Edit Customized Prompt", open=False):
+                user_prompt = gr.Textbox(
+                    lines=10,
+                    interactive=True,
+                    label="Prompt",
+                    value=
+                    """Compose a comprehensive reply to the query using the search results given.
+Cite each reference using [p: Page Number] notation (every result has this number at the beginning).
+Citation should be done at the end of each sentence.
+If the search results mention multiple subjects with the same name, create separate answers for each.
+Only include information found in the results and don't add any additional information.
+Make sure the answer is correct and don't output false content.
+If the text does not relate to the query, simply state 'Found Nothing'.
+Ignore outlier search results which has nothing to do with the question.
+Only answer what is asked. The answer should be short and concise.""")
+                temperature = gr.Slider(
+                    0.0,
+                    1.0,
+                    label="Temperature",
+                    info="More focused 0.0 <---> 1.0 Highly creative",
+                    value=0.5,
+                    interactive=True)
+            chatbot = gr.Chatbot(interactive=True)
             question = gr.Textbox()
-            clear = gr.ClearButton([question, chatbot])
-
+            with gr.Row():
+                clear = gr.ClearButton([question, chatbot],
+                                       scale=1,
+                                       interactive=True)
+                submit_btn = gr.Button(value="Submit",
+                                       scale=2,
+                                       interactive=True)
+
+            submit_btn.click(
+                chatbot_respond,
+                inputs=[
+                    api_type, engine, openAI_key, openAI_base,
+                    openAI_API_version, url, file, temperature, user_prompt,
+                    question, chatbot
+                ],
+                outputs=[question, chatbot],
+            )
             question.submit(
                 chatbot_respond,
                 inputs=[
-                    question, chatbot, url, file, openAI_key, openAI_base,
-                    openAI_API_version
+                    api_type, engine, openAI_key, openAI_base,
+                    openAI_API_version, url, file, temperature, user_prompt,
+                    question, chatbot
                 ],
                 outputs=[question, chatbot],
             )
-
-
 demo.launch()
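
For readers trying the new two-tab flow: the "Save Settings" button switches from the Setup tab to the Chat tab by returning a gr.Tabs.update(...) whose selected value is the id of the target tab, with the Tabs container wired as the click handler's output. The following is a minimal sketch of that pattern, assuming Gradio 3.x; the labels and ids mirror the diff, everything else is illustrative and not part of this commit.

    import gradio as gr


    def save_settings():
        # Returning a Tabs update with selected=1 moves the UI to the tab whose id is 1.
        return gr.Tabs.update(selected=1)


    with gr.Blocks() as demo:
        with gr.Tabs() as tabs:
            with gr.TabItem("Setup", id=0):
                btn = gr.Button(value="Save Settings")
            with gr.TabItem("Chat", id=1):
                gr.Markdown("Chat components go here.")
        # The Tabs container is the output component, so the returned update is applied to it.
        btn.click(save_settings, None, tabs)

    demo.launch()

The same Setup tab also supplies api_type, engine, api_base, and API version, which the refactored generate_text() now passes straight through to openai.ChatCompletion.create(engine=...) instead of hard-coding the "chatgpt" engine and a temperature of 1.0.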