Refactor the project

#5
by thomasmz1 - opened
Files changed (3)
  1. README.md +2 -2
  2. app.py +47 -92
  3. requirements.txt +5 -7
README.md CHANGED
@@ -4,11 +4,11 @@ emoji: 🏢
4
  colorFrom: indigo
5
  colorTo: green
6
  sdk: gradio
7
- sdk_version: 4.36.1
8
  app_file: app.py
9
  pinned: false
10
  license: afl-3.0
11
  duplicated_from: bhaskartripathi/pdfChatter
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
4
  colorFrom: indigo
5
  colorTo: green
6
  sdk: gradio
7
+ sdk_version: 3.20.1
8
  app_file: app.py
9
  pinned: false
10
  license: afl-3.0
11
  duplicated_from: bhaskartripathi/pdfChatter
12
  ---
13
 
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,13 +1,11 @@
1
- import os
2
- os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress TF logging
3
- os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Disable GPU
4
  import urllib.request
5
- import fitz
6
  import re
7
  import numpy as np
8
  import tensorflow_hub as hub
9
  import openai
10
  import gradio as gr
 
11
  from sklearn.neighbors import NearestNeighbors
12
 
13
  def download_pdf(url, output_path):
@@ -102,11 +100,11 @@ def load_recommender(path, start_page=1):
102
 
103
  def generate_text(openAI_key, prompt, model="gpt-3.5-turbo"):
104
  openai.api_key = openAI_key
105
- temperature = 0.1
106
- max_tokens = 256
107
- top_p = 1
108
- frequency_penalty = 0
109
- presence_penalty = 0
110
 
111
  if model == "text-davinci-003":
112
  completions = openai.Completion.create(
@@ -119,19 +117,19 @@ def generate_text(openAI_key, prompt, model="gpt-3.5-turbo"):
119
  )
120
  message = completions.choices[0].text
121
  else:
122
- response = openai.ChatCompletion.create(
123
  model=model,
124
  messages=[
125
  {"role": "system", "content": "You are a helpful assistant."},
 
126
  {"role": "user", "content": prompt}
127
  ],
128
- temperature=temperature,
129
  max_tokens=max_tokens,
130
  top_p=top_p,
131
  frequency_penalty=frequency_penalty,
132
  presence_penalty=presence_penalty,
133
- )
134
- message = response['choices'][0]['message']['content']
135
  return message
136
 
137
 
@@ -240,93 +238,50 @@ title = 'PDF GPT Turbo'
240
  description = """ PDF GPT Turbo allows you to chat with your PDF files. It uses Google's Universal Sentence Encoder with Deep averaging network (DAN) to give hallucination free response by improving the embedding quality of OpenAI. It cites the page number in square brackets([Page No.]) and shows where the information is located, adding credibility to the responses."""
241
 
242
  with gr.Blocks(css="""#chatbot { font-size: 14px; min-height: 1200; }""") as demo:
 
243
  gr.Markdown(f'<center><h3>{title}</h3></center>')
244
  gr.Markdown(description)
245
 
246
  with gr.Row():
247
- with gr.Column():
248
- # API Key and File Inputs
249
- with gr.Accordion("API Key and PDF"):
250
- openAI_key = gr.Textbox(label='Enter your OpenAI API key here', type='password')
251
- url = gr.Textbox(label='Enter PDF URL here (Example: https://arxiv.org/pdf/1706.03762.pdf ; https://link.springer.com/content/pdf/10.1007/s10614-022-10325-8.pdf)')
 
252
  gr.Markdown("<center><h4>OR<h4></center>")
253
- file = gr.File(label='Upload your PDF/Research Paper/Book here', file_types=['.pdf'])
254
-
255
- # Model Selection
256
- model = gr.Radio(
257
- choices=[
258
- 'gpt-4o-mini',
259
- 'gpt-4o',
260
- 'gpt-4',
261
- ],
262
- label='Select Model',
263
- value='gpt-4o-mini'
264
- )
265
-
266
- # Chat Interface
267
- chatbot = gr.Chatbot(label="Chat History", type="messages")
268
- msg = gr.Textbox(label="Enter your question here", lines=2)
269
- submit_btn = gr.Button("Submit")
270
- clear = gr.ClearButton([msg, chatbot])
271
-
272
- # Example Questions
273
  gr.Examples(
274
  [[q] for q in questions],
275
- inputs=[msg],
276
- label="PRE-DEFINED QUESTIONS: Click on a question to auto-fill the input box",
277
  )
278
-
279
- def respond(message, chat_history, url_value, file_value, key_value, model_value):
280
- if message.strip() == "":
281
- return "", chat_history # Return empty message if no input
282
-
283
- try:
284
- # Ensure chat_history is initialized properly
285
- if chat_history is None:
286
- chat_history = []
287
-
288
- if key_value.strip() == '':
289
- chat_history.append({"role": "user", "content": message})
290
- chat_history.append({"role": "assistant", "content": '[ERROR]: Please enter your OpenAI API key'})
291
- return "", chat_history
292
-
293
- if url_value.strip() == '' and file_value is None:
294
- chat_history.append({"role": "user", "content": message})
295
- chat_history.append({"role": "assistant", "content": '[ERROR]: Both URL and PDF are empty. Provide at least one'})
296
- return "", chat_history
297
-
298
- # Process PDF and generate answer
299
- if url_value.strip() != '':
300
- download_pdf(url_value, 'corpus.pdf')
301
- load_recommender('corpus.pdf')
302
- else:
303
- old_file_name = file_value.name
304
- file_name = old_file_name[:-12] + old_file_name[-4:]
305
- os.rename(old_file_name, file_name)
306
- load_recommender(file_name)
307
-
308
- answer = generate_answer(message, key_value, model_value)
309
-
310
- chat_history.append({"role": "user", "content": message})
311
- chat_history.append({"role": "assistant", "content": answer})
312
-
313
- return "", chat_history
314
-
315
- except Exception as e:
316
- chat_history.append({"role": "user", "content": message})
317
- chat_history.append({"role": "assistant", "content": f'[ERROR]: {str(e)}'})
318
- return "", chat_history
319
-
320
- submit_btn.click(
321
- respond,
322
- [msg, chatbot, url, file, openAI_key, model],
323
- [msg, chatbot]
324
  )
325
 
326
- msg.submit(
327
- respond,
328
- [msg, chatbot, url, file, openAI_key, model],
329
- [msg, chatbot]
330
- )
331
 
332
- demo.launch()
 
 
 
 
1
  import urllib.request
2
+ import fitz
3
  import re
4
  import numpy as np
5
  import tensorflow_hub as hub
6
  import openai
7
  import gradio as gr
8
+ import os
9
  from sklearn.neighbors import NearestNeighbors
10
 
11
  def download_pdf(url, output_path):
 
100
 
101
  def generate_text(openAI_key, prompt, model="gpt-3.5-turbo"):
102
  openai.api_key = openAI_key
103
+ temperature=0.7
104
+ max_tokens=256
105
+ top_p=1
106
+ frequency_penalty=0
107
+ presence_penalty=0
108
 
109
  if model == "text-davinci-003":
110
  completions = openai.Completion.create(
 
117
  )
118
  message = completions.choices[0].text
119
  else:
120
+ message = openai.ChatCompletion.create(
121
  model=model,
122
  messages=[
123
  {"role": "system", "content": "You are a helpful assistant."},
124
+ {"role": "assistant", "content": "Here is some initial assistant message."},
125
  {"role": "user", "content": prompt}
126
  ],
127
+ temperature=.3,
128
  max_tokens=max_tokens,
129
  top_p=top_p,
130
  frequency_penalty=frequency_penalty,
131
  presence_penalty=presence_penalty,
132
+ ).choices[0].message['content']
 
133
  return message
134
 
135
 
 
238
  description = """ PDF GPT Turbo allows you to chat with your PDF files. It uses Google's Universal Sentence Encoder with Deep averaging network (DAN) to give hallucination free response by improving the embedding quality of OpenAI. It cites the page number in square brackets([Page No.]) and shows where the information is located, adding credibility to the responses."""
239
 
240
  with gr.Blocks(css="""#chatbot { font-size: 14px; min-height: 1200; }""") as demo:
241
+
242
  gr.Markdown(f'<center><h3>{title}</h3></center>')
243
  gr.Markdown(description)
244
 
245
  with gr.Row():
246
+
247
+ with gr.Group():
248
+ gr.Markdown(f'<p style="text-align:center">Get your Open AI API key <a href="https://platform.openai.com/account/api-keys">here</a></p>')
249
+ with gr.Accordion("API Key"):
250
+ openAI_key = gr.Textbox(label='Enter your OpenAI API key here', password=True)
251
+ url = gr.Textbox(label='Enter PDF URL here (Example: https://arxiv.org/pdf/1706.03762.pdf )')
252
  gr.Markdown("<center><h4>OR<h4></center>")
253
+ file = gr.File(label='Upload your PDF/ Research Paper / Book here', file_types=['.pdf'])
254
+ question = gr.Textbox(label='Enter your question here')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  gr.Examples(
256
  [[q] for q in questions],
257
+ inputs=[question],
258
+ label="PRE-DEFINED QUESTIONS: Click on a question to auto-fill the input box, then press Enter!",
259
  )
260
+ model = gr.Radio([
261
+ 'gpt-3.5-turbo',
262
+ 'gpt-3.5-turbo-16k',
263
+ 'gpt-3.5-turbo-0613',
264
+ 'gpt-3.5-turbo-16k-0613',
265
+ 'text-davinci-003',
266
+ 'gpt-4',
267
+ 'gpt-4-32k'
268
+ ], label='Select Model', default='gpt-3.5-turbo')
269
+ btn = gr.Button(value='Submit')
270
+
271
+ btn.style(full_width=True)
272
+
273
+ with gr.Group():
274
+ chatbot = gr.Chatbot(placeholder="Chat History", label="Chat History", lines=50, elem_id="chatbot")
275
+
276
+
277
+ #
278
+ # Bind the click event of the button to the question_answer function
279
+ btn.click(
280
+ question_answer,
281
+ inputs=[chatbot, url, file, question, openAI_key, model],
282
+ outputs=[chatbot],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  )
284
 
285
+ demo.launch()
286
+
 
 
 
287
 
 
requirements.txt CHANGED
@@ -1,9 +1,7 @@
1
- urllib3
2
- PyMuPDF==1.18.19
3
  numpy
4
- tensorflow-hub
5
- openai
6
- gradio>=4.44.1
7
  scikit-learn
8
- openai==0.28
9
- anthropic>=0.7.0
 
 
1
+ gradio
2
+ PyMuPDF
3
  numpy
 
 
 
4
  scikit-learn
5
+ tensorflow
6
+ tensorflow-hub
7
+ openai