bhaskartripathi committed on
Commit
e933bca
1 Parent(s): 54e37e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -11
app.py CHANGED
@@ -55,8 +55,15 @@ def text_to_chunks(texts, word_length=150, start_page=1):
55
 
56
  class SemanticSearch:
57
 
58
- def __init__(self):
59
- self.use = hub.load('https://tfhub.dev/google/universal-sentence-encoder/4')
 
 
 
 
 
 
 
60
  self.fitted = False
61
 
62
 
@@ -79,15 +86,29 @@ class SemanticSearch:
79
  return neighbors
80
 
81
 
82
- def get_text_embedding(self, texts, batch=1000):
83
  embeddings = []
84
  for i in range(0, len(texts), batch):
85
  text_batch = texts[i:(i+batch)]
86
  emb_batch = self.use(text_batch)
87
  embeddings.append(emb_batch)
88
  embeddings = np.vstack(embeddings)
89
- return embeddings
90
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
 
93
  def load_recommender(path, start_page=1):
@@ -163,13 +184,16 @@ recommender = SemanticSearch()
163
  title = 'PDF GPT'
164
  description = """ PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Open AI. It gives hallucination free response than other tools as the embeddings are better than OpenAI. The returned response can even cite the page number in square brackets([]) where the information is located, adding credibility to the responses and helping to locate pertinent information quickly."""
165
 
166
- with gr.Blocks() as demo:
 
167
 
 
 
 
 
168
  gr.Markdown(f'<center><h1>{title}</h1></center>')
169
  gr.Markdown(description)
170
-
171
  with gr.Row():
172
-
173
  with gr.Group():
174
  gr.Markdown(f'<p style="text-align:center">Get your Open AI API key <a href="https://platform.openai.com/account/api-keys">here</a></p>')
175
  openAI_key=gr.Textbox(label='Enter your OpenAI API key here')
@@ -179,10 +203,7 @@ with gr.Blocks() as demo:
179
  question = gr.Textbox(label='Enter your question here')
180
  btn = gr.Button(value='Submit')
181
  btn.style(full_width=True)
182
-
183
  with gr.Group():
184
  answer = gr.Textbox(label='The answer to your question is :')
185
-
186
  btn.click(question_answer, inputs=[url, file, question,openAI_key], outputs=[answer])
187
- #openai.api_key = os.getenv('Your_Key_Here')
188
  demo.launch()
 
55
 
56
  class SemanticSearch:
57
 
58
+ class SemanticSearch:
59
+
60
+ def __init__(self, embedder='openai'):
61
+ if embedder == 'openai':
62
+ self.embedder = openai.Engine("davinci")
63
+ elif embedder == 'use':
64
+ self.embedder = hub.load('https://tfhub.dev/google/universal-sentence-encoder/4')
65
+ else:
66
+ raise ValueError("Invalid embedder. Must be either 'openai' or 'use'.")
67
  self.fitted = False
68
 
69
 
 
86
  return neighbors
87
 
88
 
89
+ '''def get_text_embedding(self, texts, batch=1000):
90
  embeddings = []
91
  for i in range(0, len(texts), batch):
92
  text_batch = texts[i:(i+batch)]
93
  emb_batch = self.use(text_batch)
94
  embeddings.append(emb_batch)
95
  embeddings = np.vstack(embeddings)
96
+ return embeddings'''
97
+ def get_text_embedding(self, texts):
98
+ embeddings = []
99
+ if isinstance(self.embedder, openai.Engine):
100
+ for text in texts:
101
+ response = self.embedder.search(
102
+ documents=texts,
103
+ query=text,
104
+ max_rerank=1
105
+ )
106
+ embeddings.append(response["data"][0]["score"])
107
+ elif isinstance(self.embedder, hub.Module):
108
+ embeddings = self.embedder(texts)
109
+ else:
110
+ raise ValueError("Invalid embedder.")
111
+ return np.array(embeddings)
112
 
113
 
114
  def load_recommender(path, start_page=1):
 
184
  title = 'PDF GPT'
185
  description = """ PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Open AI. It gives hallucination free response than other tools as the embeddings are better than OpenAI. The returned response can even cite the page number in square brackets([]) where the information is located, adding credibility to the responses and helping to locate pertinent information quickly."""
186
 
187
+ title = 'PDF GPT'
188
+ description = """ PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Open AI. It gives hallucination free response than other tools as the embeddings are better than OpenAI. The returned response can even cite the page number in square brackets([]) where the information is located, adding credibility to the responses and helping to locate pertinent information quickly."""
189
 
190
+ embedder = gr.Dropdown(['openai', 'use'], label='Select Embedder')
191
+ recommender = SemanticSearch(embedder=embedder)
192
+
193
+ with gr.Blocks() as demo:
194
  gr.Markdown(f'<center><h1>{title}</h1></center>')
195
  gr.Markdown(description)
 
196
  with gr.Row():
 
197
  with gr.Group():
198
  gr.Markdown(f'<p style="text-align:center">Get your Open AI API key <a href="https://platform.openai.com/account/api-keys">here</a></p>')
199
  openAI_key=gr.Textbox(label='Enter your OpenAI API key here')
 
203
  question = gr.Textbox(label='Enter your question here')
204
  btn = gr.Button(value='Submit')
205
  btn.style(full_width=True)
 
206
  with gr.Group():
207
  answer = gr.Textbox(label='The answer to your question is :')
 
208
  btn.click(question_answer, inputs=[url, file, question,openAI_key], outputs=[answer])
 
209
  demo.launch()