IlyasMoutawwakil HF staff committed on
Commit
a8236f5
β€’
1 Parent(s): bb01ced

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -27
app.py CHANGED
@@ -7,17 +7,28 @@ from haystack.document_stores.base import BaseDocumentStore
7
  from haystack.schema import Document
8
 
9
  from typing import Optional, List
 
 
 
10
  from time import perf_counter
11
  import gradio as gr
12
  import numpy as np
13
  import requests
14
  import os
15
 
 
16
  RETRIEVER_URL = os.getenv("RETRIEVER_URL")
17
  RANKER_URL = os.getenv("RANKER_URL")
18
  HF_TOKEN = os.getenv("HF_TOKEN")
19
 
20
 
 
 
 
 
 
 
 
21
 
22
  def post(url, payload):
23
  response = requests.post(
@@ -128,16 +139,7 @@ class Ranker(BaseRanker):
128
 
129
  TOP_K = 2
130
  BATCH_SIZE = 16
131
- EXAMPLES = [
132
- "There is a blue house on Oxford Street.",
133
- "Paris is the capital of France.",
134
- "The Eiffel Tower is in Paris.",
135
- "The Louvre is in Paris.",
136
- "London is the capital of England.",
137
- "Cairo is the capital of Egypt.",
138
- "The pyramids are in Egypt.",
139
- "The Sphinx is in Egypt.",
140
- ]
141
 
142
  if (
143
  os.path.exists("/data/faiss_document_store.db")
@@ -148,7 +150,6 @@ if (
148
  retriever = Retriever(
149
  document_store=document_store, top_k=TOP_K, batch_size=BATCH_SIZE
150
  )
151
- document_store.update_embeddings(retriever=retriever)
152
  document_store.save(index_path="/data/faiss_index")
153
  else:
154
  try:
@@ -163,12 +164,9 @@ else:
163
  return_embedding=True,
164
  embedding_dim=384,
165
  )
166
- document_store.write_documents(
167
- [Document(content=d, id=i) for i, d in enumerate(EXAMPLES)]
168
- )
169
- retriever = Retriever(
170
- document_store=document_store, top_k=TOP_K, batch_size=BATCH_SIZE
171
- )
172
  document_store.update_embeddings(retriever=retriever)
173
  document_store.save(index_path="/data/faiss_index")
174
 
@@ -180,20 +178,66 @@ pipe.add_node(component=ranker, name="Ranker", inputs=["Retriever"])
180
 
181
 
182
  def run(query: str) -> dict:
183
- output = pipe.run(query=query)
184
- closest_documents = [d.content for d in output["documents"]]
185
- return f"Closest ({TOP_K}) document(s): {closest_documents}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
 
188
- run("What is the capital of France?")
189
- print("Warmed up successfully!")
 
 
 
 
 
190
 
191
  gr.Interface(
192
  fn=run,
193
- inputs="text",
194
- outputs="text",
195
  title="End-to-End Retrieval & Ranking",
196
- examples=["What is the capital of France?"],
197
- description="A pipeline for retrieving and ranking documents "
198
- "from a memory persistent FAISS document store, using Inference Endpoints.",
 
 
199
  ).launch()
 
7
  from haystack.schema import Document
8
 
9
  from typing import Optional, List
10
+
11
+ from huggingface_hub import get_inference_endpoint
12
+ from datasets import load_dataset
13
  from time import perf_counter
14
  import gradio as gr
15
  import numpy as np
16
  import requests
17
  import os
18
 
19
+
20
# Endpoint URLs and the Hugging Face auth token are read from environment
# variables (presumably configured as secrets on the hosting Space — confirm).
RETRIEVER_URL = os.getenv("RETRIEVER_URL")
RANKER_URL = os.getenv("RANKER_URL")
HF_TOKEN = os.getenv("HF_TOKEN")


# Handles to the two Inference Endpoints, so their status can be checked
# (and a resume requested) before each query is run.
RETRIEVER_IE = get_inference_endpoint(
    "fastrag-retriever", namespace="optimum-intel", token=HF_TOKEN
)
RANKER_IE = get_inference_endpoint(
    "fastrag-ranker", namespace="optimum-intel", token=HF_TOKEN
)
31
+
32
 
33
  def post(url, payload):
34
  response = requests.post(
 
139
 
140
  TOP_K = 2
141
  BATCH_SIZE = 16
142
+
 
 
 
 
 
 
 
 
 
143
 
144
  if (
145
  os.path.exists("/data/faiss_document_store.db")
 
150
  retriever = Retriever(
151
  document_store=document_store, top_k=TOP_K, batch_size=BATCH_SIZE
152
  )
 
153
  document_store.save(index_path="/data/faiss_index")
154
  else:
155
  try:
 
164
  return_embedding=True,
165
  embedding_dim=384,
166
  )
167
+ DATASET = load_dataset("bilgeyucel/seven-wonders", split="train")
168
+ document_store.write_documents(DATASET)
169
+ retriever = Retriever(document_store=document_store, top_k=TOP_K, batch_size=BATCH_SIZE)
 
 
 
170
  document_store.update_embeddings(retriever=retriever)
171
  document_store.save(index_path="/data/faiss_index")
172
 
 
178
 
179
 
180
def run(query: str) -> str:
    """Run the retrieval & ranking pipeline on *query* and return the
    results rendered as an HTML fragment.

    Raises:
        gr.Error: if either Inference Endpoint is not in the "running"
            state. In that case a resume request is sent first, and the
            user is asked to retry in a few minutes.
    """
    # Check both endpoints up front: resuming a paused endpoint is
    # asynchronous, so we can only ask the user to try again later.
    if RETRIEVER_IE.status != "running":
        RETRIEVER_IE.resume()
        raise gr.Error(
            "Retriever Inference Endpoint is not running. "
            "Sent a request to resume it. Please try again in a few minutes."
        )

    if RANKER_IE.status != "running":
        RANKER_IE.resume()
        raise gr.Error(
            "Ranker Inference Endpoint is not running. "
            "Sent a request to resume it. Please try again in a few minutes."
        )

    pipe_output = pipe.run(query=query)

    # Render the query and the top-K ranked documents as an HTML fragment
    # (displayed by the gr.components.HTML output).
    output = f"""
    <h2>Query</h2>
    <p>{query}</p>
    <h2>Top {TOP_K} Documents</h2>
    """

    for i, doc in enumerate(pipe_output["documents"]):
        output += f"""
        <h3>Document {i + 1}</h3>
        <p><strong>ID:</strong> {doc.id}</p>
        <p><strong>Score:</strong> {doc.score}</p>
        <p><strong>Content:</strong> {doc.content}</p>
        """

    return output
212
+
213
+
214
# Example queries shown under the input box; all answerable from the
# seven-wonders dataset loaded into the document store.
examples = [
    "Where is Gardens of Babylon?",
    "Why did people build Great Pyramid of Giza?",
    "What does Rhodes Statue look like?",
    "Why did people visit the Temple of Artemis?",
    "What is the importance of Colossus of Rhodes?",
    "What happened to the Tomb of Mausolus?",
    "How did Colossus of Rhodes collapse?",
]


# Query input, pre-filled with the first example.
input_text = gr.components.Textbox(
    label="Query",
    placeholder="Enter a query",
    value=examples[0],
    lines=3,
)
# `run` returns an HTML fragment, so the output component is HTML.
output_html = gr.components.HTML(label="Results")

gr.Interface(
    fn=run,
    inputs=input_text,
    outputs=output_html,
    title="End-to-End Retrieval & Ranking",
    examples=examples,
    description="A [haystack](https://haystack.deepset.ai/) pipeline for retrieving and ranking "
    "documents from the [seven-wonders dataset](bilgeyucel/seven-wonders) based on a query, "
    "using a FAISS database as a document store (kept in the space's persistent storage) "
    "and two [Inference Endpoints for the Retriever and Ranker](https://huggingface.co/collections/optimum-intel/fast-rag-inference-endpoints-6641c6cbb98ddf3fe49c7728).",
).launch()