Spaces:

Abhilashvj
/

haystack_QA

Runtime error

App Files Files Community

abhi001vj committed on Dec 28, 2022

Commit

2c560b7

•

1 Parent(s): c98aa7a

added packages for linux

Browse files

Files changed (2) hide show

packages.txt +2 -0
search.py +60 -0

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ poppler-utils
2	+ xpdf

search.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import pinecone
+index_name = "abstractive-question-answering"  # name of the Pinecone index this script creates and fills
+# create the index only when it is not already listed (NOTE(review): no pinecone.init(...) call is visible before this point — confirm the client is configured elsewhere)
+if index_name not in pinecone.list_indexes():
+    # dimension=768 presumably matches the length of the vectors produced by retriever.encode — TODO confirm against the retriever model
+    pinecone.create_index(
+        index_name,
+        dimension=768,
+        metric="cosine"
+    )
+# open a handle to the index (pre-existing or just created) for the upserts below
+index = pinecone.Index(index_name)
+# embed and upload the rows of df in slices of batch_size rows
+batch_size = 64
+for i in tqdm(range(0, len(df), batch_size)):  # NOTE(review): tqdm, df and retriever are not imported or defined in the visible part of this file
+    # clamp the slice end so the final batch stops at len(df)
+    i_end = min(i+batch_size, len(df))
+    # positional slice: rows i .. i_end-1 of df
+    batch = df.iloc[i:i_end]
+    # encode the passage_text column of this batch and convert the result to plain Python lists
+    emb = retriever.encode(batch["passage_text"].tolist()).tolist()
+    # one dict per row (column name -> value), attached to each vector as its metadata
+    meta = batch.to_dict(orient="records")
+    # IDs are the positional row numbers i .. i_end-1 rendered as strings
+    ids = [f"{idx}" for idx in range(i, i_end)]
+    # pair each ID with its embedding and metadata dict as (id, vector, metadata) tuples
+    to_upsert = list(zip(ids, emb, meta))
+    # send the batch to Pinecone; the response is deliberately discarded
+    _ = index.upsert(vectors=to_upsert)
+# request index statistics to check the vector count (the result is neither stored nor printed here)
+index.describe_index_stats()
+# from transformers import BartTokenizer, BartForConditionalGeneration
+# # load bart tokenizer and model from huggingface
+# tokenizer = BartTokenizer.from_pretrained('vblagoje/bart_lfqa')
+# generator = BartForConditionalGeneration.from_pretrained('vblagoje/bart_lfqa')
+# def query_pinecone(query, top_k):
+#     # generate embeddings for the query
+#     xq = retriever.encode([query]).tolist()
+#     # search pinecone index for context passage with the answer
+#     xc = index.query(xq, top_k=top_k, include_metadata=True)
+#     return xc
+# def format_query(query, context):
+#     # extract passage_text from Pinecone search result and add the <P> tag
+#     context = [f"<P> {m['metadata']['passage_text']}" for m in context]
+#     # concatenate all context passages
+#     context = " ".join(context)
+#     # concatenate the query and context passages
+#     query = f"question: {query} context: {context}"
+#     return query