Switch OWID vectorstore to Pinecone
Browse files
- app.py +3 -2
- climateqa/engine/chains/retriever.py +4 -3
- climateqa/engine/vectorstore.py +2 -2
app.py
CHANGED
@@ -92,11 +92,12 @@ share_client = service.get_share_client(file_share_name)
|
|
92 |
|
93 |
user_id = create_user_id()
|
94 |
|
95 |
-
vectorstore_graphs = Chroma(persist_directory="/home/tim/ai4s/climate_qa/climate-question-answering/data/vectorstore_owid", embedding_function=embeddings_function) # TODO make it api call
|
96 |
|
97 |
|
98 |
# Create vectorstore and retriever
|
99 |
-
vectorstore = get_pinecone_vectorstore(embeddings_function)
|
|
|
|
|
100 |
llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
|
101 |
reranker = get_reranker("nano")
|
102 |
# agent = make_graph_agent(llm,vectorstore,reranker)
|
|
|
92 |
|
93 |
user_id = create_user_id()
|
94 |
|
|
|
95 |
|
96 |
|
97 |
# Create vectorstore and retriever
|
98 |
+
vectorstore = get_pinecone_vectorstore(embeddings_function, index_name = os.getenv("PINECONE_API_INDEX"))
|
99 |
+
vectorstore_graphs = get_pinecone_vectorstore(embeddings_function, index_name = os.getenv("PINECONE_API_INDEX_OWID"), text_key="title")
|
100 |
+
|
101 |
llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
|
102 |
reranker = get_reranker("nano")
|
103 |
# agent = make_graph_agent(llm,vectorstore,reranker)
|
climateqa/engine/chains/retriever.py
CHANGED
@@ -87,9 +87,10 @@ def make_retriever_node(vectorstore,reranker,rerank_by_question=True, k_final=15
|
|
87 |
vectorstore=vectorstore,
|
88 |
sources = sources,
|
89 |
# reports = ias_reports,
|
90 |
-
|
91 |
-
|
92 |
-
|
|
|
93 |
)
|
94 |
docs_question = retriever.get_relevant_documents(question)
|
95 |
|
|
|
87 |
vectorstore=vectorstore,
|
88 |
sources = sources,
|
89 |
# reports = ias_reports,
|
90 |
+
min_size = 200,
|
91 |
+
k_summary = k_summary,
|
92 |
+
k_total = k_before_reranking,
|
93 |
+
threshold = 0.5,
|
94 |
)
|
95 |
docs_question = retriever.get_relevant_documents(question)
|
96 |
|
climateqa/engine/vectorstore.py
CHANGED
@@ -19,7 +19,7 @@ def get_chroma_vectorstore(embedding_function, persist_directory="/home/dora/cli
|
|
19 |
return vectorstore
|
20 |
|
21 |
|
22 |
-
def get_pinecone_vectorstore(embeddings,text_key = "content"):
|
23 |
|
24 |
# # initialize pinecone
|
25 |
# pinecone.init(
|
@@ -33,7 +33,7 @@ def get_pinecone_vectorstore(embeddings,text_key = "content"):
|
|
33 |
# return vectorstore
|
34 |
|
35 |
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
|
36 |
-
index = pc.Index(
|
37 |
|
38 |
vectorstore = PineconeVectorstore(
|
39 |
index, embeddings, text_key,
|
|
|
19 |
return vectorstore
|
20 |
|
21 |
|
22 |
+
def get_pinecone_vectorstore(embeddings,text_key = "content", index_name = os.getenv("PINECONE_API_INDEX")):
|
23 |
|
24 |
# # initialize pinecone
|
25 |
# pinecone.init(
|
|
|
33 |
# return vectorstore
|
34 |
|
35 |
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
|
36 |
+
index = pc.Index(index_name)
|
37 |
|
38 |
vectorstore = PineconeVectorstore(
|
39 |
index, embeddings, text_key,
|