Spaces:
Running
Running
ryanrwatkins
committed on
Commit
•
78b8854
1
Parent(s):
8ad7e64
Update app.py
Browse files
app.py
CHANGED
@@ -320,6 +320,7 @@ vectorstore,search_type="similarity",k=4,score_threshold=None
|
|
320 |
k: number of documents to return (Default: 4)
|
321 |
score_threshold: Minimum relevance threshold for similarity_score_threshold (default=None)
|
322 |
"""
|
|
|
323 |
search_kwargs={}
|
324 |
if k is not None:
|
325 |
search_kwargs['k'] = k
|
@@ -330,6 +331,7 @@ vectorstore,search_type="similarity",k=4,score_threshold=None
|
|
330 |
search_type=search_type,
|
331 |
search_kwargs=search_kwargs
|
332 |
)
|
|
|
333 |
return retriever
|
334 |
|
335 |
# similarity search
|
@@ -353,7 +355,7 @@ def create_compression_retriever(embeddings, base_retriever, chunk_size=500, k=1
|
|
353 |
k (int): top k relevant chunks to the query are filtered using the EmbeddingsFilter. default =16.
|
354 |
similarity_threshold : minimum relevance threshold used by the EmbeddingsFilter. default =None.
|
355 |
"""
|
356 |
-
|
357 |
# 1. splitting documents into smaller chunks
|
358 |
splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0, separator=". ")
|
359 |
|
@@ -378,10 +380,11 @@ def create_compression_retriever(embeddings, base_retriever, chunk_size=500, k=1
|
|
378 |
base_compressor=pipeline_compressor,
|
379 |
base_retriever=base_retriever
|
380 |
)
|
381 |
-
|
382 |
return compression_retriever
|
383 |
|
384 |
def CohereRerank_retriever(
|
|
|
385 |
base_retriever,
|
386 |
cohere_api_key,cohere_model="rerank-multilingual-v2.0", top_n=8
|
387 |
):
|
@@ -403,6 +406,7 @@ def CohereRerank_retriever(
|
|
403 |
base_compressor=compressor,
|
404 |
base_retriever=base_retriever
|
405 |
)
|
|
|
406 |
return retriever_Cohere
|
407 |
|
408 |
|
@@ -418,6 +422,7 @@ def retrieval_blocks(
|
|
418 |
compression_retriever_k=16,
|
419 |
cohere_api_key="***", cohere_model="rerank-multilingual-v2.0", cohere_top_n=8,
|
420 |
):
|
|
|
421 |
"""
|
422 |
Retrieval includes: document loaders, text splitter, vectorstore and retriever.
|
423 |
|
@@ -506,7 +511,7 @@ def retrieval_blocks(
|
|
506 |
print(f"\n{retriever_type} is created successfully!")
|
507 |
print(f"Relevant documents will be retrieved from vectorstore ({vectorstore_name}) which uses {LLM_service} embeddings \
|
508 |
and has {vector_store._collection.count()} chunks.")
|
509 |
-
|
510 |
return retriever
|
511 |
except Exception as e:
|
512 |
print(e)
|
@@ -652,6 +657,7 @@ def answer_template(language="english"):
|
|
652 |
</context>
|
653 |
|
654 |
Question: {{question}}
|
|
|
655 |
Language: {language}.
|
656 |
|
657 |
"""
|
|
|
320 |
k: number of documents to return (Default: 4)
|
321 |
score_threshold: Minimum relevance threshold for similarity_score_threshold (default=None)
|
322 |
"""
|
323 |
+
print("vector_backed retriever started")
|
324 |
search_kwargs={}
|
325 |
if k is not None:
|
326 |
search_kwargs['k'] = k
|
|
|
331 |
search_type=search_type,
|
332 |
search_kwargs=search_kwargs
|
333 |
)
|
334 |
+
print("vector_backed retriever done")
|
335 |
return retriever
|
336 |
|
337 |
# similarity search
|
|
|
355 |
k (int): top k relevant chunks to the query are filtered using the EmbeddingsFilter. default =16.
|
356 |
similarity_threshold : minimum relevance threshold used by the EmbeddingsFilter. default =None.
|
357 |
"""
|
358 |
+
print("compression retriever started")
|
359 |
# 1. splitting documents into smaller chunks
|
360 |
splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0, separator=". ")
|
361 |
|
|
|
380 |
base_compressor=pipeline_compressor,
|
381 |
base_retriever=base_retriever
|
382 |
)
|
383 |
+
print("compression retriever done")
|
384 |
return compression_retriever
|
385 |
|
386 |
def CohereRerank_retriever(
|
387 |
+
print("cohere rerank started")
|
388 |
base_retriever,
|
389 |
cohere_api_key,cohere_model="rerank-multilingual-v2.0", top_n=8
|
390 |
):
|
|
|
406 |
base_compressor=compressor,
|
407 |
base_retriever=base_retriever
|
408 |
)
|
409 |
+
print("cohere rerank done")
|
410 |
return retriever_Cohere
|
411 |
|
412 |
|
|
|
422 |
compression_retriever_k=16,
|
423 |
cohere_api_key="***", cohere_model="rerank-multilingual-v2.0", cohere_top_n=8,
|
424 |
):
|
425 |
+
print("retrieval blocks started")
|
426 |
"""
|
427 |
Retrieval includes: document loaders, text splitter, vectorstore and retriever.
|
428 |
|
|
|
511 |
print(f"\n{retriever_type} is created successfully!")
|
512 |
print(f"Relevant documents will be retrieved from vectorstore ({vectorstore_name}) which uses {LLM_service} embeddings \
|
513 |
and has {vector_store._collection.count()} chunks.")
|
514 |
+
print("retrieval blocks done")
|
515 |
return retriever
|
516 |
except Exception as e:
|
517 |
print(e)
|
|
|
657 |
</context>
|
658 |
|
659 |
Question: {{question}}
|
660 |
+
Question: {question}
|
661 |
Language: {language}.
|
662 |
|
663 |
"""
|