samlonka committed on
Commit
3545498
1 Parent(s): 0e51c38

'packages_changed'

Browse files
Files changed (1) hide show
  1. function_tools.py +0 -86
function_tools.py CHANGED
@@ -500,92 +500,6 @@ vedamantra_summary_tool =StructuredTool.from_function(
500
  return_direct=False
501
  )
502
 
503
- ## vector tool
504
- import os
505
- import time
506
- import pickle
507
- import streamlit as st
508
- from dotenv import load_dotenv
509
- from pinecone import Pinecone, ServerlessSpec
510
- from utils import load_pickle, initialize_embedding_model
511
- from langchain_community.retrievers import BM25Retriever
512
- from langchain_pinecone import PineconeVectorStore
513
- from langchain.retrievers import EnsembleRetriever
514
- from langchain.tools.retriever import create_retriever_tool
515
-
516
-
517
-
518
- # Load .env file
519
- load_dotenv()
520
-
521
- # Constants
522
- INDEX_NAME = "veda-index-v2"
523
- MODEL_NAME = "BAAI/bge-large-en-v1.5"
524
- DOCS_DIRECTORY = r"Docs\ramana_docs_ids.pkl"
525
- CURRENT_DIRECTORY = os.getcwd()
526
-
527
-
528
- # Initialize Pinecone client
529
- PINECONE_API_KEY = os.getenv("PINECONE_API_KEY_SAM")
530
- pc = Pinecone(api_key=PINECONE_API_KEY)
531
-
532
- #@st.cache_resource
533
- def create_or_load_index():
534
- # Check if index already exists
535
- if INDEX_NAME not in pc.list_indexes().names():
536
- # Create index if it does not exist
537
- pc.create_index(
538
- INDEX_NAME,
539
- dimension=1024,
540
- metric='dotproduct',
541
- spec=ServerlessSpec(
542
- cloud="aws",
543
- region="us-east-1"
544
- )
545
- )
546
- # Wait for index to be initialized
547
- while not pc.describe_index(INDEX_NAME).status['ready']:
548
- time.sleep(1)
549
- # Connect to index
550
- return pc.Index(INDEX_NAME)
551
-
552
- # Load documents
553
- docs = load_pickle(DOCS_DIRECTORY)
554
- # Initialize embedding model
555
- embedding = initialize_embedding_model(MODEL_NAME)
556
- # Create or load index
557
- index = create_or_load_index()
558
-
559
- # Initialize BM25 retriever
560
- bm25_retriever = BM25Retriever.from_texts(
561
- [text['document'].page_content for text in docs],
562
- metadatas=[text['document'].metadata for text in docs]
563
- )
564
- bm25_retriever.k = 2
565
-
566
- # Switch back to normal index for LangChain
567
- vector_store = PineconeVectorStore(index, embedding)
568
- retriever = vector_store.as_retriever(search_type="mmr")
569
-
570
- # Initialize the ensemble retriever
571
- ensemble_retriever = EnsembleRetriever(
572
- retrievers=[bm25_retriever, retriever], weights=[0.2, 0.8]
573
- )
574
-
575
- class VectorResponse(BaseModel):
576
- query:str = Field(description="user query")
577
-
578
- def vector_retrieve(query):
579
- response = retriever.get_relevant_documents(query)
580
- return response
581
-
582
- vector_tool = StructuredTool.from_function(
583
- func = vector_retrieve,
584
- name = "vector_retrieve",
585
- description="Search and return documents related user query from the vector index.",
586
- args_schema=VectorResponse,
587
- return_direct=False
588
- )
589
 
590
  tools_list = [pada_morphological_tool, sql_tool, pada_meaning_tool, pada_word_sense_tool, vedamantra_tool, vedamantra_summary_tool]
591
  #vector_tool,
 
500
  return_direct=False
501
  )
502
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
 
504
  tools_list = [pada_morphological_tool, sql_tool, pada_meaning_tool, pada_word_sense_tool, vedamantra_tool, vedamantra_summary_tool]
505
  #vector_tool,