from langchain_mongodb import MongoDBAtlasVectorSearch from langchain_community.embeddings import HuggingFaceEmbeddings from pymongo import MongoClient from langchain_core.runnables import RunnablePassthrough from langchain_core.output_parsers import StrOutputParser from langchain.prompts import ChatPromptTemplate from langchain_community.llms import HuggingFaceEndpoint import os config= { 'MONGODB_CONN_STRING': os.getenv('MONGODB_CONN_STRING'), 'HUGGINGFACEHUB_API_TOKEN': os.getenv('HUGGINGFACEHUB_API_TOKEN'), 'DB_NAME':os.getenv('DB_NAME'), 'VECTOR_SEARCH_INDEX':os.getenv('VECTOR_SEARCH_INDEX'), 'PASSWORD_DB': os.getenv('PASSWORD_DB') } client = MongoClient(config['MONGODB_CONN_STRING']) embeddings = HuggingFaceEmbeddings(model_name= "intfloat/e5-large-v2") llm_model = HuggingFaceEndpoint(repo_id='mistralai/Mistral-7B-Instruct-v0.2', huggingfacehub_api_token=config['HUGGINGFACEHUB_API_TOKEN'], temperature=0.3) template = """ [INST] Instruction:Your are a helpful chatbot who can answer all data science ,anime and manga questions. You have to follow these rules strictly while answering the question based on context: 1. Do not use the word context or based on context which is provided in answers. 2. If there is no context you have to answer in 128 words not more than that. 3. context are in series format so make your own best pattern based on that give answer. [/INST] context: {context} ### QUESTION: {question} [/INST] """ prompt = ChatPromptTemplate.from_template(template=template) parser = StrOutputParser() def get_all_collections(): database = client[config['DB_NAME']] names = database.list_collection_names() coll_dict = {} for name in names: coll_dict[name] = ' '.join(str(name).capitalize().split('_')) return coll_dict class VECTORDB_STORE: def __init__(self, coll_name): collection_name = self.get_collection_name(coll_name) collection = client[config['DB_NAME']][collection_name] self.vectordb_store = MongoDBAtlasVectorSearch(collection =collection, embedding= embeddings, index_name= config['VECTOR_SEARCH_INDEX']) @staticmethod def get_collection_name(coll_name): for key, value in get_all_collections().items(): if coll_name == value: return key return None def chain(self): retriever = self.vectordb_store.as_retriever(search_kwargs={"k": 10}) chain = {'context': retriever, 'question': RunnablePassthrough()} | prompt | llm_model | parser return chain