import os

from pymongo import MongoClient
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate

# Configuration is read from environment variables.
config = {
    'MONGODB_CONN_STRING': os.getenv('MONGODB_CONN_STRING'),
    'HUGGINGFACEHUB_API_TOKEN': os.getenv('HUGGINGFACEHUB_API_TOKEN'),
    'DB_NAME': os.getenv('DB_NAME'),
    'VECTOR_SEARCH_INDEX': os.getenv('VECTOR_SEARCH_INDEX'),
    'PASSWORD_DB': os.getenv('PASSWORD_DB'),  # loaded but not used below
}
# MongoDB client and the embedding model used for vector search.
client = MongoClient(config['MONGODB_CONN_STRING'])
embeddings = HuggingFaceEmbeddings(model_name="intfloat/e5-large-v2")

# Mistral-7B-Instruct served through the Hugging Face Inference API.
llm_model = HuggingFaceEndpoint(repo_id='mistralai/Mistral-7B-Instruct-v0.2',
                                huggingfacehub_api_token=config['HUGGINGFACEHUB_API_TOKEN'],
                                temperature=0.3)

# Prompt template in the Mistral instruction format ([INST] ... [/INST]).
template = """
        <s>[INST] Instruction: You are a helpful chatbot who can answer data science, anime, and manga questions.
        Follow these rules strictly when answering the question from the context:
        1. Do not use the word "context" or the phrase "based on the context" in your answers.
        2. If no context is provided, answer in at most 128 words.
        3. The context is a series of passages, so combine them into the best answer you can.

        Context:
        {context}

        ### QUESTION:
        {question} [/INST]
        """
prompt = ChatPromptTemplate.from_template(template=template)
parser = StrOutputParser()


def get_all_collections():
    """Map each collection name in the database to a human-readable display name."""
    database = client[config['DB_NAME']]
    names = database.list_collection_names()
    coll_dict = {}
    for name in names:
        # e.g. 'data_science' -> 'Data science'
        coll_dict[name] = ' '.join(name.capitalize().split('_'))
    return coll_dict

class VECTORDB_STORE:
    """Wraps a MongoDB Atlas vector store for a single collection."""

    def __init__(self, coll_name):
        # coll_name is the human-readable display name produced by get_all_collections().
        collection_name = self.get_collection_name(coll_name)
        collection = client[config['DB_NAME']][collection_name]
        self.vectordb_store = MongoDBAtlasVectorSearch(collection=collection,
                                                       embedding=embeddings,
                                                       index_name=config['VECTOR_SEARCH_INDEX'])
    @staticmethod
    def get_collection_name(coll_name):
        # Reverse lookup: display name -> actual MongoDB collection name.
        for key, value in get_all_collections().items():
            if coll_name == value:
                return key
        return None

    def chain(self):
        # Retrieval-augmented generation chain: retrieve the top 10 documents,
        # fill the prompt, call the LLM, and parse the output to a string.
        retriever = self.vectordb_store.as_retriever(search_kwargs={"k": 10})
        chain = {'context': retriever, 'question': RunnablePassthrough()} | prompt | llm_model | parser
        return chain
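

# Example usage: a minimal sketch. The display name 'Anime' below is an
# assumption and must match one of the values returned by get_all_collections();
# the MONGODB_CONN_STRING, HUGGINGFACEHUB_API_TOKEN, DB_NAME, and
# VECTOR_SEARCH_INDEX environment variables must also be set.
if __name__ == '__main__':
    print('Available collections:', list(get_all_collections().values()))

    store = VECTORDB_STORE(coll_name='Anime')  # hypothetical display name
    rag_chain = store.chain()
    answer = rag_chain.invoke('Who wrote the manga One Piece?')
    print(answer)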