from langchain.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import CTransformers
from langchain.chains import RetrievalQA
import chainlit as cl

# Path to the local FAISS vector DB
DB_FAISS_PATH = 'vectorstore/db_faiss'

# Baseline prompt for the model
custom_prompt_template = """Please use the provided information to respond to the user's inquiry.
If you're unsure of the answer, it's best to acknowledge that you don't know rather than providing inaccurate information.

Context: {context}
Question: {question}

Please provide only the helpful response below and refrain from including any additional information.
Helpful response:
"""


def set_custom_prompt():
    """Build the prompt template the QA chain fills with the retrieved context and the user's question."""
    prompt = PromptTemplate(template=custom_prompt_template,
                            input_variables=['context', 'question'])
    return prompt


# Wire the prompt, the LLM, and the retriever into a RetrievalQA chain
def retrieval_qa_chain(llm, prompt, db):
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',  # stuff the top-k retrieved chunks directly into the prompt
        retriever=db.as_retriever(search_kwargs={'k': 2}),
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt}
    )
    return qa_chain


# Load the quantized Llama 2 model and define the generation settings
def load_llm():
    llm = CTransformers(
        model="llama-2-7b-chat.ggmlv3.q8_0.bin",
        model_type="llama",
        max_new_tokens=700,
        temperature=0.5
    )
    return llm


# Use Hugging Face embeddings, leveraging GPU compute (CUDA)
def qa_bot():
    embedding = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
                                      model_kwargs={'device': 'cuda'})
    # Load the previously generated embeddings from disk
    db = FAISS.load_local(DB_FAISS_PATH, embedding, allow_dangerous_deserialization=True)
    llm = load_llm()
    qa_prompt = set_custom_prompt()
    qa = retrieval_qa_chain(llm, qa_prompt, db)
    return qa


# Return an answer for a single query (useful for testing outside the UI)
def final_result(query):
    qa_result = qa_bot()
    response = qa_result({'query': query})
    return response


# UI creation with Chainlit
@cl.on_chat_start
async def start():
    chain = qa_bot()
    msg = cl.Message(content="Starting...")
    await msg.send()
    msg.content = "Hi, welcome to your Medical Research Assistant. How may I help?"
    await msg.update()
    cl.user_session.set("chain", chain)


# Interactive Q&A flow: run the chain on each user message and stream the answer
@cl.on_message
async def main(message: cl.Message):
    chain = cl.user_session.get("chain")
    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True,
        answer_prefix_tokens=["FINAL", "ANSWER"]
    )
    cb.answer_reached = True
    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["result"]
    sources = res["source_documents"]

    if sources:
        answer += "\nSources: " + str(sources)
    else:
        answer += "\nNo sources found"

    await cl.Message(content=answer).send()
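
# ---------------------------------------------------------------------------
# Note: the app above only *loads* the FAISS index from DB_FAISS_PATH; it
# assumes the index was built beforehand. Below is a minimal ingestion
# sketch (e.g. saved as a separate ingest.py), assuming the source PDFs sit
# in a hypothetical 'data/' directory. DATA_PATH and create_vector_db() are
# illustrative names, not part of the app above; the embedding model must
# match the one used in qa_bot(), or retrieval quality will suffer.
# ---------------------------------------------------------------------------
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

DATA_PATH = 'data/'                      # hypothetical folder holding the source PDFs
DB_FAISS_PATH = 'vectorstore/db_faiss'   # must match the path the app loads from


def create_vector_db():
    # Load every PDF under DATA_PATH, one Document per page
    loader = DirectoryLoader(DATA_PATH, glob='*.pdf', loader_cls=PyPDFLoader)
    documents = loader.load()

    # Split pages into overlapping chunks sized for the k=2 'stuff' retrieval above
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = splitter.split_documents(documents)

    # Embed with the same model the app uses, then persist the index to disk
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
                                       model_kwargs={'device': 'cuda'})
    db = FAISS.from_documents(texts, embeddings)
    db.save_local(DB_FAISS_PATH)


if __name__ == "__main__":
    create_vector_db()

# Usage: run the ingestion script once (python ingest.py), then start the UI
# with Chainlit, e.g. `chainlit run model.py -w` (assuming the app above is
# saved as model.py; -w reloads the app when the file changes).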