from langchain.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import CTransformers
from langchain.chains import RetrievalQA
import chainlit as cl

# Path to the local FAISS vector DB
DB_FAISS_PATH = 'vectorstore/db_faiss'

# Baseline prompt for the model
custom_prompt_template = """Please use the provided information to respond to the user's inquiry.
If you're unsure of the answer, it's best to acknowledge that you don't know rather than providing inaccurate information.

Context: {context}
Question: {question}

Please provide only the helpful response below and refrain from including any additional information.
Helpful response:
"""


def set_custom_prompt():
    """Build the prompt template the QA chain fills with the retrieved context and the user's question."""
    prompt = PromptTemplate(template=custom_prompt_template,
                            input_variables=['context', 'question'])
    return prompt


# Wire the prompt, the LLM, and the retriever into a RetrievalQA chain
def retrieval_qa_chain(llm, prompt, db):
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',  # stuff the top-k retrieved chunks directly into the prompt
        retriever=db.as_retriever(search_kwargs={'k': 2}),
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt}
    )
    return qa_chain


# Load the quantized Llama 2 model and define the generation settings
def load_llm():
    llm = CTransformers(
        model="llama-2-7b-chat.ggmlv3.q8_0.bin",
        model_type="llama",
        max_new_tokens=700,
        temperature=0.5
    )
    return llm


# Use Hugging Face embeddings, leveraging GPU compute (CUDA)
def qa_bot():
    embedding = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
                                      model_kwargs={'device': 'cuda'})
    # Load the previously generated embeddings from disk
    db = FAISS.load_local(DB_FAISS_PATH, embedding, allow_dangerous_deserialization=True)
    llm = load_llm()
    qa_prompt = set_custom_prompt()
    qa = retrieval_qa_chain(llm, qa_prompt, db)
    return qa


# Return an answer for a single query (useful for testing outside the UI)
def final_result(query):
    qa_result = qa_bot()
    response = qa_result({'query': query})
    return response


# UI creation with Chainlit
@cl.on_chat_start
async def start():
    chain = qa_bot()
    msg = cl.Message(content="Starting...")
    await msg.send()
    msg.content = "Hi, welcome to your Medical Research Assistant. How may I help?"
    await msg.update()
    cl.user_session.set("chain", chain)


# Interactive Q&A flow: run the chain on each user message and stream the answer
@cl.on_message
async def main(message: cl.Message):
    chain = cl.user_session.get("chain")
    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True,
        answer_prefix_tokens=["FINAL", "ANSWER"]
    )
    cb.answer_reached = True
    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["result"]
    sources = res["source_documents"]

    if sources:
        answer += "\nSources: " + str(sources)
    else:
        answer += "\nNo sources found"

    await cl.Message(content=answer).send()
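
# ---------------------------------------------------------------------------
# Note: the app above only *loads* the FAISS index from DB_FAISS_PATH; it
# assumes the index was built beforehand. Below is a minimal ingestion
# sketch (e.g. saved as a separate ingest.py), assuming the source PDFs sit
# in a hypothetical 'data/' directory. DATA_PATH and create_vector_db() are
# illustrative names, not part of the app above; the embedding model must
# match the one used in qa_bot(), or retrieval quality will suffer.
# ---------------------------------------------------------------------------
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

DATA_PATH = 'data/'                      # hypothetical folder holding the source PDFs
DB_FAISS_PATH = 'vectorstore/db_faiss'   # must match the path the app loads from


def create_vector_db():
    # Load every PDF under DATA_PATH, one Document per page
    loader = DirectoryLoader(DATA_PATH, glob='*.pdf', loader_cls=PyPDFLoader)
    documents = loader.load()

    # Split pages into overlapping chunks sized for the k=2 'stuff' retrieval above
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = splitter.split_documents(documents)

    # Embed with the same model the app uses, then persist the index to disk
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
                                       model_kwargs={'device': 'cuda'})
    db = FAISS.from_documents(texts, embeddings)
    db.save_local(DB_FAISS_PATH)


if __name__ == "__main__":
    create_vector_db()

# Usage: run the ingestion script once (python ingest.py), then start the UI
# with Chainlit, e.g. `chainlit run model.py -w` (assuming the app above is
# saved as model.py; -w reloads the app when the file changes).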