File size: 7,942 Bytes
37208a8
 
c5950bb
37208a8
 
890e301
37208a8
890e301
659b551
37208a8
 
 
ff2fd24
659b551
6046930
 
 
 
 
 
 
 
a62e75b
6046930
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85776fe
 
 
 
6046930
 
659b551
 
17b67d2
dc2915a
37208a8
c5950bb
944c093
dc2915a
6046930
 
dc2915a
37208a8
 
 
 
659b551
85776fe
659b551
 
 
 
 
 
 
a62e75b
85776fe
659b551
a62e75b
659b551
6046930
659b551
6046930
 
765e2ba
6046930
 
 
765e2ba
659b551
 
 
 
 
 
 
 
 
 
 
 
 
 
76aee50
 
6046930
659b551
 
c5950bb
 
 
659b551
d6c7c8c
659b551
 
a62e75b
 
659b551
 
c5950bb
 
fa3383c
c5950bb
659b551
37208a8
 
 
 
c5950bb
659b551
 
53d040a
c5950bb
a62e75b
 
6046930
659b551
 
 
 
 
 
c5950bb
37208a8
 
659b551
 
37208a8
 
e57d361
6046930
37208a8
 
5c0f43a
c9bd829
b2b23b0
944c093
37208a8
6046930
85776fe
beaf8fa
85776fe
 
beaf8fa
6046930
ec85955
37208a8
ae8c91e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import os

import gradio as gr
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.retrievers import EnsembleRetriever
from langchain_chroma import Chroma
from langchain_community.retrievers import BM25Retriever
from langchain_core.documents import Document
from langchain_groq import ChatGroq
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Custom ensemble retriever with LLM rephrasing and document filtering
class CustomEnsembleRetriever(EnsembleRetriever):
    """EnsembleRetriever that optionally rephrases the query via an LLM call
    and filters the retrieved documents for relevance via a second LLM call.

    NOTE(review): relies on module-level globals set by chatbot_response():
    ``llm``, ``rephrase``, ``judge``, ``rephrase_template``, ``judge_template``.
    """

    def invoke(self, query: str, *args, **kwargs) -> list[Document]:
        """Retrieve documents for ``query``; if the global ``rephrase`` flag is
        set, retrieve again with an LLM-rephrased query and append those
        results, then filter everything through judge_documents().
        """
        documents = super().invoke(query, *args, **kwargs)

        # Rephrase if applicable
        print("Original question:", query)
        if rephrase:
            rephrased_query = llm.invoke(rephrase_template.format(query=query), {"temperature": 0}).content
            print("Rephrased question:", rephrased_query)
            documents += super().invoke(rephrased_query, *args, **kwargs)

        return self.judge_documents(query, documents)

    def judge_documents(self, query: str, documents: list[Document]) -> list[Document]:
        """Filter ``documents`` down to the ones the LLM judges relevant to ``query``.

        Returns the documents unchanged when the global ``judge`` flag is off,
        and a single "No documents found!" placeholder when the LLM reports
        that nothing is relevant.
        """
        if not judge:
            return documents

        # Number documents starting at 1 to match judge_template: its examples
        # are 1-based and "0" is reserved for "no relevant documents".
        # (Previously enumeration started at 0 while selection below was
        # 1-based, so every pick was off by one and doc #1 was unselectable.)
        docs_str = ""
        for index, doc in enumerate(documents, start=1):
            docs_str += f"\n{index}. {doc}"

        filtered_doc_nums = llm.invoke(
            judge_template.format(query=query, docs_to_judge=docs_str),
            {"temperature": 0},
        ).content.split()

        if not filtered_doc_nums or filtered_doc_nums[0] == "0":
            return [Document(page_content="No documents found!")]

        filtered = []
        for num in filtered_doc_nums:
            try:
                idx = int(num) - 1
            except ValueError:
                # Ignore tokens that are not numbers (LLM chatter).
                continue
            # Bounds check also guards against "0"/negative numbers, which
            # would otherwise index from the end of the list.
            if 0 <= idx < len(documents):
                filtered.append(documents[idx])
        return filtered

# Prompts
# Prompt templates. system_prompt uses Llama3 chat-header tokens and is fed to
# the RetrievalQA chain; rephrase_template and judge_template drive the two
# auxiliary LLM calls in CustomEnsembleRetriever.
system_prompt = """<|start_header_id|>user<|end_header_id|>
You are an assistant for discussing Wings of Fire using the provided context.
Your response should be under 250 tokens.
You are given the extracted parts of a long document and a question. Answer the question as thoroughly as possible with the tone and form of an objective analytical essay.
You must use only the provided context to answer the question. Do not make up an answer.
WHEN ANSWERING THE QUESTION, DO NOT MENTION THE CONTEXT.
If the user is asking a question and there are no relevant documents, say that you don't know.
If the user is not asking a question, you may discuss Wings of Fire with them only.
You can only discuss Wings of Fire. If the user is not talking about Wings of Fire, inform them that you can only discuss Wings of Fire and suggest potential Wings of Fire related questions that they can ask instead.
Question: {question}
Context: {context}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

rephrase_template = """Rephrase this query to be more easily searchable in a google search. Do not add things to the query. Just rephrase the query to be clearer and simpler.
DO NOT PREFACE YOUR MESSAGE WITH ANYTHING. DO NOT RESPOND WITH ANYTHING EXCEPT THE REPHRASED QUERY.

Example query: whats morrowseer like and what does he want
Response: What is Morrowseer's personality and what are his motivations?

Query to process: {query}
"""

judge_template = """Provide the numbers of the documents that are EXTREMELY LIKELY to be relevant to the given query. Separate the numbers by spaces.
DO NOT PREFACE YOUR MESSAGE WITH ANYTHING. DO NOT RESPOND WITH ANYTHING EXCEPT THE NUMBERS.
If there are no relevant documents, then respond with a 0.
If there is an exact duplicate of a document, only return the number of one of them.

Example query: What is Morrowseer's personality and what are his motivations?
Example documents:
1. Morrowseer is a NightWing antagonist in the book series Wings of Fire
2. the NightWings plotted to take over the rainforest
3. charming and charismatic. darkstalker's traits allowed him to make friends and allies easily
4. The NightWings created a false prophecy in order to help them take over the rainforest
5. in the ancient days, NightWings were known to be wise, spreading knowledge across the continent
6. Morrowseer was scheming, as he was involved in creating the false prophecy
Response: 1 2 4 6

Query to process: {query}
Documents: {docs_to_judge}
"""

# Load data from chromadb
# The embedding model must match the one used when ./chromadb was built,
# or similarity search will return nonsense.
# NOTE(review): raises KeyError at import time if the GEMINI/GROQ env vars are unset.
print("Loading data from chromadb...")
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004", google_api_key=os.environ['GEMINI'])
vectorstore = Chroma(embedding_function=embeddings, persist_directory="./chromadb")

# Instantiate model
# Groq-hosted Llama3 8b; also used by CustomEnsembleRetriever for the
# rephrase/judge calls (those override temperature to 0 per-call).
llm = ChatGroq(
    model="llama3-8b-8192",
    temperature=0.7,
    api_key=os.environ['GROQ'],
    model_kwargs={"top_p": 0.65}
)

# Get documents and instantiate BM25Retriever
# BM25 (lexical) retrieval is built over the same raw texts stored in Chroma,
# so the ensemble later combines keyword and semantic search over one corpus.
docs = vectorstore.get()["documents"]
bm25_retriever = BM25Retriever.from_texts(docs)

# Generate chatbot response based on user question
def chatbot_response(question, history, prompt_template, bm25_k, vs_k, _rephrase, _judge):
    """Answer ``question`` via RAG and return the LLM's answer text.

    Parameters
    ----------
    question : str
        The user's message from the gradio chat box.
    history : list
        Chat history supplied by gr.ChatInterface (unused).
    prompt_template : str
        System prompt with ``{context}`` and ``{question}`` placeholders.
    bm25_k, vs_k : int
        Number of documents to retrieve from BM25 / the vectorstore.
    _rephrase, _judge : bool
        Toggles for query rephrasing and document judging; published as
        module globals because CustomEnsembleRetriever reads them there.
    """
    # CustomEnsembleRetriever reads these globals at retrieval time.
    global judge
    judge = _judge
    global rephrase
    rephrase = _rephrase

    # Set k values and instantiate EnsembleRetriever (equal lexical/semantic weight)
    bm25_retriever.k = bm25_k
    vs_retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": vs_k})
    retriever = CustomEnsembleRetriever(retrievers=[bm25_retriever, vs_retriever], weights=[0.5, 0.5])

    # Prompt template
    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=prompt_template
    )

    # Instantiate and invoke retriever and chain
    qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever, chain_type_kwargs={"prompt": prompt}, return_source_documents=True)
    response = qa_chain.invoke({"query": question})

    # Print debug info: answer, toggles, and each source document with its origin.
    print("Response:", response["result"], "\n\n")
    print("Rephrase?", rephrase, "\nJudge?", judge)
    for index, document in enumerate(response["source_documents"]):
        try:
            print(f'*{index + 1}. {document.metadata["source"]}*')
        except (KeyError, AttributeError):
            # Document has no metadata dict or no "source" entry.
            # (Was a bare except, which also swallowed KeyboardInterrupt etc.)
            print('*(metadata not found)*')
        print(f'Quote: "{document.page_content}"\n\n')

    return response["result"]

# Instantiate and start the demo
print("Starting gradio...")
demo = gr.ChatInterface(
    chatbot_response,
    title="🐲 WoF RAG Q&A Bot",
    description="A Llama3 8b Q&A bot powered by Groq, using RAG (Retrieval Augmented Generation) on documents from the Wings of Fire wiki. It utilizes LLMs to rephrase the user's query and judge and filter retrieved documents for relevance. Note that this is just a demo; the bot knows a decent amount but is still prone to hallucination or saying that it doesn't know. It performs best with Q&A and analyzing canon characters or events. If responses are unsatisfactory, try tweaking the values in the additional inputs section at the bottom.",
    # Extra positional args passed to chatbot_response after (question, history);
    # order must match its (prompt_template, bm25_k, vs_k, _rephrase, _judge) signature.
    additional_inputs=[
        gr.Textbox(value=system_prompt, label="System message"),
        gr.Slider(minimum=1, maximum=4, value=3, step=1, label="Number of documents to retrieve for bm25"),
        gr.Slider(minimum=1, maximum=4, value=3, step=1, label="Number of documents to retrieve for vectorstore similarity"),
        gr.Checkbox(label="Rephrase query?", value=True),
        gr.Checkbox(label="Judge returned documents?", value=True),
    ],
    examples=[
        ["What is Wings of Fire"],
        ["What is the dragonet prophecy"],
        ["Who is Queen Scarlet and what are her motivations"],
        ["Write an essay about the role Qibli plays in Wings of Fire"],
        ["Who is Foxglove"]
    ],
    # Caching would invoke the chain (and billable API calls) at startup.
    cache_examples=False,
)
demo.launch(show_api=False)