File size: 7,942 Bytes
37208a8
 
c5950bb
37208a8
 
890e301
37208a8
890e301
659b551
37208a8
 
 
ff2fd24
659b551
6046930
 
 
 
 
 
 
 
a62e75b
6046930
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85776fe
 
 
 
6046930
 
659b551
 
17b67d2
dc2915a
37208a8
c5950bb
944c093
dc2915a
6046930
 
dc2915a
37208a8
 
 
 
659b551
85776fe
659b551
 
 
 
 
 
 
a62e75b
85776fe
659b551
a62e75b
659b551
6046930
659b551
6046930
 
765e2ba
6046930
 
 
765e2ba
659b551
 
 
 
 
 
 
 
 
 
 
 
 
 
76aee50
 
6046930
659b551
 
c5950bb
 
 
659b551
d6c7c8c
659b551
 
a62e75b
 
659b551
 
c5950bb
 
fa3383c
c5950bb
659b551
37208a8
 
 
 
c5950bb
659b551
 
53d040a
c5950bb
a62e75b
 
6046930
659b551
 
 
 
 
 
c5950bb
37208a8
 
659b551
 
37208a8
 
e57d361
6046930
37208a8
 
5c0f43a
c9bd829
b2b23b0
944c093
37208a8
6046930
85776fe
beaf8fa
85776fe
 
beaf8fa
6046930
ec85955
37208a8
ae8c91e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import os

import gradio as gr
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.retrievers import EnsembleRetriever
from langchain_chroma import Chroma
from langchain_community.retrievers import BM25Retriever
from langchain_core.documents import Document
from langchain_groq import ChatGroq
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Custom ensemble retriever with LLM rephrasing and document filtering
class CustomEnsembleRetriever(EnsembleRetriever):
    """EnsembleRetriever that optionally rephrases the query via an LLM call
    and filters the retrieved documents for relevance via a second LLM call.

    NOTE(review): relies on module-level globals set by chatbot_response():
    ``llm``, ``rephrase``, ``judge``, ``rephrase_template``, ``judge_template``.
    """

    def invoke(self, query: str, *args, **kwargs) -> list[Document]:
        """Retrieve documents for ``query``; if the global ``rephrase`` flag is
        set, retrieve again with an LLM-rephrased query and append those
        results, then filter everything through judge_documents().
        """
        documents = super().invoke(query, *args, **kwargs)

        # Rephrase if applicable
        print("Original question:", query)
        if rephrase:
            rephrased_query = llm.invoke(rephrase_template.format(query=query), {"temperature": 0}).content
            print("Rephrased question:", rephrased_query)
            documents += super().invoke(rephrased_query, *args, **kwargs)

        return self.judge_documents(query, documents)

    def judge_documents(self, query: str, documents: list[Document]) -> list[Document]:
        """Filter ``documents`` down to the ones the LLM judges relevant to ``query``.

        Returns the documents unchanged when the global ``judge`` flag is off,
        and a single "No documents found!" placeholder when the LLM reports
        that nothing is relevant.
        """
        if not judge:
            return documents

        # Number documents starting at 1 to match judge_template: its examples
        # are 1-based and "0" is reserved for "no relevant documents".
        # (Previously enumeration started at 0 while selection below was
        # 1-based, so every pick was off by one and doc #1 was unselectable.)
        docs_str = ""
        for index, doc in enumerate(documents, start=1):
            docs_str += f"\n{index}. {doc}"

        filtered_doc_nums = llm.invoke(
            judge_template.format(query=query, docs_to_judge=docs_str),
            {"temperature": 0},
        ).content.split()

        if not filtered_doc_nums or filtered_doc_nums[0] == "0":
            return [Document(page_content="No documents found!")]

        filtered = []
        for num in filtered_doc_nums:
            try:
                idx = int(num) - 1
            except ValueError:
                # Ignore tokens that are not numbers (LLM chatter).
                continue
            # Bounds check also guards against "0"/negative numbers, which
            # would otherwise index from the end of the list.
            if 0 <= idx < len(documents):
                filtered.append(documents[idx])
        return filtered

# Prompts
# Prompt templates. system_prompt uses Llama3 chat-header tokens and is fed to
# the RetrievalQA chain; rephrase_template and judge_template drive the two
# auxiliary LLM calls in CustomEnsembleRetriever.
system_prompt = """<|start_header_id|>user<|end_header_id|>
You are an assistant for discussing Wings of Fire using the provided context.
Your response should be under 250 tokens.
You are given the extracted parts of a long document and a question. Answer the question as thoroughly as possible with the tone and form of an objective analytical essay.
You must use only the provided context to answer the question. Do not make up an answer.
WHEN ANSWERING THE QUESTION, DO NOT MENTION THE CONTEXT.
If the user is asking a question and there are no relevant documents, say that you don't know.
If the user is not asking a question, you may discuss Wings of Fire with them only.
You can only discuss Wings of Fire. If the user is not talking about Wings of Fire, inform them that you can only discuss Wings of Fire and suggest potential Wings of Fire related questions that they can ask instead.
Question: {question}
Context: {context}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

rephrase_template = """Rephrase this query to be more easily searchable in a google search. Do not add things to the query. Just rephrase the query to be clearer and simpler.
DO NOT PREFACE YOUR MESSAGE WITH ANYTHING. DO NOT RESPOND WITH ANYTHING EXCEPT THE REPHRASED QUERY.

Example query: whats morrowseer like and what does he want
Response: What is Morrowseer's personality and what are his motivations?

Query to process: {query}
"""

judge_template = """Provide the numbers of the documents that are EXTREMELY LIKELY to be relevant to the given query. Separate the numbers by spaces.
DO NOT PREFACE YOUR MESSAGE WITH ANYTHING. DO NOT RESPOND WITH ANYTHING EXCEPT THE NUMBERS.
If there are no relevant documents, then respond with a 0.
If there is an exact duplicate of a document, only return the number of one of them.

Example query: What is Morrowseer's personality and what are his motivations?
Example documents:
1. Morrowseer is a NightWing antagonist in the book series Wings of Fire
2. the NightWings plotted to take over the rainforest
3. charming and charismatic. darkstalker's traits allowed him to make friends and allies easily
4. The NightWings created a false prophecy in order to help them take over the rainforest
5. in the ancient days, NightWings were known to be wise, spreading knowledge across the continent
6. Morrowseer was scheming, as he was involved in creating the false prophecy
Response: 1 2 4 6

Query to process: {query}
Documents: {docs_to_judge}
"""

# Load data from chromadb
# The embedding model must match the one used when ./chromadb was built,
# or similarity search will return nonsense.
# NOTE(review): raises KeyError at import time if the GEMINI/GROQ env vars are unset.
print("Loading data from chromadb...")
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004", google_api_key=os.environ['GEMINI'])
vectorstore = Chroma(embedding_function=embeddings, persist_directory="./chromadb")

# Instantiate model
# Groq-hosted Llama3 8b; also used by CustomEnsembleRetriever for the
# rephrase/judge calls (those override temperature to 0 per-call).
llm = ChatGroq(
    model="llama3-8b-8192",
    temperature=0.7,
    api_key=os.environ['GROQ'],
    model_kwargs={"top_p": 0.65}
)

# Get documents and instantiate BM25Retriever
# BM25 (lexical) retrieval is built over the same raw texts stored in Chroma,
# so the ensemble later combines keyword and semantic search over one corpus.
docs = vectorstore.get()["documents"]
bm25_retriever = BM25Retriever.from_texts(docs)

# Generate chatbot response based on user question
def chatbot_response(question, history, prompt_template, bm25_k, vs_k, _rephrase, _judge):
    """Answer ``question`` via RAG and return the LLM's answer text.

    Parameters
    ----------
    question : str
        The user's message from the gradio chat box.
    history : list
        Chat history supplied by gr.ChatInterface (unused).
    prompt_template : str
        System prompt with ``{context}`` and ``{question}`` placeholders.
    bm25_k, vs_k : int
        Number of documents to retrieve from BM25 / the vectorstore.
    _rephrase, _judge : bool
        Toggles for query rephrasing and document judging; published as
        module globals because CustomEnsembleRetriever reads them there.
    """
    # CustomEnsembleRetriever reads these globals at retrieval time.
    global judge
    judge = _judge
    global rephrase
    rephrase = _rephrase

    # Set k values and instantiate EnsembleRetriever (equal lexical/semantic weight)
    bm25_retriever.k = bm25_k
    vs_retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": vs_k})
    retriever = CustomEnsembleRetriever(retrievers=[bm25_retriever, vs_retriever], weights=[0.5, 0.5])

    # Prompt template
    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=prompt_template
    )

    # Instantiate and invoke retriever and chain
    qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever, chain_type_kwargs={"prompt": prompt}, return_source_documents=True)
    response = qa_chain.invoke({"query": question})

    # Print debug info: answer, toggles, and each source document with its origin.
    print("Response:", response["result"], "\n\n")
    print("Rephrase?", rephrase, "\nJudge?", judge)
    for index, document in enumerate(response["source_documents"]):
        try:
            print(f'*{index + 1}. {document.metadata["source"]}*')
        except (KeyError, AttributeError):
            # Document has no metadata dict or no "source" entry.
            # (Was a bare except, which also swallowed KeyboardInterrupt etc.)
            print('*(metadata not found)*')
        print(f'Quote: "{document.page_content}"\n\n')

    return response["result"]

# Instantiate and start the demo
print("Starting gradio...")
demo = gr.ChatInterface(
    chatbot_response,
    title="🐲 WoF RAG Q&A Bot",
    description="A Llama3 8b Q&A bot powered by Groq, using RAG (Retrieval Augmented Generation) on documents from the Wings of Fire wiki. It utilizes LLMs to rephrase the user's query and judge and filter retrieved documents for relevance. Note that this is just a demo; the bot knows a decent amount but is still prone to hallucination or saying that it doesn't know. It performs best with Q&A and analyzing canon characters or events. If responses are unsatisfactory, try tweaking the values in the additional inputs section at the bottom.",
    # Extra positional args passed to chatbot_response after (question, history);
    # order must match its (prompt_template, bm25_k, vs_k, _rephrase, _judge) signature.
    additional_inputs=[
        gr.Textbox(value=system_prompt, label="System message"),
        gr.Slider(minimum=1, maximum=4, value=3, step=1, label="Number of documents to retrieve for bm25"),
        gr.Slider(minimum=1, maximum=4, value=3, step=1, label="Number of documents to retrieve for vectorstore similarity"),
        gr.Checkbox(label="Rephrase query?", value=True),
        gr.Checkbox(label="Judge returned documents?", value=True),
    ],
    examples=[
        ["What is Wings of Fire"],
        ["What is the dragonet prophecy"],
        ["Who is Queen Scarlet and what are her motivations"],
        ["Write an essay about the role Qibli plays in Wings of Fire"],
        ["Who is Foxglove"]
    ],
    # Caching would invoke the chain (and billable API calls) at startup.
    cache_examples=False,
)
demo.launch(show_api=False)