import gradio as gr
import json
import os

from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.memory import ConversationBufferMemory
from langchain_openai import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate

openai_api_key = os.getenv("OPENAI_API_KEY")

# Lazily-initialized singletons shared across Gradio callbacks.
vectorstore = None
llm = None
qa_instance = None

def load_embeddings_from_json(json_file_path: str):
    """Load precomputed chunks, embeddings, and ids from a JSON file.

    Expects a list of records shaped like
    [{"id": "...", "chunk": "<text>", "embeddings": [<float>, ...]}, ...];
    "id" is optional and falls back to the record's index.
    """
    with open(json_file_path, 'r') as f:
        data = json.load(f)
    chunks = [item['chunk'] for item in data]
    embeddings = [item['embeddings'] for item in data]
    ids = [item.get('id', str(index)) for index, item in enumerate(data)]
    return chunks, embeddings, ids

def initialize_chatbot_from_json(json_file_path: str, openai_api_key: str):
    global vectorstore, llm, qa_instance
    # Build the chain once and reuse it, so the ConversationBufferMemory
    # persists across queries instead of being reset on every submit.
    if qa_instance is not None:
        return qa_instance
    if vectorstore is None:
        chunks, embeddings, ids = load_embeddings_from_json(json_file_path)
        vectorstore = Chroma(
            collection_name="my_collection",
            persist_directory=None,
            embedding_function=OpenAIEmbeddings(api_key=openai_api_key)
        )
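        # Insert the precomputed vectors through the underlying chromadb
        # collection so the chunks are not re-embedded at load time.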
        vectorstore._collection.add(
            ids=ids,
            embeddings=embeddings,
            metadatas=[{"source": "json"} for _ in chunks],
            documents=chunks,
        )
    if llm is None:
        llm = ChatOpenAI(api_key=openai_api_key, temperature=0.5, model="gpt-4o", verbose=True)
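    # MultiQueryRetriever asks the LLM to rewrite each question into
    # several phrasings and merges the documents retrieved for each.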
    retriever = MultiQueryRetriever.from_llm(retriever=vectorstore.as_retriever(), llm=llm)
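    # The buffer memory stores the running conversation under
    # "chat_history", matching the {chat_history} slot in the prompts below.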
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    condense_question_prompt_template = PromptTemplate.from_template("""Given the following conversation and a follow-up question, rephrase the follow-up question into a standalone question without changing its content.
    Chat History:
    {chat_history}
    Follow Up Input: {question}
    Standalone question:""")
    
    qa_prompt = PromptTemplate(
        template="""You are a highly informative and helpful assistant for a book QA system, specialized in answering questions about the book "What is Democracy? & Why Democracy?" strictly from the given 'Context'. If the query is instead general knowledge relevant to the UPSC exam (only answer informative questions in these areas: Historical-Analytical, Geographical-Environmental, Political-Governance, International-Relations, Economic-Developmental, Technological-Innovative, Ethical-Philosophical, Current-Affairs & Specialized-Academic), reply "Out of the book, but searching from web:" and then answer the query. If the question falls outside all of these types, reply "Not related to the Book & UPSC" and do not provide an answer. If you notice misspelled names or words, try to correct them; for example, the user may write "Madam Linda" or "Madam Lindog" when the character in the book is "Madam Lyngdoh". Never reveal anything about the 'Context'. Don't add unnecessary lines. Be helpful and caring, and keep the user engaged in a conversational, assisting tone rather than being overly blunt.
        Context:
        {context}
        Question: {question}
        Helpful Answer:""", 
        input_variables=["context", "question"]
    )

    # Memory is attached to the parent chain only; giving it to this
    # LLMChain as well would write duplicate turns into the history.
    question_generator = LLMChain(llm=llm, prompt=condense_question_prompt_template)
    doc_chain = load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
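    # Wire everything together: the question generator condenses each
    # follow-up into a standalone question, the retriever fetches context,
    # and the "stuff" chain answers from the retrieved documents.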
    qa_instance = ConversationalRetrievalChain(
        retriever=retriever,
        question_generator=question_generator,
        combine_docs_chain=doc_chain,
        memory=memory
    )
    return qa_instance

def answer_query(question: str, chat_history):
    if not question.strip():
        return "Please enter a question.", chat_history
    qa_instance = initialize_chatbot_from_json("embeddings.json", openai_api_key)
    # The chain keeps its own history in ConversationBufferMemory; the
    # Gradio chat_history list only drives what the UI displays.
    result = qa_instance({"question": question, "chat_history": chat_history})
    chat_history.append((question, result['answer']))
    return "", chat_history

with gr.Blocks() as demo:
    gr.Markdown(
    """
    # AI Book Agent!
    Ask any UPSC-relevant query about the NCERT book.
    """)
    chatbot = gr.Chatbot(label="Chatbot")
    question = gr.Textbox(label="Ask a question", placeholder="Type your question...")

    question.submit(answer_query, inputs=[question, chatbot], outputs=[question, chatbot])

if __name__ == "__main__":
    demo.launch()