import gradio as gr
import json
import os

from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
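# Assumed dependencies, inferred from the imports above (this file does not
# pin versions):
#   pip install gradio langchain langchain-openai langchain-chroma chromadb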

openai_api_key = os.getenv("OPENAI_API_KEY")

def load_embeddings_from_json(json_file_path: str):
    """Load pre-computed text chunks, embedding vectors, and ids from JSON."""
    with open(json_file_path, 'r') as f:
        data = json.load(f)
    chunks = [item['chunk'] for item in data]
    embeddings = [item['embeddings'] for item in data]
    # Fall back to the list index when a record carries no explicit id.
    ids = [item.get('id', str(index)) for index, item in enumerate(data)]
    return chunks, embeddings, ids
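# The loader above implies embeddings.json has roughly the following shape.
# This is a sketch inferred from the key accesses, not a spec shipped with
# the repo:
#
# [
#   {
#     "id": "0",                        # optional; the list index is used if absent
#     "chunk": "text of the first chunk",
#     "embeddings": [0.0123, -0.0456]   # one embedding vector per chunk
#   }
# ]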

def initialize_chatbot_from_json(json_file_path: str, openai_api_key: str):
    """Build a ConversationalRetrievalChain over embeddings loaded from JSON."""
    chunks, embeddings, ids = load_embeddings_from_json(json_file_path)
    vectorstore = Chroma(
        collection_name="my_collection",
        persist_directory=None,  # in-memory collection; nothing is persisted
        embedding_function=OpenAIEmbeddings(api_key=openai_api_key)
    )
    # Insert the pre-computed vectors through Chroma's underlying client.
    # Note: _client, _collection, and _add are private langchain-chroma /
    # chromadb internals and may break across versions.
    vectorstore._client._add(
        collection_id=vectorstore._collection.id,
        ids=ids,
        embeddings=embeddings,
        metadatas=[{"source": "json"} for _ in chunks],
        documents=chunks,
    )

    llm = ChatOpenAI(api_key=openai_api_key, temperature=0.5, model="gpt-4o", verbose=True)
    # MultiQueryRetriever has the LLM generate several rephrasings of each
    # query and merges the documents retrieved for all of them.
    retriever = MultiQueryRetriever.from_llm(retriever=vectorstore.as_retriever(), llm=llm)
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    condense_question_prompt_template = PromptTemplate.from_template("""Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question without changing the content of the given question.
    Chat History:
    {chat_history}
    Follow Up Input: {question}
    Standalone question:""")
    
    qa_prompt = PromptTemplate(
        template="""You are a highly informative and helpful QA system specialized in providing information related to the UPSC Exam, strictly within the 'Context'. Only answer questions that are relevant to the UPSC Exam. If the question is not covered by the 'Context' and is not related to the UPSC Exam, do not provide an answer. Always answer in an informative and highly detailed manner, oriented towards the UPSC Exam. Never mention the 'Context' itself in your answer, and don't include unnecessary lines.
        Context:
        {context}
        Question: {question}
        Helpful Answer:""",
        input_variables=["context", "question"]
    )

    # The question generator condenses a follow-up question into a standalone
    # one; the "stuff" chain then answers it over the retrieved chunks.
    # Memory lives on the outer chain only: giving the inner LLMChain its own
    # handle on the same memory would also write the rephrased questions into
    # the conversation history.
    question_generator = LLMChain(llm=llm, prompt=condense_question_prompt_template)
    doc_chain = load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
    qa_instance = ConversationalRetrievalChain(
        retriever=retriever,
        question_generator=question_generator,
        combine_docs_chain=doc_chain,
        memory=memory
    )
    return qa_instance
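# A minimal sketch (an assumption, not code from this repo) of how an
# embeddings.json consumable by load_embeddings_from_json could be produced:
#
#     from langchain_openai.embeddings import OpenAIEmbeddings
#     chunks = ["first chunk of text", "second chunk of text"]
#     vectors = OpenAIEmbeddings(api_key=openai_api_key).embed_documents(chunks)
#     records = [
#         {"id": str(i), "chunk": c, "embeddings": v}
#         for i, (c, v) in enumerate(zip(chunks, vectors))
#     ]
#     with open("embeddings.json", "w") as f:
#         json.dump(records, f)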

# Build the chain once at startup; rebuilding it on every question would
# re-create the vector store and reset the conversation memory each time.
qa_instance = initialize_chatbot_from_json("embeddings.json", openai_api_key)

def answer_query(question: str, chat_history):
    """Answer one question and append the (question, answer) pair for display."""
    if not question.strip():
        return "Please enter a question.", chat_history
    # The chain's own memory tracks the conversation; chat_history here is
    # only Gradio's display state.
    result = qa_instance({"question": question})
    chat_history.append((question, result['answer']))
    return "", chat_history

with gr.Blocks() as demo:
    gr.Markdown(
        """
        # AI Book Agent!
        Ask any UPSC-relevant query from the NCERT books.
        """)
    chatbot = gr.Chatbot(label="Chatbot")
    question = gr.Textbox(label="Ask a question", placeholder="Type your question...")

    question.submit(answer_query, inputs=[question, chatbot], outputs=[question, chatbot])

if __name__ == "__main__":
    demo.launch()