# NOTE(review): removed non-Python scrape residue (Hugging Face Space page
# header: status labels, commit hashes, and a line-number gutter) that made
# the file unparseable.
# NOTE(review): removed an ~85-line commented-out earlier draft of this app
# (global-state variant of the same chatbot). It duplicated the active code
# below; recover it from version history if ever needed.
import gradio as gr
import json
from typing import List, Dict
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_openai import ChatOpenAI
from langchain.schema import Document
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
import os
openai_api_key = os.getenv("OPENAI_API_KEY")
def load_embeddings_from_json(json_file_path: str):
    """Read precomputed chunk/embedding records from a JSON file.

    The file must hold a list of objects, each with a 'chunk' text and an
    'embeddings' vector; a record missing 'id' falls back to its list index
    (as a string).

    Returns:
        A (chunks, embeddings, ids) tuple of three parallel lists.
    """
    with open(json_file_path, 'r') as fh:
        records = json.load(fh)
    chunks, embeddings, ids = [], [], []
    for position, record in enumerate(records):
        chunks.append(record['chunk'])
        embeddings.append(record['embeddings'])
        ids.append(record.get('id', str(position)))
    return chunks, embeddings, ids
def initialize_chatbot_from_json(json_file_path: str, openai_api_key: str):
    """Build (or return the cached) ConversationalRetrievalChain for a Space.

    Loads precomputed embeddings from *json_file_path* into an in-memory
    Chroma collection, then wires up a MultiQueryRetriever, a question
    condenser, and a stuff-type QA chain around a GPT-4o client.

    The result is memoized per (json_file_path, openai_api_key): the caller
    (``answer_query``) invokes this once per user query, and without the
    cache every query would rebuild the vector store, re-create the OpenAI
    clients, and wipe the chain's ConversationBufferMemory.

    Returns:
        A ready-to-call ConversationalRetrievalChain.
    """
    # Function-attribute cache keeps the fix self-contained (no new imports).
    cache = initialize_chatbot_from_json.__dict__.setdefault('_cache', {})
    cache_key = (json_file_path, openai_api_key)
    if cache_key in cache:
        return cache[cache_key]

    chunks, embeddings, ids = load_embeddings_from_json(json_file_path)
    # In-memory collection (persist_directory=None). The embedding function
    # only embeds incoming queries; stored vectors come straight from JSON.
    vectorstore = Chroma(
        collection_name="my_collection",
        persist_directory=None,
        embedding_function=OpenAIEmbeddings(api_key=openai_api_key)
    )
    # NOTE(review): _client._add is private Chroma API, used to insert the
    # precomputed vectors without re-embedding; may break on chroma upgrades.
    vectorstore._client._add(
        collection_id=vectorstore._collection.id,
        ids=ids,
        embeddings=embeddings,
        metadatas=[{"source": "json"} for _ in chunks],
        documents=chunks,
    )
    llm = ChatOpenAI(api_key=openai_api_key, temperature=0.5, model="gpt-4o", verbose=True)
    retriever = MultiQueryRetriever.from_llm(retriever=vectorstore.as_retriever(), llm=llm)
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    # Rewrites a follow-up into a standalone question using the chat history.
    condense_question_prompt_template = PromptTemplate.from_template("""Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question without changing the content in given question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:""")
    # Answering prompt: restricts the model to UPSC-relevant retrieved context.
    qa_prompt = PromptTemplate(
        template="""You are a highly informative and helpful QA System specialized in providing information related to the UPSC Exam but strictly within the 'Context'. Ensure you only answer questions that are relevant to the UPSC Exam. If the question asked is not in 'Context' and not related to the UPSC Exam, do not provide an answer. Always answer in an informative and highly detailed manner, oriented towards the UPSC Exam. Also never just answer the Query, Never tell anything about 'Context'. Dont use unnecessary lines!
Context:
{context}
Question: {question}
Helpful Answer:""",
        input_variables=["context", "question"]
    )
    question_generator = LLMChain(llm=llm, prompt=condense_question_prompt_template, memory=memory)
    doc_chain = load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
    qa_instance = ConversationalRetrievalChain(
        retriever=retriever,
        question_generator=question_generator,
        combine_docs_chain=doc_chain,
        memory=memory
    )
    cache[cache_key] = qa_instance
    return qa_instance
def answer_query(question: str, chat_history):
    """Handle one chat turn for the Gradio UI.

    Runs the retrieval-QA chain on *question* and appends the
    (question, answer) pair to *chat_history* in place.

    Returns:
        A (textbox_value, chat_history) pair: the textbox is cleared on
        success, or shows a prompt when the input was blank.
    """
    cleaned = question.strip()
    if not cleaned:
        return "Please enter a question.", chat_history
    chain = initialize_chatbot_from_json("embeddings.json", openai_api_key)
    # The original (unstripped) question is what the chain receives.
    response = chain({"question": question, "chat_history": chat_history})
    chat_history.append((question, response['answer']))
    return "", chat_history
with gr.Blocks() as demo:
    # Page header shown above the chat widgets.
    gr.Markdown(
        """
        # AI Book Agent!
        Ask any UPSC relevant Query from the NCERT.
        """)
    chat_window = gr.Chatbot(label="Chatbot")
    query_box = gr.Textbox(label="Ask a question", placeholder="Type your question...")
    # Submitting the textbox drives the chat: answer_query returns the new
    # textbox value (cleared, or an error prompt) and the updated history.
    query_box.submit(answer_query, inputs=[query_box, chat_window], outputs=[query_box, chat_window])

if __name__ == "__main__":
    demo.launch()