Gaurav-2273 committed
Commit 33b8d04
1 Parent(s): 267af0c

Update app.py

Files changed (1)
  1. app.py +29 -90
app.py CHANGED
@@ -1,83 +1,3 @@
- # import gradio as gr
- # import fitz  # PyMuPDF
- # import re
- # from langchain_openai.embeddings import OpenAIEmbeddings
- # from langchain_chroma import Chroma
- # from langchain.retrievers.multi_query import MultiQueryRetriever
- # from langchain.chains import ConversationalRetrievalChain
- # from langchain.memory import ConversationBufferMemory
- # from langchain_openai import ChatOpenAI
- # from langchain_experimental.text_splitter import SemanticChunker
-
- # import os
- # openai_api_key = os.getenv("OPENAI_API_KEY")
-
- # vectorstore = None
- # llm = None
- # qa_instance = None
- # chat_history = []  # Global chat history
-
- # def extract_text_from_pdf(pdf_bytes):
- #     document = fitz.open("pdf", pdf_bytes)
- #     text = ""
- #     for page_num in range(len(document)):
- #         page = document.load_page(page_num)
- #         text += page.get_text()
- #     document.close()
- #     return text
-
- # def clean_text(text):
- #     cleaned_text = re.sub(r'\s+', ' ', text)
- #     cleaned_text = re.sub(r'(.)\1{2,}', r'\1', cleaned_text)
- #     cleaned_text = re.sub(r'\b(\w+)\b(?:\s+\1\b)+', r'\1', cleaned_text)
- #     return cleaned_text.strip()
-
- # def initialize_chatbot(cleaned_text, openai_api_key):
- #     global vectorstore, llm, qa_instance
- #     if vectorstore is None:  # Only create embeddings and Chroma once
- #         embeddings = OpenAIEmbeddings(api_key=openai_api_key)
- #         text_splitter = SemanticChunker(embeddings)
- #         docs = text_splitter.create_documents([cleaned_text])
- #         vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings)
- #     if llm is None:
- #         llm = ChatOpenAI(api_key=openai_api_key, temperature=0.5, model="gpt-4o", verbose=True)
- #     retriever = MultiQueryRetriever.from_llm(retriever=vectorstore.as_retriever(), llm=llm)
- #     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
- #     qa_instance = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
-
- # def setup_qa_system(pdf_file):
- #     global chat_history
- #     if pdf_file is None:
- #         return [("Please upload a PDF file.", "")]
- #     extracted_text = extract_text_from_pdf(pdf_file)
- #     cleaned_text = clean_text(extracted_text)
- #     initialize_chatbot(cleaned_text, openai_api_key)
- #     chat_history = [("Chatbot initialized. Please ask a question.", "")]
- #     return chat_history
-
- # def answer_query(question):
- #     global chat_history
- #     if qa_instance is None:
- #         return [("Please upload a PDF and initialize the system first.", "")]
- #     if not question.strip():
- #         return [("Please enter a question.", "")]
- #     result = qa_instance({"question": question})
- #     chat_history.append((question, result['answer']))
- #     return chat_history
-
- # with gr.Blocks() as demo:
- #     upload = gr.File(label="Upload PDF", type="binary", file_types=["pdf"])
- #     chatbot = gr.Chatbot(label="Chatbot")
- #     question = gr.Textbox(label="Ask a question", placeholder="Type your question after uploading PDF...")
-
- #     upload.change(setup_qa_system, inputs=[upload], outputs=[chatbot])
- #     question.submit(answer_query, inputs=[question], outputs=[chatbot])
-
- # if __name__ == "__main__":
- #     demo.launch()
-
-
-
  import gradio as gr
  import json
  from typing import List, Dict
@@ -88,8 +8,12 @@ from langchain.chains import ConversationalRetrievalChain
  from langchain.memory import ConversationBufferMemory
  from langchain_openai import ChatOpenAI
  from langchain.schema import Document
+ from langchain.chains import LLMChain
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain.prompts import PromptTemplate
+ import os
 
- openai_api_key = "sk-proj-bxh8lX8T6EoQaDWm2cljT3BlbkFJylU5bVGc2eQxB8WCP1Ub"
+ openai_api_key = os.getenv("OPENAI_API_KEY")
 
  vectorstore = None
  llm = None
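
Note: the point of this hunk is the credential fix, replacing the hardcoded key (already exposed in the repository history) with an environment lookup. A minimal sketch of a fail-fast variant; the explicit exit is an illustration, not part of the commit:

    import os
    import sys

    # os.getenv returns None when the variable is unset; without a check this
    # only surfaces later as a 401 error from the first OpenAI call.
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        sys.exit("OPENAI_API_KEY is not set; export it before launching the app.")
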
@@ -124,7 +48,28 @@ def initialize_chatbot_from_json(json_file_path: str, openai_api_key: str):
      llm = ChatOpenAI(api_key=openai_api_key, temperature=0.5, model="gpt-4o", verbose=True)
      retriever = MultiQueryRetriever.from_llm(retriever=vectorstore.as_retriever(), llm=llm)
      memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
-     qa_instance = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
+     _template = """Given the following conversation and a follow-up question, rephrase the follow-up question to be a
+ standalone question, without changing the content of the given question.
+ Chat History:
+ {chat_history}
+ Follow Up Input: {question}
+ Standalone question:"""
+     condense_question_prompt_template = PromptTemplate.from_template(_template)
+     prompt_template = """You are a highly informative and helpful QA system specialized in providing information related to the UPSC Exam, strictly within the 'Context'. Only answer questions that are relevant to the UPSC Exam; if the question is not covered by the 'Context' and not related to the UPSC Exam, do not provide an answer. Always answer in an informative and highly detailed manner, oriented towards the UPSC Exam. Never reveal anything about the 'Context' itself, and don't use unnecessary lines!
+ Context:
+ {context}
+ Question: {question}
+ Helpful Answer:"""
+     qa_prompt = PromptTemplate(
+         template=prompt_template, input_variables=["context", "question"]
+     )
+     question_generator = LLMChain(llm=llm, prompt=condense_question_prompt_template, memory=memory)
+     doc_chain = load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
+     qa_instance = ConversationalRetrievalChain(
+         retriever=retriever,
+         question_generator=question_generator,
+         combine_docs_chain=doc_chain,
+         memory=memory)
 
  def answer_query(question: str):
      global chat_history
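
Note: this hunk hand-assembles what ConversationalRetrievalChain.from_llm would otherwise build, so that both the question-condensing prompt and the answering prompt can be customized. A rough sketch of the per-turn flow, using the same classic (pre-LCEL) LangChain APIs; the sample history and question are invented:

    # 1. Condense (chat history, follow-up) into a standalone question.
    #    The sketch uses a memory-free LLMChain to stay self-contained.
    condense = LLMChain(llm=llm, prompt=condense_question_prompt_template)
    standalone = condense.run(
        chat_history="Human: What is Article 356?\nAI: It allows President's Rule ...",
        question="When was it last invoked?",
    )

    # 2. Retrieve documents relevant to the standalone question.
    docs = retriever.get_relevant_documents(standalone)

    # 3. "Stuff" the documents into qa_prompt and ask the LLM.
    answer = doc_chain.run(input_documents=docs, question=standalone)

One caveat worth flagging: the commit attaches the same memory object to both the inner LLMChain and the outer ConversationalRetrievalChain, so the chat history may be loaded and saved twice per turn; whether that is intended is not clear from the diff.
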
@@ -137,16 +82,10 @@ def answer_query(question: str):
      return chat_history
 
  with gr.Blocks() as demo:
-     gr.Markdown(
-         """
-         # AI Book Agent
-         Ask your Queries regarding NCERT books.
-         """)
      chatbot = gr.Chatbot(label="Chatbot")
      question = gr.Textbox(label="Ask a question", placeholder="Type your question...")
      question.submit(answer_query, inputs=[question], outputs=[chatbot])
-     initialize_chatbot_from_json("embeddings.json", openai_api_key)
+     initialize_chatbot_from_json("/Users/gaurav/Downloads/embeddings.json", openai_api_key)
 
  if __name__ == "__main__":
-     demo.launch()
-
+     demo.launch()
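
Note: initialize_chatbot_from_json is now called once at startup, but with an absolute path from the author's machine, which will not exist on a hosted Space. A sketch of a more portable variant, assuming embeddings.json ships next to app.py (the relative filename comes from the previous revision):

    from pathlib import Path

    # Resolve the file relative to app.py so the app behaves the same locally
    # and on the hosted Space; assumes embeddings.json is committed alongside it.
    EMBEDDINGS_PATH = Path(__file__).resolve().parent / "embeddings.json"
    initialize_chatbot_from_json(str(EMBEDDINGS_PATH), openai_api_key)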