Commit 33b8d04
Parent(s): 267af0c
Update app.py

app.py CHANGED
@@ -1,83 +1,3 @@
-# import gradio as gr
-# import fitz  # PyMuPDF
-# import re
-# from langchain_openai.embeddings import OpenAIEmbeddings
-# from langchain_chroma import Chroma
-# from langchain.retrievers.multi_query import MultiQueryRetriever
-# from langchain.chains import ConversationalRetrievalChain
-# from langchain.memory import ConversationBufferMemory
-# from langchain_openai import ChatOpenAI
-# from langchain_experimental.text_splitter import SemanticChunker
-
-# import os
-# openai_api_key = os.getenv("OPENAI_API_KEY")
-
-# vectorstore = None
-# llm = None
-# qa_instance = None
-# chat_history = []  # Global chat history
-
-# def extract_text_from_pdf(pdf_bytes):
-#     document = fitz.open("pdf", pdf_bytes)
-#     text = ""
-#     for page_num in range(len(document)):
-#         page = document.load_page(page_num)
-#         text += page.get_text()
-#     document.close()
-#     return text
-
-# def clean_text(text):
-#     cleaned_text = re.sub(r'\s+', ' ', text)
-#     cleaned_text = re.sub(r'(.)\1{2,}', r'\1', cleaned_text)
-#     cleaned_text = re.sub(r'\b(\w+)\b(?:\s+\1\b)+', r'\1', cleaned_text)
-#     return cleaned_text.strip()
-
-# def initialize_chatbot(cleaned_text, openai_api_key):
-#     global vectorstore, llm, qa_instance
-#     if vectorstore is None:  # Only create embeddings and Chroma once
-#         embeddings = OpenAIEmbeddings(api_key=openai_api_key)
-#         text_splitter = SemanticChunker(embeddings)
-#         docs = text_splitter.create_documents([cleaned_text])
-#         vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings)
-#     if llm is None:
-#         llm = ChatOpenAI(api_key=openai_api_key, temperature=0.5, model="gpt-4o", verbose=True)
-#         retriever = MultiQueryRetriever.from_llm(retriever=vectorstore.as_retriever(), llm=llm)
-#         memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
-#         qa_instance = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
-
-# def setup_qa_system(pdf_file):
-#     global chat_history
-#     if pdf_file is None:
-#         return [("Please upload a PDF file.", "")]
-#     extracted_text = extract_text_from_pdf(pdf_file)
-#     cleaned_text = clean_text(extracted_text)
-#     initialize_chatbot(cleaned_text, openai_api_key)
-#     chat_history = [("Chatbot initialized. Please ask a question.", "")]
-#     return chat_history
-
-# def answer_query(question):
-#     global chat_history
-#     if qa_instance is None:
-#         return [("Please upload a PDF and initialize the system first.", "")]
-#     if not question.strip():
-#         return [("Please enter a question.", "")]
-#     result = qa_instance({"question": question})
-#     chat_history.append((question, result['answer']))
-#     return chat_history
-
-# with gr.Blocks() as demo:
-#     upload = gr.File(label="Upload PDF", type="binary", file_types=["pdf"])
-#     chatbot = gr.Chatbot(label="Chatbot")
-#     question = gr.Textbox(label="Ask a question", placeholder="Type your question after uploading PDF...")
-
-#     upload.change(setup_qa_system, inputs=[upload], outputs=[chatbot])
-#     question.submit(answer_query, inputs=[question], outputs=[chatbot])
-
-#     if __name__ == "__main__":
-#         demo.launch()
-
-
-
 import gradio as gr
 import json
 from typing import List, Dict
@@ -88,8 +8,12 @@ from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from langchain_openai import ChatOpenAI
 from langchain.schema import Document
+from langchain.chains import LLMChain
+from langchain.chains.question_answering import load_qa_chain
+from langchain.prompts import PromptTemplate
+import os

-openai_api_key = "
+openai_api_key = os.getenv("OPENAI_API_KEY")

 vectorstore = None
 llm = None
@@ -124,7 +48,28 @@ def initialize_chatbot_from_json(json_file_path: str, openai_api_key: str):
     llm = ChatOpenAI(api_key=openai_api_key, temperature=0.5, model="gpt-4o", verbose=True)
     retriever = MultiQueryRetriever.from_llm(retriever=vectorstore.as_retriever(), llm=llm)
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
-    qa_instance = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
+    _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a
+    standalone question without changing the content in given question.
+    Chat History:
+    {chat_history}
+    Follow Up Input: {question}
+    Standalone question:"""
+    condense_question_prompt_template = PromptTemplate.from_template(_template)
+    prompt_template = """You are a highly informative and helpful QA System specialized in providing information related to the UPSC Exam but strictly within the 'Context'. Ensure you only answer questions that are relevant to the UPSC Exam. If the question asked is not in 'Context' and not related to the UPSC Exam, do not provide an answer. Always answer in an informative and highly detailed manner, oriented towards the UPSC Exam. Also never just answer the Query, Never tell anything about 'Context'. Dont use unnecessary lines!
+    Context:
+    {context}
+    Question: {question}
+    Helpful Answer:"""
+    qa_prompt = PromptTemplate(
+        template=prompt_template, input_variables=["context", "question"]
+    )
+    question_generator = LLMChain(llm=llm, prompt=condense_question_prompt_template, memory=memory)
+    doc_chain = load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
+    qa_instance = ConversationalRetrievalChain(
+        retriever=retriever,
+        question_generator=question_generator,
+        combine_docs_chain=doc_chain,
+        memory=memory)

 def answer_query(question: str):
     global chat_history
@@ -137,16 +82,10 @@ def answer_query(question: str):
     return chat_history

 with gr.Blocks() as demo:
-    gr.Markdown(
-        """
-        # AI Book Agent
-        Ask your Queires regarding NCERT books.
-        """)
     chatbot = gr.Chatbot(label="Chatbot")
     question = gr.Textbox(label="Ask a question", placeholder="Type your question...")
     question.submit(answer_query, inputs=[question], outputs=[chatbot])
-    initialize_chatbot_from_json("embeddings.json", openai_api_key)
+    initialize_chatbot_from_json("/Users/gaurav/Downloads/embeddings.json", openai_api_key)

 if __name__ == "__main__":
-    demo.launch()
-
+    demo.launch()