Gaurav-2273 committed
Commit 33b8d04
1 Parent(s): 267af0c

Update app.py

Files changed (1)
  1. app.py +29 -90
app.py CHANGED
@@ -1,83 +1,3 @@
- # import gradio as gr
- # import fitz  # PyMuPDF
- # import re
- # from langchain_openai.embeddings import OpenAIEmbeddings
- # from langchain_chroma import Chroma
- # from langchain.retrievers.multi_query import MultiQueryRetriever
- # from langchain.chains import ConversationalRetrievalChain
- # from langchain.memory import ConversationBufferMemory
- # from langchain_openai import ChatOpenAI
- # from langchain_experimental.text_splitter import SemanticChunker
-
- # import os
- # openai_api_key = os.getenv("OPENAI_API_KEY")
-
- # vectorstore = None
- # llm = None
- # qa_instance = None
- # chat_history = []  # Global chat history
-
- # def extract_text_from_pdf(pdf_bytes):
- #     document = fitz.open("pdf", pdf_bytes)
- #     text = ""
- #     for page_num in range(len(document)):
- #         page = document.load_page(page_num)
- #         text += page.get_text()
- #     document.close()
- #     return text
-
- # def clean_text(text):
- #     cleaned_text = re.sub(r'\s+', ' ', text)
- #     cleaned_text = re.sub(r'(.)\1{2,}', r'\1', cleaned_text)
- #     cleaned_text = re.sub(r'\b(\w+)\b(?:\s+\1\b)+', r'\1', cleaned_text)
- #     return cleaned_text.strip()
-
- # def initialize_chatbot(cleaned_text, openai_api_key):
- #     global vectorstore, llm, qa_instance
- #     if vectorstore is None:  # Only create embeddings and Chroma once
- #         embeddings = OpenAIEmbeddings(api_key=openai_api_key)
- #         text_splitter = SemanticChunker(embeddings)
- #         docs = text_splitter.create_documents([cleaned_text])
- #         vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings)
- #     if llm is None:
- #         llm = ChatOpenAI(api_key=openai_api_key, temperature=0.5, model="gpt-4o", verbose=True)
- #     retriever = MultiQueryRetriever.from_llm(retriever=vectorstore.as_retriever(), llm=llm)
- #     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
- #     qa_instance = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
-
- # def setup_qa_system(pdf_file):
- #     global chat_history
- #     if pdf_file is None:
- #         return [("Please upload a PDF file.", "")]
- #     extracted_text = extract_text_from_pdf(pdf_file)
- #     cleaned_text = clean_text(extracted_text)
- #     initialize_chatbot(cleaned_text, openai_api_key)
- #     chat_history = [("Chatbot initialized. Please ask a question.", "")]
- #     return chat_history
-
- # def answer_query(question):
- #     global chat_history
- #     if qa_instance is None:
- #         return [("Please upload a PDF and initialize the system first.", "")]
- #     if not question.strip():
- #         return [("Please enter a question.", "")]
- #     result = qa_instance({"question": question})
- #     chat_history.append((question, result['answer']))
- #     return chat_history
-
- # with gr.Blocks() as demo:
- #     upload = gr.File(label="Upload PDF", type="binary", file_types=["pdf"])
- #     chatbot = gr.Chatbot(label="Chatbot")
- #     question = gr.Textbox(label="Ask a question", placeholder="Type your question after uploading PDF...")
-
- #     upload.change(setup_qa_system, inputs=[upload], outputs=[chatbot])
- #     question.submit(answer_query, inputs=[question], outputs=[chatbot])
-
- # if __name__ == "__main__":
- #     demo.launch()
-
-
-
  import gradio as gr
  import json
  from typing import List, Dict
@@ -88,8 +8,12 @@ from langchain.chains import ConversationalRetrievalChain
  from langchain.memory import ConversationBufferMemory
  from langchain_openai import ChatOpenAI
  from langchain.schema import Document
+ from langchain.chains import LLMChain
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain.prompts import PromptTemplate
+ import os
 
- openai_api_key = "sk-proj-bxh8lX8T6EoQaDWm2cljT3BlbkFJylU5bVGc2eQxB8WCP1Ub"
+ openai_api_key = os.getenv("OPENAI_API_KEY")
 
  vectorstore = None
  llm = None
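
Note: the point of this hunk is the credential fix, replacing the hardcoded key (already exposed in the repository history) with an environment lookup. A minimal sketch of a fail-fast variant; the explicit exit is an illustration, not part of the commit:

    import os
    import sys

    # os.getenv returns None when the variable is unset; without a check this
    # only surfaces later as a 401 error from the first OpenAI call.
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        sys.exit("OPENAI_API_KEY is not set; export it before launching the app.")
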
@@ -124,7 +48,28 @@ def initialize_chatbot_from_json(json_file_path: str, openai_api_key: str):
      llm = ChatOpenAI(api_key=openai_api_key, temperature=0.5, model="gpt-4o", verbose=True)
      retriever = MultiQueryRetriever.from_llm(retriever=vectorstore.as_retriever(), llm=llm)
      memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
-     qa_instance = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
+     _template = """Given the following conversation and a follow-up question, rephrase the follow-up question to be a
+ standalone question, without changing the content of the given question.
+ Chat History:
+ {chat_history}
+ Follow Up Input: {question}
+ Standalone question:"""
+     condense_question_prompt_template = PromptTemplate.from_template(_template)
+     prompt_template = """You are a highly informative and helpful QA system specialized in providing information related to the UPSC Exam, strictly within the 'Context'. Only answer questions that are relevant to the UPSC Exam; if the question is not covered by the 'Context' and not related to the UPSC Exam, do not provide an answer. Always answer in an informative and highly detailed manner, oriented towards the UPSC Exam. Never reveal anything about the 'Context' itself, and don't use unnecessary lines!
+ Context:
+ {context}
+ Question: {question}
+ Helpful Answer:"""
+     qa_prompt = PromptTemplate(
+         template=prompt_template, input_variables=["context", "question"]
+     )
+     question_generator = LLMChain(llm=llm, prompt=condense_question_prompt_template, memory=memory)
+     doc_chain = load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
+     qa_instance = ConversationalRetrievalChain(
+         retriever=retriever,
+         question_generator=question_generator,
+         combine_docs_chain=doc_chain,
+         memory=memory)
 
  def answer_query(question: str):
      global chat_history
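
Note: this hunk hand-assembles what ConversationalRetrievalChain.from_llm would otherwise build, so that both the question-condensing prompt and the answering prompt can be customized. A rough sketch of the per-turn flow, using the same classic (pre-LCEL) LangChain APIs; the sample history and question are invented:

    # 1. Condense (chat history, follow-up) into a standalone question.
    #    The sketch uses a memory-free LLMChain to stay self-contained.
    condense = LLMChain(llm=llm, prompt=condense_question_prompt_template)
    standalone = condense.run(
        chat_history="Human: What is Article 356?\nAI: It allows President's Rule ...",
        question="When was it last invoked?",
    )

    # 2. Retrieve documents relevant to the standalone question.
    docs = retriever.get_relevant_documents(standalone)

    # 3. "Stuff" the documents into qa_prompt and ask the LLM.
    answer = doc_chain.run(input_documents=docs, question=standalone)

One caveat worth flagging: the commit attaches the same memory object to both the inner LLMChain and the outer ConversationalRetrievalChain, so the chat history may be loaded and saved twice per turn; whether that is intended is not clear from the diff.
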
@@ -137,16 +82,10 @@ def answer_query(question: str):
      return chat_history
 
  with gr.Blocks() as demo:
-     gr.Markdown(
-         """
-         # AI Book Agent
-         Ask your Queries regarding NCERT books.
-         """)
      chatbot = gr.Chatbot(label="Chatbot")
      question = gr.Textbox(label="Ask a question", placeholder="Type your question...")
      question.submit(answer_query, inputs=[question], outputs=[chatbot])
-     initialize_chatbot_from_json("embeddings.json", openai_api_key)
+     initialize_chatbot_from_json("/Users/gaurav/Downloads/embeddings.json", openai_api_key)
 
  if __name__ == "__main__":
-     demo.launch()
-
+     demo.launch()
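
Note: initialize_chatbot_from_json is now called once at startup, but with an absolute path from the author's machine, which will not exist on a hosted Space. A sketch of a more portable variant, assuming embeddings.json ships next to app.py (the relative filename comes from the previous revision):

    from pathlib import Path

    # Resolve the file relative to app.py so the app behaves the same locally
    # and on the hosted Space; assumes embeddings.json is committed alongside it.
    EMBEDDINGS_PATH = Path(__file__).resolve().parent / "embeddings.json"
    initialize_chatbot_from_json(str(EMBEDDINGS_PATH), openai_api_key)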