Spaces:
Running
Running
Commit
•
f26ae96
1
Parent(s):
9f85a45
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import gradio as gr
|
2 |
-
from huggingface_hub import InferenceClient
|
3 |
import fitz # PyMuPDF
|
4 |
import re
|
5 |
from langchain_openai.embeddings import OpenAIEmbeddings
|
@@ -15,7 +14,7 @@ openai_api_key = os.getenv("OPENAI_API_KEY")
|
|
15 |
vectorstore = None
|
16 |
llm = None
|
17 |
qa_instance = None
|
18 |
-
chat_history = []
|
19 |
|
20 |
def extract_text_from_pdf(pdf_bytes):
|
21 |
document = fitz.open("pdf", pdf_bytes)
|
@@ -32,9 +31,9 @@ def clean_text(text):
|
|
32 |
cleaned_text = re.sub(r'\b(\w+)\b(?:\s+\1\b)+', r'\1', cleaned_text)
|
33 |
return cleaned_text.strip()
|
34 |
|
35 |
-
def initialize_chatbot(cleaned_text):
|
36 |
global vectorstore, llm, qa_instance
|
37 |
-
if vectorstore is None:
|
38 |
embeddings = OpenAIEmbeddings(api_key=openai_api_key)
|
39 |
text_splitter = SemanticChunker(embeddings)
|
40 |
docs = text_splitter.create_documents([cleaned_text])
|
@@ -46,15 +45,17 @@ def initialize_chatbot(cleaned_text):
|
|
46 |
qa_instance = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
|
47 |
|
48 |
def setup_qa_system(pdf_file):
|
|
|
49 |
if pdf_file is None:
|
50 |
return [("Please upload a PDF file.", "")]
|
51 |
extracted_text = extract_text_from_pdf(pdf_file)
|
52 |
cleaned_text = clean_text(extracted_text)
|
53 |
-
initialize_chatbot(cleaned_text)
|
54 |
chat_history = [("Chatbot initialized. Please ask a question.", "")]
|
55 |
return chat_history
|
56 |
|
57 |
def answer_query(question):
|
|
|
58 |
if qa_instance is None:
|
59 |
return [("Please upload a PDF and initialize the system first.", "")]
|
60 |
if not question.strip():
|
@@ -71,4 +72,5 @@ with gr.Blocks() as demo:
|
|
71 |
upload.change(setup_qa_system, inputs=[upload], outputs=[chatbot])
|
72 |
question.submit(answer_query, inputs=[question], outputs=[chatbot])
|
73 |
|
74 |
-
|
|
|
|
1 |
import gradio as gr
|
|
|
2 |
import fitz # PyMuPDF
|
3 |
import re
|
4 |
from langchain_openai.embeddings import OpenAIEmbeddings
|
|
|
14 |
vectorstore = None
|
15 |
llm = None
|
16 |
qa_instance = None
|
17 |
+
chat_history = [] # Global chat history
|
18 |
|
19 |
def extract_text_from_pdf(pdf_bytes):
|
20 |
document = fitz.open("pdf", pdf_bytes)
|
|
|
31 |
cleaned_text = re.sub(r'\b(\w+)\b(?:\s+\1\b)+', r'\1', cleaned_text)
|
32 |
return cleaned_text.strip()
|
33 |
|
34 |
+
def initialize_chatbot(cleaned_text, openai_api_key):
|
35 |
global vectorstore, llm, qa_instance
|
36 |
+
if vectorstore is None: # Only create embeddings and Chroma once
|
37 |
embeddings = OpenAIEmbeddings(api_key=openai_api_key)
|
38 |
text_splitter = SemanticChunker(embeddings)
|
39 |
docs = text_splitter.create_documents([cleaned_text])
|
|
|
45 |
qa_instance = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
|
46 |
|
47 |
def setup_qa_system(pdf_file):
|
48 |
+
global chat_history
|
49 |
if pdf_file is None:
|
50 |
return [("Please upload a PDF file.", "")]
|
51 |
extracted_text = extract_text_from_pdf(pdf_file)
|
52 |
cleaned_text = clean_text(extracted_text)
|
53 |
+
initialize_chatbot(cleaned_text, openai_api_key)
|
54 |
chat_history = [("Chatbot initialized. Please ask a question.", "")]
|
55 |
return chat_history
|
56 |
|
57 |
def answer_query(question):
|
58 |
+
global chat_history
|
59 |
if qa_instance is None:
|
60 |
return [("Please upload a PDF and initialize the system first.", "")]
|
61 |
if not question.strip():
|
|
|
72 |
upload.change(setup_qa_system, inputs=[upload], outputs=[chatbot])
|
73 |
question.submit(answer_query, inputs=[question], outputs=[chatbot])
|
74 |
|
75 |
+
if __name__ == "__main__":
|
76 |
+
demo.launch()
|