fahmiaziz98 committed
Commit 986437f · 1 Parent(s): 31a1fee
app.py CHANGED
@@ -6,18 +6,16 @@ from src.tools_retrieval.retriever import RetrieverManager
 from src.workflow import RAGWorkflow
 from src.utils import (
     logger,
-    convert_document_to_markdown,
-    save_to_markdown,
     determine_top_k,
     determine_reranking_top_n
 )
-
-
+
 UPLOAD_FOLDER = "uploads/"
 PERSIST_DIRECTORY = "./chroma_db"
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 os.makedirs(PERSIST_DIRECTORY, exist_ok=True)

+# Initialize session state
 if "messages" not in st.session_state:
     st.session_state.messages = []
 if "retriever" not in st.session_state:
@@ -27,7 +25,6 @@ if "vector_store" not in st.session_state:
 if "workflow" not in st.session_state:
     st.session_state.workflow = None

-
 st.set_page_config(
     page_title="RAG Chatbot",
     layout="wide",
@@ -35,76 +32,74 @@ st.set_page_config(
 )
 st.title("Agentic RAG Chatbot")

+def process_document_upload(file_obj):
+    file_path = os.path.join(UPLOAD_FOLDER, file_obj.name)
+    with open(file_path, "wb") as f:
+        f.write(file_obj.getbuffer())
+    return file_path
+
 with st.sidebar:
     st.header("Upload")
     uploaded_file = st.file_uploader("Upload Document", type=["pdf", "xlsx", "docx", "txt"])
     process_button = st.button("Process Document")

-    if uploaded_file and process_button:
-        with st.spinner("Processing Document..."):
-            file_path = os.path.join(UPLOAD_FOLDER, uploaded_file.name)
-            with open(file_path, "wb") as f:
-                f.write(uploaded_file.getbuffer())
-
-            doc_processor = DocumentProcessor()
-            chunks = doc_processor.load_and_split_pdf(file_path)
-
-            vector_store_manager = VectorStoreManager()
-            vector_store = vector_store_manager.index_documents(chunks)
-
-            st.session_state.vector_store = vector_store
-            st.success("Document processed and indexed successfully!")
-            top_k = determine_top_k(len(chunks))
-            top_n = determine_reranking_top_n(top_k)
-
-            retriever_manager = RetrieverManager(vector_store)
-            retriever_tool = retriever_manager.create_retriever(
-                documents=chunks,
-                top_n=top_n,
-                k=top_k
-            )
-            st.session_state.retriever = retriever_tool
-            st.success("Retriever tool created successfully!")
-            rag_workflow = RAGWorkflow(retriever_tool)
-            workflow = rag_workflow.compile()
-            st.session_state.workflow = workflow
-
-
+    if uploaded_file and process_button:
+        with st.spinner("Processing Document..."):
+            try:
+                file_path = process_document_upload(uploaded_file)
+
+                doc_processor = DocumentProcessor()
+                chunks = doc_processor.load_and_split_pdf(file_path)
+
+                vector_store_manager = VectorStoreManager()
+                vector_store = vector_store_manager.index_documents(chunks)
+                st.session_state.vector_store = vector_store
+                st.success("Document processed and indexed successfully!")
+
+                top_k = determine_top_k(len(chunks))
+                top_n = determine_reranking_top_n(top_k)
+
+                retriever_manager = RetrieverManager(vector_store)
+                retriever_tool = retriever_manager.create_retriever(
+                    documents=chunks,
+                    top_n=top_n,
+                    k=top_k
+                )
+                st.session_state.retriever = retriever_tool
+                st.success("Retriever tool created successfully!")
+
+                rag_workflow = RAGWorkflow(retriever_tool)
+                workflow = rag_workflow.compile()
+                st.session_state.workflow = workflow
+            except Exception as e:
+                logger.error(f"Error processing document: {e}")
+                st.error(f"Error processing document: {e}")

-# Display chat messages
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])

 if prompt := st.chat_input("Ask a question about your document"):
-    # Add user message to chat history
     st.session_state.messages.append({"role": "user", "content": prompt})
     with st.chat_message("user"):
-        st.markdown(prompt)
-
-    # Generate response
+        st.markdown(prompt)
+
     with st.chat_message("assistant"):
-        if st.session_state.retriever is None:
-            final_response = "Please upload a PDF document first."
+        if st.session_state.workflow is None:
+            final_response = "Please upload a document first."
         else:
-            with st.spinner("Thinking..."):
-                # Retrieve relevant documents
-                inputs = {
-                    "messages": [
-                        ("user", prompt),
-                    ]
-                }
-
-                # Generate response using workflow
-                if st.session_state.workflow is not None:
+            try:
+                with st.spinner("Thinking..."):
+                    inputs = {"messages": [("user", prompt)]}
                     response = st.session_state.workflow.invoke(inputs)
                     final_response = response["messages"][-1].content
-                else:
-                    final_response = "Please upload a PDF document first."
-
+            except Exception as e:
+                logger.error(f"Error invoking workflow: {e}")
+                final_response = f"An error occurred while processing your request: {e}"
+
         st.markdown(final_response)
         st.session_state.messages.append({"role": "assistant", "content": final_response})

-# Add clear chat button
 if st.sidebar.button("Clear Chat"):
-    st.session_state.messages = []
+    st.session_state.messages = []
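
Note: app.py sizes retrieval from the chunk count via determine_top_k and determine_reranking_top_n, imported from src.utils; their implementations are not part of this commit. A minimal sketch of what such heuristics might look like, purely as an assumption rather than the repository's actual code:

# Hypothetical sketch only — determine_top_k and determine_reranking_top_n live in
# src.utils and are not shown in this diff; these bodies are assumptions.

def determine_top_k(num_chunks: int) -> int:
    # Assumed heuristic: retrieve more candidates for larger documents,
    # capped at 10 and never fewer than 3.
    return max(3, min(10, num_chunks // 10))

def determine_reranking_top_n(top_k: int) -> int:
    # Assumed heuristic: keep roughly half of the retrieved candidates
    # after reranking, with a floor of 2.
    return max(2, top_k // 2)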
src/llm/llm_interface.py CHANGED
@@ -5,6 +5,6 @@ llm_groq = ChatGroq(
     model="llama3-8b-8192",
     temperature=0.1,
     api_key=os.getenv("GROQ_API_KEY"),
-    # max_retries=3,
-    # streaming=True,
+    max_retries=3,
+    streaming=True,
 )
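
With streaming=True enabled on the ChatGroq client, callers can consume output incrementally through LangChain's standard .stream() interface. A minimal usage sketch, assuming langchain-groq is installed and GROQ_API_KEY is set; the prompt text is illustrative:

from src.llm.llm_interface import llm_groq

# Stream tokens as they arrive instead of waiting for the full completion.
for chunk in llm_groq.stream("Summarize retrieval-augmented generation in one sentence."):
    print(chunk.content, end="", flush=True)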
src/tools_retrieval/retriever.py CHANGED
@@ -45,8 +45,9 @@ class RetrieverManager:
     def create_retriever(self, documents, top_n: int, k: int = 3, ):
         base_retriever = self.create_ensemble_retriever(texts=documents, k=k)
         compression_retriever = self.create_compression_retriever(base_retriever=base_retriever, top_n=top_n)
-        return create_retriever_tool(
+        retriever_tool = create_retriever_tool(
             compression_retriever,
             "retrieve_docs",
             "use tools for search through the user's provided documents and return relevant information about user query.",
-        )
+        )
+        return retriever_tool
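
The object returned by create_retriever_tool is a regular LangChain tool (a Runnable), so the retriever_tool now returned by create_retriever can be exercised on its own before being handed to RAGWorkflow. A usage sketch, assuming a vector_store and chunks already exist as in app.py; the k/top_n values and query string are illustrative:

retriever_manager = RetrieverManager(vector_store)
retriever_tool = retriever_manager.create_retriever(documents=chunks, top_n=3, k=5)

# Invoke the tool directly with a query dict; the same tool can also be bound
# to an LLM via llm.bind_tools([retriever_tool]) inside an agent graph.
print(retriever_tool.invoke({"query": "What does the document cover?"}))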
src/workflow.py CHANGED
@@ -9,13 +9,12 @@ from langgraph.graph import END, StateGraph, START
 from langgraph.prebuilt import ToolNode, tools_condition
 from .state import AgentState
 from src.llm.llm_interface import llm_groq
-
+

 class GradeDocs(BaseModel):
     binary_score: str = Field(description="Relevance score 'yes' or 'no'")


-
 class RAGWorkflow:
     def __init__(self, retriever_tool):
         self.workflow = StateGraph(AgentState)
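
GradeDocs constrains the grader's output to a yes/no binary_score. A common way to wire such a grader is to bind the Pydantic schema to the LLM via structured output; whether RAGWorkflow does exactly this is not visible in this diff, so the sketch below is an assumption, with an illustrative prompt and inputs:

from langchain_core.prompts import ChatPromptTemplate

grade_prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer 'yes' if the retrieved context is relevant to the question, otherwise 'no'."),
    ("human", "Question: {question}\n\nContext: {context}"),
])
# Bind the Pydantic schema so the model must return a GradeDocs instance.
grader = grade_prompt | llm_groq.with_structured_output(GradeDocs)

result = grader.invoke({"question": "What is the refund policy?", "context": "...retrieved chunk..."})
print(result.binary_score)  # "yes" or "no"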