DrishtiSharma committed on
Commit
a620e89
·
verified ·
1 Parent(s): bba0424

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -39
app.py CHANGED
@@ -28,16 +28,15 @@ if "vector_store" not in st.session_state:
28
  st.session_state.vector_store = None
29
  if "documents" not in st.session_state:
30
  st.session_state.documents = None
31
- if "processed" not in st.session_state:
32
- st.session_state.processed = False # Prevent redundant processing
 
 
 
 
33
 
34
- # Step 1: Choose PDF Source (Horizontal radio buttons)
35
- pdf_source = st.radio(
36
- "Upload or provide a link to a PDF:",
37
- ["Upload a PDF file", "Enter a PDF URL"],
38
- index=0,
39
- horizontal=True
40
- )
41
 
42
  pdf_path = None
43
  if pdf_source == "Upload a PDF file":
@@ -47,7 +46,9 @@ if pdf_source == "Upload a PDF file":
47
  with open(pdf_path, "wb") as f:
48
  f.write(uploaded_file.getbuffer())
49
  st.success("✅ PDF Uploaded Successfully!")
50
- st.session_state.processed = False # Reset processing
 
 
51
 
52
  elif pdf_source == "Enter a PDF URL":
53
  pdf_url = st.text_input("Enter PDF URL:")
@@ -60,44 +61,50 @@ elif pdf_source == "Enter a PDF URL":
60
  with open(pdf_path, "wb") as f:
61
  f.write(response.content)
62
  st.success("✅ PDF Downloaded Successfully!")
63
- st.session_state.processed = False # Reset processing
 
 
64
  else:
65
  st.error("❌ Failed to download PDF. Check the URL.")
66
  except Exception as e:
67
  st.error(f"Error downloading PDF: {e}")
68
 
69
-
70
- # Step 2: Process PDF and Create Vector Store (Only if Not Processed)
71
- if pdf_path and not st.session_state.processed:
72
- with st.spinner("Loading and processing PDF..."):
73
  loader = PDFPlumberLoader(pdf_path)
74
  docs = loader.load()
 
 
75
  st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
76
 
77
- # Step 3: Chunking
78
- with st.spinner("Chunking the document..."):
79
- model_name = "nomic-ai/modernbert-embed-base"
80
- embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': False})
81
- text_splitter = SemanticChunker(embedding_model)
82
- documents = text_splitter.split_documents(docs)
83
- st.session_state.documents = documents
84
- st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
85
-
86
- # Step 4: Setup Vectorstore
87
- with st.spinner("Creating vector store..."):
88
- vector_store = Chroma(
89
- collection_name="deepseek_collection",
90
- collection_metadata={"hnsw:space": "cosine"},
91
- embedding_function=embedding_model
92
- )
93
- vector_store.add_documents(documents)
94
- num_documents = len(vector_store.get()["documents"])
95
- st.session_state.vector_store = vector_store # Store in session state
96
- st.session_state.processed = True # Mark as processed
97
- st.success(f"✅ **Vector Store Created!** Total documents stored: {num_documents}")
98
-
99
- # Step 5: Query Input (Only allow if vector store exists)
100
- if st.session_state.vector_store:
 
 
 
101
  query = st.text_input("🔍 Enter a Query:")
102
  if query:
103
  with st.spinner("Retrieving relevant contexts..."):
@@ -145,5 +152,14 @@ if st.session_state.vector_store:
145
  st.subheader("🟥 RAG Final Response")
146
  st.success(final_response['final_response'])
147
 
 
 
 
 
 
 
 
 
 
148
  else:
149
  st.warning("📄 Please upload or provide a PDF URL first.")
 
28
  st.session_state.vector_store = None
29
  if "documents" not in st.session_state:
30
  st.session_state.documents = None
31
+ if "pdf_loaded" not in st.session_state:
32
+ st.session_state.pdf_loaded = False
33
+ if "chunked" not in st.session_state:
34
+ st.session_state.chunked = False
35
+ if "vector_created" not in st.session_state:
36
+ st.session_state.vector_created = False
37
 
38
+ # Step 1: Choose PDF Source
39
+ pdf_source = st.radio("Upload or provide a link to a PDF:", ["Upload a PDF file", "Enter a PDF URL"], index=0, horizontal=True)
 
 
 
 
 
40
 
41
  pdf_path = None
42
  if pdf_source == "Upload a PDF file":
 
46
  with open(pdf_path, "wb") as f:
47
  f.write(uploaded_file.getbuffer())
48
  st.success("✅ PDF Uploaded Successfully!")
49
+ st.session_state.pdf_loaded = False
50
+ st.session_state.chunked = False
51
+ st.session_state.vector_created = False
52
 
53
  elif pdf_source == "Enter a PDF URL":
54
  pdf_url = st.text_input("Enter PDF URL:")
 
61
  with open(pdf_path, "wb") as f:
62
  f.write(response.content)
63
  st.success("✅ PDF Downloaded Successfully!")
64
+ st.session_state.pdf_loaded = False
65
+ st.session_state.chunked = False
66
+ st.session_state.vector_created = False
67
  else:
68
  st.error("❌ Failed to download PDF. Check the URL.")
69
  except Exception as e:
70
  st.error(f"Error downloading PDF: {e}")
71
 
72
+ # Step 2: Process PDF
73
+ if pdf_path and not st.session_state.pdf_loaded:
74
+ with st.spinner("Loading PDF..."):
 
75
  loader = PDFPlumberLoader(pdf_path)
76
  docs = loader.load()
77
+ st.session_state.documents = docs
78
+ st.session_state.pdf_loaded = True
79
  st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
80
 
81
+ # Step 3: Chunking (Only if Not Already Done)
82
+ if st.session_state.pdf_loaded and not st.session_state.chunked:
83
+ with st.spinner("Chunking the document..."):
84
+ model_name = "nomic-ai/modernbert-embed-base"
85
+ embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': False})
86
+ text_splitter = SemanticChunker(embedding_model)
87
+ documents = text_splitter.split_documents(st.session_state.documents)
88
+ st.session_state.documents = documents
89
+ st.session_state.chunked = True
90
+ st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
91
+
92
+ # Step 4: Setup Vectorstore
93
+ if st.session_state.chunked and not st.session_state.vector_created:
94
+ with st.spinner("Creating vector store..."):
95
+ vector_store = Chroma(
96
+ collection_name="deepseek_collection",
97
+ collection_metadata={"hnsw:space": "cosine"},
98
+ embedding_function=embedding_model
99
+ )
100
+ vector_store.add_documents(st.session_state.documents)
101
+ num_documents = len(vector_store.get()["documents"])
102
+ st.session_state.vector_store = vector_store
103
+ st.session_state.vector_created = True
104
+ st.success(f"✅ **Vector Store Created!** Total documents stored: {num_documents}")
105
+
106
+ # Step 5: Query Input
107
+ if st.session_state.vector_created:
108
  query = st.text_input("🔍 Enter a Query:")
109
  if query:
110
  with st.spinner("Retrieving relevant contexts..."):
 
152
  st.subheader("🟥 RAG Final Response")
153
  st.success(final_response['final_response'])
154
 
155
+ # Final + Intermediate Outputs
156
+ st.subheader("🔍 **Full Workflow Breakdown:**")
157
+ st.json({
158
+ "Context Relevancy Evaluation": relevancy_response["relevancy_response"],
159
+ "Relevant Contexts": relevant_response["context_number"],
160
+ "Extracted Contexts": final_contexts["relevant_contexts"],
161
+ "Final Answer": final_response["final_response"]
162
+ })
163
+
164
  else:
165
  st.warning("📄 Please upload or provide a PDF URL first.")