Spaces:

Rakib023
/

project2

Runtime error

App Files Files Community

project2 / app.py

Rakib023

Create app.py

45fb094 verified about 2 months ago

raw

history blame contribute delete

5.35 kB

	# app.py
	import streamlit as st
	from rag_pipeline import load_and_process_documents, ask_question

	# Page chrome: wide layout, a title, and a one-line description of the corpus.
	st.set_page_config(
	    page_title="Bangladesh Law QA",
	    layout="wide",
	)
	st.title("📚 Bangladesh Law RAG QA System")
	st.markdown(
	    "Ask legal questions based on the Constitution, ICT Act, Labour Law, and more."
	)

	# Index the bundled PDFs; the result is cached by Streamlit so reruns reuse it.
	@st.cache_resource
	def setup():
	    """Process the bundled law PDFs and return the pipeline objects.

	    Returns whatever ``load_and_process_documents`` returns for the fixed
	    list of PDF paths below (paths are relative to the working directory).
	    """
	    return load_and_process_documents(
	        [
	            "./pdfs/Bangladesh-ICT-Act-2006.pdf",
	            "./pdfs/Bangladesh-Labour-Act-2006_English-Upto-2018.pdf",
	            "./pdfs/bangladesh_rti_act_2009_summary.pdf",
	            "./pdfs/bgd-gbv-19-03-law-1860-eng-the-penal-code-1860.pdf",
	            "./pdfs/constitution.pdf",
	            "./pdfs/gazette.pdf",
	            "./pdfs/unicef.pdf",
	        ]
	    )

	# Pipeline objects produced by the cached setup() call.
	chunks, retriever, qa_chain = setup()

	# Free-text question plus an optional restriction to a single law.
	query = st.text_input("🔍 Enter your legal question")
	law_options = ["All", "ICT Act", "Labour Act", "Penal Code", "Constitution"]
	law_filter = st.selectbox("📘 Filter by Law (optional)", law_options)
	# "All" is a UI-only choice; ask_question treats None as "no filter".
	law_filter = None if law_filter == "All" else law_filter

	def _answer_and_render(question, law, spinner_text):
	    """Answer one question and render the answer with its source documents.

	    Args:
	        question: The question text passed to ``ask_question``.
	        law: Law name to filter on, or ``None`` for no filter.
	        spinner_text: Message shown while the answer is being produced.
	    """
	    with st.spinner(spinner_text):
	        answer, sources = ask_question(question, retriever, qa_chain, law_filter=law)

	    if not sources:
	        # An empty source list accompanies the "no relevant information"
	        # fallback message; show it as a warning rather than a success.
	        st.warning(answer)
	        return

	    st.success(answer)
	    with st.expander("📎 Source Documents"):
	        for doc in sources:
	            st.markdown(f"{doc.metadata.get('law_name', '')} - {doc.metadata.get('section_heading', '')}")
	            # Only a 500-character preview of each source chunk is shown.
	            st.text(doc.page_content[:500])


	if query:
	    _answer_and_render(query, law_filter, "Answering...")

	# BONUS: Predefined sample questions
	st.markdown("---")
	st.markdown("### 🧪 Try Sample Legal Questions:")
	# Each entry pairs a question with the law it should be filtered to.
	sample_questions = [
	    ("What does the Constitution say about freedom of expression?", "Constitution"),
	    ("Under ICT Act, is cyberbullying a crime?", "ICT Act"),
	    ("How many hours can a laborer work in a day?", "Labour Act"),
	    ("What are the punishments under the Digital Security Act for hacking?", "ICT Act"),
	    ("Is digital evidence allowed in court?", "ICT Act"),
	]

	for q, lf in sample_questions:
	    if st.button(f"▶️ {q}"):
	        _answer_and_render(q, lf, "Running...")


	# rag_pipeline.py
	import os, re
	from langchain_community.document_loaders import PyPDFLoader
	from langchain.schema import Document
	from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
	from langchain.vectorstores import Chroma
	from langchain.chains import RetrievalQA

	# Ordered (file-name keyword, metadata) pairs. The first keyword found in the
	# lower-cased file name wins, so the order here is significant.
	_LAW_METADATA = (
	    ("ict", {"law_name": "ICT Act", "year": 2006, "law_type": "ICT"}),
	    ("labour", {"law_name": "Labour Act", "year": 2018, "law_type": "Labour"}),
	    ("penal", {"law_name": "Penal Code", "year": 1860, "law_type": "Criminal"}),
	    ("constitution", {"law_name": "Constitution", "year": 1972, "law_type": "Constitutional"}),
	)

	# Matches headings such as "Section 12", "Article 39." or "Chapter 2.1".
	# The single capture group makes re.split() keep the headings in its output.
	# Escapes are single backslashes: in a raw string "\\s" would match a literal
	# backslash followed by "s" and never match real text.
	_SECTION_PATTERN = re.compile(r"((?:Section|Article|Chapter)\s+\d+\.?\d*)", re.IGNORECASE)


	def _law_metadata_for(source_name):
	    """Return the metadata for the first keyword found in source_name, else {}."""
	    lowered = source_name.lower()
	    for keyword, metadata in _LAW_METADATA:
	        if keyword in lowered:
	            return metadata
	    return {}


	def _split_into_sections(text):
	    """Split text into (heading, body) pairs; leading text gets an empty heading."""
	    parts = _SECTION_PATTERN.split(text)
	    sections = []
	    preamble = parts[0].strip()
	    if preamble:
	        # Text before the first heading (or a page with no heading at all)
	        # is kept so that it remains searchable.
	        sections.append(("", preamble))
	    # After the preamble, parts alternates heading, body, heading, body, ...
	    for heading, body in zip(parts[1::2], parts[2::2]):
	        sections.append((heading.strip(), body.strip()))
	    return sections


	def load_and_process_documents(pdf_paths):
	    """Load PDFs, split them into section chunks and build the QA pipeline.

	    Args:
	        pdf_paths: Iterable of paths to the PDF files to index.

	    Returns:
	        A ``(section_chunks, retriever, qa_chain)`` tuple: the list of chunk
	        documents, a similarity retriever over them (k=5), and a "stuff"
	        RetrievalQA chain that returns its source documents.

	    Raises:
	        ValueError: If no text could be extracted from any of the PDFs.
	    """
	    all_docs = []
	    for path in pdf_paths:
	        pages = PyPDFLoader(path).load()
	        source_name = os.path.basename(path)
	        law_metadata = _law_metadata_for(source_name)
	        for page in pages:
	            # Store only the file name as the source, then tag the law it
	            # belongs to (files matching no keyword get no law metadata).
	            page.metadata["source"] = source_name
	            page.metadata.update(law_metadata)
	        all_docs.extend(pages)

	    # Section splitting
	    section_chunks = []
	    for doc in all_docs:
	        for heading, body in _split_into_sections(doc.page_content or ""):
	            chunk_text = f"{heading}\n{body}" if heading else body
	            meta = doc.metadata.copy()
	            meta.update({"section_heading": heading})
	            section_chunks.append(Document(page_content=chunk_text, metadata=meta))

	    if not section_chunks:
	        # Fail early with a clear message instead of an opaque vector-store error.
	        raise ValueError("No text could be extracted from the supplied PDF files.")

	    # Embedding + Vector store
	    # NOTE(review): the store is written to ./chroma_db on every call; confirm
	    # whether repeated start-ups add duplicate entries to an existing store.
	    embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
	    vectorstore = Chroma.from_documents(section_chunks, embedding=embedding, persist_directory="./chroma_db")
	    vectorstore.persist()

	    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
	    llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", temperature=0)
	    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True, chain_type="stuff")

	    return section_chunks, retriever, qa_chain

	def ask_question(query, retriever, qa_chain, law_filter=None, year_filter=None):
	    """Answer a question using only the retrieved documents that pass the filters.

	    Args:
	        query: The question text.
	        retriever: Retriever used to fetch candidate documents for the query.
	        qa_chain: RetrievalQA chain; its ``combine_documents_chain`` is used
	            to generate the answer from the filtered documents.
	        law_filter: If truthy, keep only documents whose ``law_name``
	            metadata equals this value.
	        year_filter: If truthy, keep only documents whose ``year`` metadata
	            equals this value.

	    Returns:
	        An ``(answer, sources)`` tuple. ``sources`` is the filtered document
	        list the answer was generated from; when nothing passes the filters
	        the result is ``("No relevant information found.", [])``.
	    """
	    docs = retriever.invoke(query)
	    if law_filter:
	        docs = [d for d in docs if d.metadata.get("law_name") == law_filter]
	    if year_filter:
	        docs = [d for d in docs if d.metadata.get("year") == year_filter]

	    if not docs:
	        return "No relevant information found.", []

	    # Calling the RetrievalQA chain itself would retrieve again with its own
	    # retriever and ignore the filtered list, so the filters would not affect
	    # the answer. Run the document-combining step directly on `docs` instead.
	    # NOTE(review): filtering happens after retrieval, so a filter can leave
	    # fewer documents than were retrieved (or none).
	    combine_chain = qa_chain.combine_documents_chain
	    result = combine_chain.invoke({"input_documents": docs, "question": query})
	    return result[combine_chain.output_key], docs