Spaces:

JARVIS-JI
/

IPCBNSPredictorRAG-basedLegalAISystem

Sleeping

App Files Files Community

JARVIS-JI committed on Sep 12

Commit

8e9e93e

verified ·

1 Parent(s): 4ca148c

Create app.py

Browse files

Files changed (1) hide show

app.py +235 -0

app.py ADDED Viewed

	@@ -0,0 +1,235 @@

+import gradio as gr
+from PyPDF2 import PdfReader
+from transformers import AutoTokenizer, AutoModel
+import torch
+import faiss
+import numpy as np
+from groq import Groq
+import os
+# ------------- CONSTANTS ------------------------------------------------------
+LEGAL_BERT_MODEL = "nlpaueb/legal-bert-base-uncased"
+# Multiple legal documents - adjust PDFs here
+DOCS = [
+    ("bns_full.pdf", "Bharatiya Nyaya Sanhita 2023"),
+    ("bns_ipc_mapping.pdf", "BNS-IPC Comparative Mapping"),
+]
+MAX_CHUNK_SIZE = 1000
+OVERLAP = 200
+TOP_K = 5  # Number of chunks to retrieve for context
+LLAMA_MODEL = 'llama-3.3-70b-versatile'
+# Groq API setup
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+groq_client = Groq(api_key=GROQ_API_KEY)
+# ------------- LEGAL-BERT EMBEDDER CLASS ------------------------------------
+class LegalBERTEmbedder:
+    def __init__(self, model_name=LEGAL_BERT_MODEL):
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModel.from_pretrained(model_name)
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model.to(self.device)
+        self.model.eval()
+    def embed(self, texts):
+        all_embeddings = []
+        with torch.no_grad():
+            for text in texts:
+                inputs = self.tokenizer(text, return_tensors="pt",
+                                      truncation=True, max_length=512).to(self.device)
+                outputs = self.model(**inputs)
+                cls_embed = outputs.last_hidden_state[:, 0, :].cpu().numpy()
+                all_embeddings.append(cls_embed.flatten())
+        return np.vstack(all_embeddings)
+# ------------- PDF PROCESSING FUNCTIONS ------------------------------------
+def extract_text_from_pdf(pdf_path):
+    """Extract text from PDF file"""
+    reader = PdfReader(pdf_path)
+    raw_text = ""
+    for page in reader.pages:
+        text = page.extract_text()
+        if text:
+            raw_text += text + "\n"
+    return raw_text
+def chunk_text(text, max_chunk_size=MAX_CHUNK_SIZE, overlap=OVERLAP):
+    """Split text into overlapping chunks"""
+    chunks = []
+    start = 0
+    length = len(text)
+    while start < length:
+        end = min(start + max_chunk_size, length)
+        chunk = text[start:end]
+        chunks.append(chunk)
+        start += max_chunk_size - overlap
+    return chunks
+# ------------- FAISS INDEX FUNCTIONS ---------------------------------------
+def build_faiss_index(embeddings):
+    """Build FAISS index for similarity search"""
+    dim = embeddings.shape[1]
+    index = faiss.IndexFlatIP(dim)  # Inner product for cosine similarity
+    faiss.normalize_L2(embeddings)
+    index.add(embeddings)
+    return index
+def query_faiss(index, query_embed, k=TOP_K):
+    """Query FAISS index for top-k similar chunks"""
+    faiss.normalize_L2(query_embed)
+    distances, indices = index.search(query_embed, k)
+    return distances, indices
+# ------------- LOAD AND PROCESS ALL DOCUMENTS ------------------------------
+print("Loading and processing multiple legal documents...")
+embedder = LegalBERTEmbedder()
+all_chunks = []
+metadata = []  # Store (act_label, original_chunk_text) for reference
+print("Extracting and chunking text from all PDFs...")
+for pdf_path, act_label in DOCS:
+    try:
+        raw_text = extract_text_from_pdf(pdf_path)
+        print(f"Extracted {len(raw_text)} characters from {act_label}")
+        chunks = chunk_text(raw_text)
+        print(f"Created {len(chunks)} chunks from {act_label}")
+        # Prefix each chunk with act label for better context
+        labeled_chunks = [f"[{act_label}] {chunk}" for chunk in chunks]
+        all_chunks.extend(labeled_chunks)
+        metadata.extend([(act_label, chunk) for chunk in chunks])
+    except Exception as e:
+        print(f"Error processing {pdf_path}: {str(e)}")
+        continue
+print(f"Total chunks created: {len(all_chunks)}")
+print("Embedding all text chunks with Legal-BERT...")
+chunk_embeddings = embedder.embed(all_chunks)
+print("Embeddings created successfully")
+print("Building FAISS index...")
+faiss_index = build_faiss_index(chunk_embeddings)
+print("FAISS index built successfully")
+# ------------- PROMPT TEMPLATES -------------------------------------------
+# System message sent with every chat completion. It fixes the assistant's
+# role and the four-part answer layout (context, BNS sections, IPC sections,
+# summary) that responses are asked to follow.
+SYSTEM_PROMPT = """You are a senior Indian legal expert specializing in the Bharatiya Nyaya Sanhita 2023 (BNS) and its correspondence with the Indian Penal Code 1860 (IPC).
+When answering any question, you MUST use this exact format:
+CONTEXT/SITUATION:
+[Provide detailed explanation of the legal context and situation]
+BNS SECTIONS:
+[List the specific BNS sections and subsections that apply, with proper citations]
+IPC SECTIONS (if applicable):
+[List the corresponding IPC sections based on mappings, with proper citations]
+SUMMARY:
+[Provide a clear one-sentence summary highlighting the applicable BNS and IPC sections in **bold** format]
+Always cite specific sections when available and ensure your response covers relevant BNS provisions and mapped IPC equivalents."""
+def build_user_prompt(context, question):
+    """Build the user message sent to the chat model.
+
+    Args:
+        context: Retrieved document extracts, already joined into one string.
+        question: The user's question, inserted verbatim.
+
+    Returns:
+        A single string containing an introductory line, the context, the
+        question and a closing instruction to follow the system format.
+    """
+    return f"""Based on the following relevant extracts from BNS and IPC legislation:
+{context}
+Question: {question}
+Please provide a comprehensive legal answer following the exact format specified in the system instructions."""
+# ------------- MAIN QUERY FUNCTION ----------------------------------------
+def answer_query(user_query):
+    """Main function to answer user queries"""
+    try:
+        # Embed the user query
+        query_embed = embedder.embed([user_query])
+        # Retrieve top-k similar chunks from FAISS
+        _, indices = query_faiss(faiss_index, query_embed, k=TOP_K)
+        retrieved_chunks = [all_chunks[i] for i in indices[0]]
+        # Prepare context for Llama 3
+        context = "\n\n".join(retrieved_chunks)
+        # Create chat completion using Groq API with Llama 3
+        chat_completion = groq_client.chat.completions.create(
+            messages=[
+                {
+                    "role": "system",
+                    "content": SYSTEM_PROMPT
+                },
+                {
+                    "role": "user",
+                    "content": build_user_prompt(context, user_query)
+                }
+            ],
+            model=LLAMA_MODEL,
+            temperature=0.1,
+            max_tokens=1024
+        )
+        return chat_completion.choices[0].message.content.strip()
+    except Exception as e:
+        return f"Error processing query: {str(e)}\n\nPlease check your Groq API key and internet connection."
+# ------------- GRADIO INTERFACE -------------------------------------------
+# Gradio UI: a header, a query box with submit/clear buttons, a Markdown
+# output area and a set of example questions.
+with gr.Blocks(title="IPC & BNS Legal Assistant") as demo:
+    gr.Markdown("""
+    # 🏛️ IPC & BNS Legal Assistant
+    **Comprehensive Legal Q&A System covering:**
+    - Bharatiya Nyaya Sanhita 2023 (BNS)
+    - Corresponding Indian Penal Code 1860 (IPC) sections
+    Ask any question about Indian criminal legislation and get structured legal answers with proper citations.
+    """)
+    # Input area: the question textbox with its two action buttons below it.
+    with gr.Row():
+        with gr.Column():
+            query_input = gr.Textbox(
+                label="💼 Enter your legal query",
+                placeholder="e.g., What are the penalties for murder under BNS? What is the IPC equivalent for theft?",
+                lines=4,
+                max_lines=8
+            )
+            with gr.Row():
+                submit_btn = gr.Button("🔍 Get Legal Answer", variant="primary", scale=2)
+                clear_btn = gr.Button("🗑️ Clear", scale=1)
+    # Output area: answers are rendered as Markdown; the initial value is a
+    # placeholder hint.
+    with gr.Row():
+        answer_output = gr.Markdown(
+            label="📋 Legal Analysis",
+            value="*Submit your question to get a structured legal analysis...*"
+        )
+    # Event handlers: both the button click and submitting the textbox call
+    # answer_query with the textbox content and write to the output area.
+    submit_btn.click(answer_query, inputs=query_input, outputs=answer_output)
+    query_input.submit(answer_query, inputs=query_input, outputs=answer_output)
+    # Clear empties the textbox and restores the placeholder hint.
+    clear_btn.click(lambda: ("", "*Submit your question to get a structured legal analysis...*"),
+                   outputs=[query_input, answer_output])
+    # Example questions bound to the query box; results are not cached
+    # (cache_examples=False).
+    gr.Examples(
+        examples=[
+            ["What are the penalties for murder under BNS?"],
+            ["What is the IPC equivalent for BNS Section 103?"],
+            ["What constitutes theft according to BNS legislation?"],
+            ["How are punishments defined for assault in BNS?"],
+            ["What are the legal provisions for robbery under IPC and BNS?"]
+        ],
+        inputs=query_input,
+        outputs=answer_output,
+        fn=answer_query,
+        cache_examples=False
+    )
+# Launch the interface
+if __name__ == "__main__":
+    demo.launch(
+        share=False,
+        debug=True,
+        show_error=True
+    )