Spaces:

amalsp
/

indian-law-qa-app

Sleeping

App Files Community

amalsp committed on Oct 31

Commit

4241674

verified ·

1 Parent(s): 5bf4b6c

Add semantic search with sentence-transformers and improved UI

Browse files

Files changed (1) hide show

app.py +145 -62

app.py CHANGED Viewed

@@ -2,6 +2,8 @@ import gradio as gr
 from datasets import load_dataset
 import pandas as pd
 import random
 # Load the Indian Law dataset
 print("Loading Indian Law Dataset...")
@@ -9,49 +11,60 @@ ds = load_dataset("viber1/indian-law-dataset")
 # Convert to pandas for easier manipulation
 df = pd.DataFrame(ds['train'])
 print(f"Dataset loaded successfully with {len(df)} entries")
 print(f"Dataset columns: {df.columns.tolist()}")
-# Preview first few entries
-print("\nFirst 3 entries:")
-for i in range(min(3, len(df))):
-    print(f"\nEntry {i+1}:")
-    for col in df.columns:
-        print(f"  {col}: {df.iloc[i][col][:100] if isinstance(df.iloc[i][col], str) else df.iloc[i][col]}...")
 def search_legal_info(question):
-    """Search the dataset for relevant legal information based on user question"""
     if not question or len(question.strip()) == 0:
-        return "Please enter a legal question."
-    question_lower = question.lower()
-    results = []
-    # Search through the dataset
-    for idx, row in df.iterrows():
-        # Check all text columns for matches
-        for col in df.columns:
-            if isinstance(row[col], str) and any(word in row[col].lower() for word in question_lower.split()):
-                results.append(row.to_dict())
-                break
-        if len(results) >= 5:  # Limit to top 5 results
-            break
-    if not results:
-        return "No relevant information found in the dataset. Try rephrasing your question or use different keywords."
-    # Format the response
-    response = "📋 **Legal Information Found:**\n\n"
-    for i, result in enumerate(results, 1):
-        response += f"**Result {i}:**\n"
         for key, value in result.items():
-            if value and isinstance(value, str):
-                # Truncate long text
-                display_value = value[:500] + "..." if len(value) > 500 else value
-                response += f"- **{key}**: {display_value}\n"
-        response += "\n---\n\n"
     return response
@@ -60,62 +73,132 @@ def get_random_sample():
     random_idx = random.randint(0, len(df) - 1)
     sample = df.iloc[random_idx]
-    response = "📝 **Random Dataset Entry:**\n\n"
     for key, value in sample.items():
-        if value and isinstance(value, str):
-            display_value = value[:500] + "..." if len(value) > 500 else value
-            response += f"**{key}**: {display_value}\n\n"
     return response
-# Create Gradio interface
-with gr.Blocks(title="Indian Law Q&A Assistant") as demo:
     gr.Markdown("""
     # 🏛️ Indian Law Q&A Assistant
-    Welcome to the Indian Law Question-Answer Assistant powered by the `viber1/indian-law-dataset`.
-    ### How to use:
-    1. Enter your legal question in the text box below
-    2. Click "Search" to find relevant information from the dataset
-    3. Or click "Random Sample" to explore a random entry from the dataset
     ---
-    ⚠️ **DISCLAIMER**: This application is for **informational purposes only**. The information provided
-    is based on a dataset and should NOT be considered as legal advice. Always consult with a qualified
-    legal professional for specific legal matters and guidance.
-    ---
     """)
     with gr.Row():
-        with gr.Column():
             question_input = gr.Textbox(
-                label="Your Legal Question",
-                placeholder="E.g., What are the provisions related to property rights?",
                 lines=3
             )
             with gr.Row():
-                search_btn = gr.Button("🔍 Search", variant="primary")
-                random_btn = gr.Button("🎲 Random Sample")
-    output_box = gr.Markdown(label="Response")
     # Button actions
     search_btn.click(fn=search_legal_info, inputs=question_input, outputs=output_box)
     random_btn.click(fn=get_random_sample, inputs=None, outputs=output_box)
     gr.Markdown("""
     ---
-    ### Dataset Information:
-    - **Dataset**: viber1/indian-law-dataset
     - **Total Entries**: """ + str(len(df)) + """
-    - **Columns**: """ + ", ".join(df.columns.tolist()) + """
-    *Built with 💙 using Gradio and Hugging Face Datasets*
     """)
 if __name__ == "__main__":

 from datasets import load_dataset
 import pandas as pd
 import random
+from sentence_transformers import SentenceTransformer, util
+import torch
 # Load the Indian Law dataset
 print("Loading Indian Law Dataset...")
 # Convert to pandas for easier manipulation
 df = pd.DataFrame(ds['train'])
 print(f"Dataset loaded successfully with {len(df)} entries")
 print(f"Dataset columns: {df.columns.tolist()}")
+# Load semantic search model
+print("Loading sentence-transformers model for semantic search...")
+model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+# Precompute embeddings for the dataset
+print("Computing embeddings for dataset...")
+df['combined_text'] = df.apply(lambda row: ' '.join([str(val) for val in row.values if pd.notna(val) and isinstance(val, str)]), axis=1)
+corpus_embeddings = model.encode(df['combined_text'].tolist(), convert_to_tensor=True, show_progress_bar=True)
+print("Embeddings computed successfully!")
 def search_legal_info(question):
+    """Search the dataset for relevant legal information using semantic search"""
     if not question or len(question.strip()) == 0:
+        return "⚠️ Please enter a legal question to search."
+    # Encode the query
+    query_embedding = model.encode(question, convert_to_tensor=True)
+    # Compute cosine similarity scores
+    cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
+    # Get top 5 results
+    top_results = torch.topk(cos_scores, k=min(5, len(df)))
+    # Format the response with best match first
+    response = "# 🔍 Search Results\n\n"
+    for i, (score, idx) in enumerate(zip(top_results.values, top_results.indices)):
+        result = df.iloc[idx.item()].to_dict()
+        similarity = score.item()
+        # Skip low relevance results
+        if similarity < 0.2:
+            continue
+        response += f"## 📌 Result {i+1} (Relevance: {similarity*100:.1f}%)\n\n"
         for key, value in result.items():
+            if key == 'combined_text':  # Skip internal field
+                continue
+            if value and isinstance(value, str) and len(value.strip()) > 0:
+                # Clean and format the text
+                display_value = value.strip()
+                if len(display_value) > 800:
+                    display_value = display_value[:800] + "..."
+                response += f"**{key.replace('_', ' ').title()}:**\n\n{display_value}\n\n"
+        response += "---\n\n"
+    if "Result 1" not in response:
+        return "❌ No relevant information found in the dataset. Please try rephrasing your question or use different keywords."
     return response
     random_idx = random.randint(0, len(df) - 1)
     sample = df.iloc[random_idx]
+    response = "# 📝 Random Legal Information\n\n"
     for key, value in sample.items():
+        if key == 'combined_text':  # Skip internal field
+            continue
+        if value and isinstance(value, str) and len(value.strip()) > 0:
+            display_value = value.strip()
+            if len(display_value) > 800:
+                display_value = display_value[:800] + "..."
+            response += f"**{key.replace('_', ' ').title()}:**\n\n{display_value}\n\n"
     return response
+def handle_feedback(question, feedback_type):
+    """Handle user feedback"""
+    return f"✅ Thank you for your {feedback_type}! Your input helps us improve the system."
+# Example questions
+EXAMPLE_QUESTIONS = [
+    "Can a plaint be amended after it has been filed in a civil case in India?",
+    "What are the provisions for bail under Indian law?",
+    "What are the rights of an accused person in India?",
+    "How can property rights be transferred in India?",
+    "What is the procedure for filing a divorce petition?",
+    "What are the provisions related to consumer protection?",
+    "What are the penalties for copyright infringement in India?",
+]
+# Create Gradio interface with improved UI
+with gr.Blocks(title="Indian Law Q&A Assistant", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
     # 🏛️ Indian Law Q&A Assistant
+    ### ⚠️ IMPORTANT DISCLAIMER
+    **This application is for informational purposes only and does NOT constitute legal advice.**
+    The information provided is based on a dataset and should not be relied upon for legal decisions.
+    Always consult with a qualified legal professional for specific legal matters.
     ---
+    Welcome to the Indian Law Question-Answer Assistant powered by semantic search technology
+    and the `viber1/indian-law-dataset`. Ask questions and get relevant legal information instantly!
     """)
     with gr.Row():
+        with gr.Column(scale=2):
             question_input = gr.Textbox(
+                label="💬 Your Legal Question",
+                placeholder="Type your legal question here...",
                 lines=3
             )
             with gr.Row():
+                search_btn = gr.Button("🔍 Search", variant="primary", size="lg")
+                random_btn = gr.Button("🎲 Random Sample", size="lg")
+            gr.Markdown("### 📋 Example Questions (Click to use):")
+            with gr.Row():
+                example_btns = []
+                for example in EXAMPLE_QUESTIONS[:4]:
+                    btn = gr.Button(example, size="sm")
+                    example_btns.append(btn)
+            with gr.Row():
+                for example in EXAMPLE_QUESTIONS[4:]:
+                    btn = gr.Button(example, size="sm")
+                    example_btns.append(btn)
+    output_box = gr.Markdown(label="📄 Response", value="Enter a question above and click Search to begin.")
+    with gr.Row():
+        gr.Markdown("""
+        ### 📢 Feedback
+        Found this helpful? Have suggestions? Click below:
+        """)
+    with gr.Row():
+        helpful_btn = gr.Button("👍 Helpful", size="sm")
+        report_btn = gr.Button("📝 Report Issue", size="sm")
+    feedback_output = gr.Markdown(visible=False)
     # Button actions
     search_btn.click(fn=search_legal_info, inputs=question_input, outputs=output_box)
     random_btn.click(fn=get_random_sample, inputs=None, outputs=output_box)
+    # Example button actions
+    for i, btn in enumerate(example_btns):
+        btn.click(
+            fn=lambda ex=EXAMPLE_QUESTIONS[i]: ex,
+            inputs=None,
+            outputs=question_input
+        )
+    # Feedback actions
+    helpful_btn.click(
+        fn=lambda q: handle_feedback(q, "positive feedback"),
+        inputs=question_input,
+        outputs=feedback_output
+    ).then(lambda: gr.update(visible=True), outputs=feedback_output)
+    report_btn.click(
+        fn=lambda q: handle_feedback(q, "report"),
+        inputs=question_input,
+        outputs=feedback_output
+    ).then(lambda: gr.update(visible=True), outputs=feedback_output)
     gr.Markdown("""
     ---
+    ### 📊 Dataset Information
+    - **Source**: viber1/indian-law-dataset on Hugging Face
     - **Total Entries**: """ + str(len(df)) + """
+    - **Search Method**: Semantic search using sentence-transformers
+    - **Model**: sentence-transformers/all-MiniLM-L6-v2
+    ### 🔧 Features
+    - ✅ Semantic search for better relevance
+    - ✅ Results ranked by similarity score
+    - ✅ Clean, readable Markdown formatting
+    - ✅ Example questions for quick start
+    - ✅ Random exploration of dataset
+    - ✅ User feedback mechanism
+    *Built with ❤️ using Gradio, Hugging Face Datasets, and Sentence Transformers*
     """)
 if __name__ == "__main__":