jayasrees commited on
Commit
9d21edd
·
1 Parent(s): d60dbcf

first commit

Browse files
.gitignore ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # OS / editor
2
+ .DS_Store
3
+ Thumbs.db
4
+ .idea/
5
+ .vscode/
6
+
7
+ # Python
8
+ __pycache__/
9
+ *.py[cod]
10
+ *$py.class
11
+ .pytest_cache/
12
+ .mypy_cache/
13
+ .ruff_cache/
14
+ .coverage
15
+ .coverage.*
16
+ htmlcov/
17
+ dist/
18
+ build/
19
+ *.egg-info/
20
+
21
+ # Virtual environments
22
+ venv/
23
+ .venv/
24
+ backend/.venv/
25
+ ENV/
26
+ env/
27
+
28
+ # Environment files
29
+ .env
30
+ .env.*
31
+ !.env.example
32
+
33
+ # Logs
34
+ *.log
35
+ logs/
36
+
37
+ # Databases / local state
38
+ *.db
39
+ *.sqlite
40
+ *.sqlite3
41
+
42
+ # Runtime artifacts
43
+ output/
44
+ tmp/
45
+ *.tmp
46
+
47
+ # Frontend
48
+ node_modules/
49
+ .next/
50
+ coverage/
51
+
52
+ # Local model/checkpoint artifacts (large)
53
+ merged_tinyllama_instruction/
54
+ *.bin
55
+ *.pt
56
+ *.ckpt
57
+ *.safetensors
58
+
analysis/common_analyzer.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Strict Domain Analyzer for Legal Documents.
3
+ Implements specific checks for:
4
+ - Entity Roles (Vendor vs Vendee)
5
+ - Domain Categories (Financial, Possession, Ownership, etc.)
6
+ - Timeline Logic (Agreement vs Registration)
7
+ - Numeric Consistency within context
8
+ """
9
+
10
+ import re
11
+
12
+ # =========================
13
+ # 1. STRICT CLASSIFICATION
14
+ # =========================
15
+
16
def is_legal_boilerplate(text):
    """Return True for standard legal headers, footers, and witness blocks."""
    lowered = text.lower()

    # Signature / witness blocks are always treated as boilerplate.
    if "signed by" in lowered or "witness" in lowered:
        return True

    boilerplate_markers = (
        "in witness whereof", "signed and delivered", "witnesses:",
        "schedule", "jurisdiction", "arbitration", "notice",
        "all that piece and parcel", "north by", "south by",
    )
    # Very short fragments (< 5 words) mentioning a marker are headers/footers.
    is_short_fragment = len(lowered.split()) < 5
    return is_short_fragment and any(m in lowered for m in boilerplate_markers)
34
+
35
def get_clause_domain(text):
    """
    Classify a clause into a strict legal domain.

    Returns one of: 'RECITAL', 'DEFINITION', 'FINANCIAL', 'POSSESSION',
    'OWNERSHIP', 'ENCUMBRANCE', 'ADMINISTRATIVE', 'OPERATIVE', 'GENERAL'.
    Checks run in priority order; the first match wins.
    """
    lowered = text.lower()

    # 1. RECITAL (background) — "whereas" preamble.
    if lowered.startswith("whereas") or "and whereas" in lowered:
        return "RECITAL"

    # 2. DEFINITION — interpretation clauses.
    if "shall mean" in lowered or "expression vendor" in lowered or "expression vendee" in lowered:
        return "DEFINITION"

    # 3-7. Keyword-driven domains, checked in priority order.
    keyword_domains = (
        ("FINANCIAL", ("rs.", "rupees", "paid", "consideration", "sum of",
                       "amount", "price", "cheque", "bank")),
        ("POSSESSION", ("possession", "handed over", "delivered", "vacant")),
        ("OWNERSHIP", ("owner", "title", "interest", "rights", "absolute", "fee simple")),
        ("ENCUMBRANCE", ("encumbrance", "mortgage", "loan", "charge", "lien", "litigation")),
        ("ADMINISTRATIVE", ("witness", "signed", "schedule", "jurisdiction",
                            "arbitration", "notice")),
    )
    for domain, keywords in keyword_domains:
        if any(keyword in lowered for keyword in keywords):
            return domain

    # 8. OPERATIVE — the clause carries the action of the deed.
    if lowered.startswith("that") or "hereby" in lowered or "now this deed" in lowered:
        return "OPERATIVE"

    return "GENERAL"
75
+
76
def get_entities(text):
    """Return the set of party roles ('Vendor'/'Vendee') named in the clause."""
    lowered = text.lower()
    return {role for role in ("Vendor", "Vendee") if role.lower() in lowered}
85
+
86
+ # =========================
87
+ # 2. EXTRACTION HELPERS
88
+ # =========================
89
+
90
def extract_numbers(text):
    """
    Extract integer values from text for numeric comparison.

    Handles plain integers ("500", "12345") as well as comma-grouped
    amounts in both Western ("1,234,567") and Indian lakh/crore
    ("5,50,000") digit grouping; commas are stripped before conversion.

    FIX: the previous pattern \\b\\d{1,3}(?:,\\d{3})*\\b could not match
    Indian 2-digit comma groups (splitting "5,50,000" into 5/50/0) and
    silently skipped plain numbers longer than 3 digits.
    """
    # \d+ accepts plain numbers of any length; ,\d{2,3} accepts both
    # Indian (2-digit) and Western (3-digit) comma groups.
    return [int(n.replace(",", "")) for n in re.findall(r'\b\d+(?:,\d{2,3})*\b', text)]
94
+
95
def has_negation(text):
    """
    Return True when the clause contains a whole-word negation.

    FIX: the previous version used substring containment, so any word
    containing "no"/"not" ("notice", "notwithstanding", "now", "another")
    was wrongly flagged as a negation. Word boundaries fix that.
    "must not" / "shall not" are still covered by the standalone "not".
    """
    return re.search(r"\b(?:not|never|no|cannot)\b", text.lower()) is not None
98
+
99
def has_exception_language(text):
    """Return True when the clause uses legal exception/qualification wording."""
    lowered = text.lower()
    for qualifier in (
        "subject to", "notwithstanding", "except as provided",
        "unless otherwise", "provided however", "without prejudice",
    ):
        if qualifier in lowered:
            return True
    return False
106
+
107
def is_definition(text):
    """Return True when the clause reads as a definition clause."""
    lowered = text.lower()
    definition_markers = ("shall mean", "means", "defined as")
    return any(marker in lowered for marker in definition_markers)
113
+
114
def is_party_intro(text):
    """
    Return True when a clause is merely a party description (a "bio").

    A clause counts as a bio when at least two of three signal groups
    are present: an address fragment, a family relation, and an ID.
    """
    lowered = text.lower()

    signal_patterns = (
        # Address fragments: "Door No", "D.No", "residing at", ...
        r"(door\s*no|d\.no|residing\s*at|post\s*,\s*village)",
        # Family relations: "son of", "w/o", "s/o", ...
        r"\b(son|wife|daughter|husband|father|mother|s/o|w/o|d/o)\b",
        # Identity documents / contact details.
        r"(aadhaar|pan\s*no|id\s*card|mobile\s*no)",
    )
    hits = sum(1 for pattern in signal_patterns if re.search(pattern, lowered))
    return hits >= 2
140
+
141
+ # =========================
142
+ # 3. CORE LOGIC GATES
143
+ # =========================
144
+
145
def analyze_pair(text1, text2, similarity, threshold=0.75):
    """
    Strict rule-based analyzer returning (Label, Score, Reason).

    Runs a sequence of gates: skip filters (boilerplate, domain/entity
    mismatch, party bios, definitions, possession timelines) that
    short-circuit with (None, 0.0, reason), then positive checks
    (numeric mismatch, legal qualification, negation conflict) and
    finally a similarity threshold promoting the pair to an NLI
    candidate.

    Args:
        text1, text2: clause texts to compare.
        similarity: pre-computed similarity score in [0, 1].
        threshold: minimum similarity to consider as CANDIDATE (default 0.75).
    """
    # --- GATE 0: BOILERPLATE CHECK ---
    if is_legal_boilerplate(text1) or is_legal_boilerplate(text2):
        return None, 0.0, "Boilerplate (Skipped)"

    # --- GATE 1: DOMAIN MISMATCH ---
    d1 = get_clause_domain(text1)
    d2 = get_clause_domain(text2)
    t1_lower, t2_lower = text1.lower(), text2.lower()

    if d1 != "GENERAL" and d2 != "GENERAL" and d1 != d2:
        # RELAXATION: only bypass when similarity is VERY high (which
        # suggests misclassification); otherwise never compare apples
        # (Financial) to oranges (Possession), even in Deep Search mode.
        if similarity < 0.85:
            return None, 0.0, "Domain Mismatch"

    # --- HARDENED CHECK: GENERAL vs SPECIFIC ---
    # Common noise source: "Any other details" matching "The price is Rs 100".
    if (d1 == "GENERAL") != (d2 == "GENERAL"):
        if similarity < 0.80:
            return None, 0.0, "General vs Specific Domain (Skipped)"

    # --- SPECIFIC FILTER: MONEY vs TIMELINE ---
    # Prevents "Price is X" vs "Payment due on Date Y" false positives.
    is_financial = d1 == "FINANCIAL" or d2 == "FINANCIAL"
    date_pattern = r"\d{1,2}[./-]\d{1,2}[./-]\d{2,4}"
    has_date = re.search(date_pattern, text1) or re.search(date_pattern, text2)
    if is_financial and has_date:
        # Unless explicitly about a payment "schedule", a price clause and
        # a dated clause are likely about different things.
        if "schedule" not in t1_lower and "schedule" not in t2_lower:
            if similarity < 0.85:
                return None, 0.0, "Financial vs Timeline Mismatch"

    # --- SPECIFIC FILTER: ELIGIBILITY vs ASSISTANCE ---
    # Eligibility criteria vs assistance details are distinct sections.
    eligibility_words = ["eligible", "qualify", "criteria", "requirement"]
    assistance_words = ["provide", "grant", "subsidy", "support", "assistance"]
    is_eligibility = any(w in t1_lower for w in eligibility_words) or \
        any(w in t2_lower for w in eligibility_words)
    is_assistance = any(w in t1_lower for w in assistance_words) or \
        any(w in t2_lower for w in assistance_words)
    if is_eligibility and is_assistance:
        if similarity < 0.85:
            return None, 0.0, "Eligibility vs Assistance Mismatch"

    # --- GATE 1.5: PARTY DESCRIPTION CHECK ---
    # Two person-description clauses (addresses, relations) are not conflicts.
    if is_party_intro(text1) and is_party_intro(text2):
        return None, 0.0, "Party Description (Skipped)"

    # --- GATE 2: ENTITY MISMATCH ---
    e1 = get_entities(text1)
    e2 = get_entities(text2)
    # Vendor-only vs Vendee-only clauses talk about different parties.
    if e1 and e2 and e1 != e2 and not (e1 & e2):
        if similarity < 0.85:
            return None, 0.0, "Entity Role Mismatch"

    # --- GATE 2.5: DEFINITION GUARD ---
    # Only compare definitions with definitions (conflicting definitions).
    if is_definition(text1) or is_definition(text2):
        if not (is_definition(text1) and is_definition(text2)):
            return None, 0.0, "Definition vs Operative"

    # --- GATE 3: POSSESSION TIMELINE ---
    # "Possession at agreement" vs "possession at registration" is a
    # normal sequence of events, NOT a contradiction.
    if d1 == "POSSESSION" and d2 == "POSSESSION":
        keywords_a = ["agreement", "earnest"]
        keywords_b = ["registration", "sale deed", "final"]
        a_in_1 = any(k in t1_lower for k in keywords_a)
        b_in_1 = any(k in t1_lower for k in keywords_b)
        a_in_2 = any(k in t2_lower for k in keywords_a)
        b_in_2 = any(k in t2_lower for k in keywords_b)
        # FIX: the original only detected (agreement in text1, registration
        # in text2); both orderings of the sequence are now recognized.
        if (a_in_1 and b_in_2) or (a_in_2 and b_in_1):
            return None, 0.0, "Possession Timeline Sequence"

    # --- GATE 4: NUMERIC REASONING ---
    nums1 = extract_numbers(text1)
    nums2 = extract_numbers(text2)
    if nums1 and nums2 and nums1 != nums2:
        # MAGNITUDE CHECK: values >100x apart usually mean different
        # units (e.g. price vs area), not a contradiction.
        max1, max2 = max(nums1), max(nums2)
        if max1 > 0 and max2 > 0:
            ratio = max1 / max2 if max1 > max2 else max2 / max1
            if ratio > 100:
                return None, 0.0, "Numeric Magnitude Mismatch (Likely Unit Diff)"

        # Same non-general domain: a valid numeric comparison.
        if d1 == d2 and d1 != "GENERAL":
            return "NUMERIC_INCONSISTENCY", 0.9, f"Mismatch in {d1} values"

        # General domain: only flag when the contexts are nearly identical.
        if similarity > 0.9:
            return "NUMERIC_INCONSISTENCY", 0.85, "Numeric Mismatch in similar context"

    # --- GATE 4.5: EXCEPTION/HIERARCHY CHECK ---
    # Slightly lower threshold so qualifications are caught conservatively.
    exception_threshold = max(0.65, threshold - 0.05)
    if similarity > exception_threshold:
        has_ex1 = has_exception_language(text1)
        has_ex2 = has_exception_language(text2)
        if has_ex1 != has_ex2:
            return "QUALIFICATION", similarity, "Legal Exception/Qualification detected (Not a Conflict)"

    # --- GATE 5: LOGICAL NEGATION ---
    # Negation conflict requires high confidence the clauses are related.
    if has_negation(text1) != has_negation(text2):
        if similarity > 0.85:
            return "LEGAL_CONFLICT", 0.8, "Logical Negation detected"

    # --- FINAL GATE: CANDIDATE FOR NLI ---
    if similarity > threshold:
        return "CANDIDATE", similarity, "High Similarity - Pending NLI"

    return None, 0.0, "Low Similarity"
analysis/consistency_check.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
def check_inconsistency(text1, text2):
    """Return True when both clauses contain modal obligation keywords."""
    modals = ("shall", "must", "may")

    def _has_modal(text):
        lowered = text.lower()
        return any(m in lowered for m in modals)

    return _has_modal(text1) and _has_modal(text2)
analysis/contradiction_check.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

# Pre-compiled pattern for INR currency amounts like "INR 1,00,000".
_INR_AMOUNT_RE = re.compile(r'INR\s*([\d,]+)')


def extract_number(text):
    """Return the first INR amount in *text* as an int, or None."""
    found = _INR_AMOUNT_RE.search(text)
    return int(found.group(1).replace(",", "")) if found else None


def numeric_contradiction(text1, text2):
    """True when both texts carry INR amounts and the amounts differ."""
    first, second = extract_number(text1), extract_number(text2)
    return first is not None and second is not None and first != second


def ownership_contradiction(text1, text2):
    """True when one text forbids ownership while the other allows eligibility."""
    pair = (text1.lower(), text2.lower())
    for forbids, allows in (pair, pair[::-1]):
        if "must not own" in forbids and "may be eligible" in allows:
            return True
    return False


def check_contradiction(text1, text2):
    """Top-level contradiction check combining numeric and ownership rules."""
    return numeric_contradiction(text1, text2) or ownership_contradiction(text1, text2)
analysis/duplication_check.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
def check_duplication(similarity, threshold=0.90):
    """Return True when the similarity score reaches the duplication threshold."""
    return not similarity < threshold
analysis/llama_legal_verifier.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ from typing import Tuple
4
+
5
+ import torch
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
7
+
8
+
9
class LlamaLegalVerifier:
    """
    Verifies whether two legal clauses are contradictory, entailing, or neutral
    using a local fine-tuned causal language model.
    """

    def __init__(self, model_path: str):
        """Load tokenizer/model from a local directory and build the pipeline.

        Raises FileNotFoundError when the checkpoint directory is missing.
        """
        if not os.path.isdir(model_path):
            raise FileNotFoundError(f"Model path not found: {model_path}")

        self.model_path = model_path
        # transformers pipeline device convention: 0 = first GPU, -1 = CPU.
        self.device = 0 if torch.cuda.is_available() else -1
        dtype = torch.float16 if torch.cuda.is_available() else torch.float32

        tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            local_files_only=True,
            torch_dtype=dtype,
        )
        # Causal LMs often ship without a pad token; reuse EOS for padding.
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token_id = tokenizer.eos_token_id

        self.generator = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            device=self.device,
        )

    @staticmethod
    def _parse_label(text: str) -> str:
        """Map free-form model output to one of the three NLI labels.

        Defaults to "Neutral" when the model answers off-format.
        """
        lowered = text.lower()
        if "contradiction" in lowered:
            return "Contradiction"
        if "entailment" in lowered or "duplicate" in lowered or "same meaning" in lowered:
            return "Entailment"
        if "neutral" in lowered:
            return "Neutral"
        return "Neutral"

    @staticmethod
    def _parse_confidence(text: str) -> float:
        """Extract the first 0..1 float from the output; fall back to 0.60.

        The lookaround guards prevent matching digits inside larger numbers.
        """
        matches = re.findall(r"(?<!\d)(0(?:\.\d+)?|1(?:\.0+)?)(?!\d)", text)
        if matches:
            try:
                value = float(matches[0])
                # Clamp defensively to the [0, 1] range.
                return max(0.0, min(1.0, value))
            except ValueError:
                return 0.60
        return 0.60

    @staticmethod
    def _parse_reason(text: str) -> str:
        """Pull the 'Reason: ...' tail (capped at 300 chars), else the raw text."""
        m = re.search(r"reason\s*:\s*(.+)", text, flags=re.IGNORECASE | re.DOTALL)
        if m:
            return m.group(1).strip()[:300]
        return text.strip()[:300]

    def predict(self, text1: str, text2: str) -> Tuple[bool, float, str, str]:
        """Classify a clause pair.

        Returns (is_contradiction, confidence, label, reason); a pair is a
        contradiction only when the parsed label is "Contradiction" and the
        parsed confidence reaches 0.50.
        """
        prompt = f"""You are a legal NLI verifier.
Classify relationship between Clause A and Clause B.
Allowed labels: Contradiction, Entailment, Neutral.
Return exactly in this format:
Label: <Contradiction|Entailment|Neutral>
Confidence: <0.00-1.00>
Reason: <one short legal reason>

Clause A: {text1}
Clause B: {text2}
"""

        # Greedy decoding (do_sample=False) keeps the verdict deterministic.
        output = self.generator(
            prompt,
            max_new_tokens=96,
            do_sample=False,
            return_full_text=False,
            pad_token_id=self.generator.tokenizer.eos_token_id,
        )[0]["generated_text"]

        label = self._parse_label(output)
        confidence = self._parse_confidence(output)
        reason = self._parse_reason(output)
        is_contradiction = label == "Contradiction" and confidence >= 0.50
        return is_contradiction, confidence, label, reason
analysis/nli_validator.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import pipeline

# Load the MNLI classifier once at import time (slow only the first time,
# when weights are downloaded/cached).
nli_pipeline = pipeline(
    "text-classification",
    model="roberta-large-mnli",
    device=-1  # CPU
)

def nli_contradiction(text1, text2, threshold=0.8):
    """
    Returns True if NLI model strongly predicts contradiction
    """
    # Join the clauses with the RoBERTa sentence-pair separator so the
    # classifier sees them as premise/hypothesis.
    input_text = f"{text1} </s></s> {text2}"
    result = nli_pipeline(input_text)[0]

    # result is a {"label": ..., "score": ...} dict; require both the
    # CONTRADICTION label and a score at or above the threshold.
    return (
        result["label"] == "CONTRADICTION" and
        result["score"] >= threshold
    )
analysis/nli_verifier.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ from sentence_transformers import CrossEncoder
4
+ from huggingface_hub import login
5
+
6
class NLIVerifier:
    """Sentence-pair NLI verifier backed by a sentence-transformers CrossEncoder."""

    def __init__(self, model_name="cross-encoder/nli-distilroberta-base", hf_token=None):
        """
        Initialize the NLI model using CrossEncoder.

        Model-load failures are swallowed (self.model stays None) so that
        predict() can degrade gracefully instead of raising at startup.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Loading NLI Model ({self.device})...")

        if hf_token:
            try:
                login(token=hf_token)
                print("Logged in to Hugging Face.")
            except Exception as e:
                # Login is best-effort; public models load without it.
                print(f"HF Login Warning: {e}")

        try:
            self.model = CrossEncoder(model_name, device=self.device)
            print("NLI Model Loaded Successfully.")
        except Exception as e:
            # Keep the object usable; predict() will report "Model Error".
            print(f"Error loading model: {e}")
            self.model = None

        # Label mapping for cross-encoder/nli-distilroberta-base
        # 0: Contradiction
        # 1: Entailment
        # 2: Neutral
        self.labels = ["Contradiction", "Entailment", "Neutral"]

    def predict(self, text1, text2):
        """
        Verify if text1 and text2 contradict each other.
        Returns: (IsContradiction: bool, Confidence: float, Label: str)
        """
        if not self.model:
            return False, 0.0, "Model Error"

        # CrossEncoder returns raw logits for the three classes.
        scores = self.model.predict([(text1, text2)])[0]

        # Softmax over the logits to obtain class probabilities.
        exp_scores = np.exp(scores)
        probs = exp_scores / np.sum(exp_scores)

        pred_label_idx = probs.argmax()
        confidence = probs[pred_label_idx]
        label = self.labels[pred_label_idx]

        # Contradiction (index 0) must win with > 0.5 probability.
        is_contradiction = (pred_label_idx == 0 and confidence > 0.5)

        return is_contradiction, float(confidence), label
analysis/similarity_search.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
def get_similar(index, vector, k=5):
    """Query *index* (FAISS-style) for the *k* nearest neighbours of *vector*.

    Returns (indices, distances) for the single query row.
    """
    query = vector.reshape(1, -1)
    distances, indices = index.search(query, k)
    return indices[0], distances[0]
auth/user_store.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hashlib
2
+ import os
3
+ import secrets
4
+ import sqlite3
5
+ from pathlib import Path
6
+ from typing import Tuple
7
+
8
+
9
+ PROJECT_ROOT = Path(__file__).resolve().parents[1]
10
+ DATA_DIR = PROJECT_ROOT / "data"
11
+ DB_PATH = DATA_DIR / "users.db"
12
+
13
+
14
def _ensure_db() -> None:
    """Create the data directory and the users table if they do not exist.

    Idempotent: safe to call before every read/write operation.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS users (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                username TEXT UNIQUE NOT NULL,
                password_hash TEXT NOT NULL,
                salt TEXT NOT NULL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
            """
        )
        conn.commit()
    finally:
        # Always release the connection handle, even if DDL fails.
        conn.close()
32
+
33
+
34
+ def _hash_password(password: str, salt_hex: str) -> str:
35
+ salt = bytes.fromhex(salt_hex)
36
+ digest = hashlib.pbkdf2_hmac("sha256", password.encode("utf-8"), salt, 120_000)
37
+ return digest.hex()
38
+
39
+
40
+ def _normalize_username(username: str) -> str:
41
+ return username.strip().lower()
42
+
43
+
44
def create_user(username: str, password: str) -> Tuple[bool, str]:
    """
    Register a new user with a salted PBKDF2 password hash.

    Returns (success, message); fails on short username/password or a
    duplicate username.
    """
    _ensure_db()
    normalized = _normalize_username(username)

    # Basic input validation before touching the database.
    if len(normalized) < 3:
        return False, "Username must be at least 3 characters."
    if len(password) < 8:
        return False, "Password must be at least 8 characters."

    # Fresh random salt per account.
    salt_hex = secrets.token_hex(16)
    hashed = _hash_password(password, salt_hex)

    conn = sqlite3.connect(DB_PATH)
    try:
        conn.execute(
            "INSERT INTO users (username, password_hash, salt) VALUES (?, ?, ?)",
            (normalized, hashed, salt_hex),
        )
        conn.commit()
    except sqlite3.IntegrityError:
        # UNIQUE constraint on username.
        return False, "Username already exists."
    finally:
        conn.close()
    return True, "Account created successfully."
68
+
69
+
70
def authenticate_user(username: str, password: str) -> Tuple[bool, str]:
    """
    Verify a username/password pair against the stored PBKDF2 hash.

    Returns (success, message).
    """
    _ensure_db()
    normalized = _normalize_username(username)

    conn = sqlite3.connect(DB_PATH)
    try:
        row = conn.execute(
            "SELECT password_hash, salt FROM users WHERE username = ?",
            (normalized,),
        ).fetchone()
    finally:
        conn.close()

    if not row:
        return False, "User not found."

    stored_hash, salt_hex = row
    candidate_hash = _hash_password(password, salt_hex)
    # FIX: use a constant-time comparison so the check does not leak
    # matching-prefix information through timing differences.
    if not secrets.compare_digest(candidate_hash, stored_hash):
        return False, "Incorrect password."
    return True, "Login successful."
backend/README.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Backend (Flask + SQLite)
2
+
3
+ ## Setup
4
+
5
+ ```bash
6
+ cd backend
7
+ python3 -m venv .venv
8
+ source .venv/bin/activate
9
+ pip install -r requirements.txt
10
+ python app.py
11
+ ```
12
+
13
+ Server runs on `http://127.0.0.1:5000`.
14
+
15
+ ## APIs
16
+
17
+ - `GET /api/health`
18
+ - `POST /api/register`
19
+ - `POST /api/login`
20
+ - `POST /api/analyze` (multipart form: `file`, `scanMode`)
21
+
22
+ SQLite database file is created at `backend/app.db`.
backend/app.py ADDED
@@ -0,0 +1,717 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import os
5
+ import sqlite3
6
+ import sys
7
+ from difflib import SequenceMatcher
8
+ from datetime import datetime, timezone
9
+ from pathlib import Path
10
+
11
+ from flask import Flask, jsonify, request
12
+ from flask_cors import CORS
13
+ from werkzeug.security import check_password_hash, generate_password_hash
14
+
15
+ BASE_DIR = Path(__file__).resolve().parent
16
+ PROJECT_ROOT = BASE_DIR.parent
17
+ DB_PATH = Path(os.getenv("DB_PATH", BASE_DIR / "app.db"))
18
+
19
+ app = Flask(__name__)
20
+ CORS(app)
21
+
22
+
23
def _bootstrap_site_packages() -> None:
    """
    Make backend resilient when dependencies are split across:
    - project venv site-packages
    - user local site-packages (~/.local)
    """
    version_tag = f"python{sys.version_info.major}.{sys.version_info.minor}"
    candidates = (
        PROJECT_ROOT / "venv" / "lib" / version_tag / "site-packages",
        Path.home() / ".local" / "lib" / version_tag / "site-packages",
    )
    for candidate in candidates:
        as_str = str(candidate)
        # Only append existing directories that are not already importable.
        if candidate.exists() and as_str not in sys.path:
            sys.path.append(as_str)
+
39
+
40
+ _bootstrap_site_packages()
41
+
42
+
43
def get_db_connection() -> sqlite3.Connection:
    """Open a SQLite connection to the app database with dict-like row access."""
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
47
+
48
+
49
def init_db() -> None:
    """Create the users table if it does not already exist.

    FIX: the previous version used ``with get_db_connection() as conn`` —
    for sqlite3 the ``with`` block only wraps a transaction and never
    closes the connection handle, leaking it. Close explicitly instead.
    """
    conn = get_db_connection()
    try:
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS users (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                full_name TEXT NOT NULL,
                email TEXT NOT NULL UNIQUE,
                password_hash TEXT NOT NULL,
                created_at TEXT NOT NULL
            )
            """
        )
        conn.commit()
    finally:
        conn.close()
63
+
64
+
65
+ def _extract_text_data(file_bytes: bytes, file_ext: str):
66
+ if file_ext == "txt":
67
+ return [{"text": file_bytes.decode("utf-8", errors="ignore"), "page": 1}]
68
+
69
+ if file_ext == "pdf":
70
+ import pdfplumber
71
+
72
+ extracted = []
73
+ with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
74
+ for i, page in enumerate(pdf.pages):
75
+ text = page.extract_text() or ""
76
+ if text.strip():
77
+ extracted.append({"text": text, "page": i + 1})
78
+ return extracted
79
+
80
+ if file_ext == "docx":
81
+ import docx
82
+
83
+ doc = docx.Document(io.BytesIO(file_bytes))
84
+ text = "\n".join(p.text for p in doc.paragraphs if p.text is not None)
85
+ return [{"text": text, "page": 1}] if text.strip() else []
86
+
87
+ raise ValueError("Unsupported file type. Use PDF, DOCX, or TXT.")
88
+
89
+
90
+ def _extract_clauses(text_data):
91
+ import re
92
+
93
+ clauses = []
94
+ clause_id = 0
95
+
96
+ for chunk in text_data:
97
+ raw_text = chunk.get("text", "")
98
+ page_num = chunk.get("page", 1)
99
+ pattern = re.compile(r".+?(?:[.!?](?:\s+|$)|$)", re.DOTALL)
100
+
101
+ for match in pattern.finditer(raw_text):
102
+ cleaned = " ".join(match.group(0).split())
103
+ if len(cleaned) < 30:
104
+ continue
105
+
106
+ start_idx = match.start()
107
+ line_no = raw_text[:start_idx].count("\n") + 1
108
+ clauses.append(
109
+ {
110
+ "id": clause_id,
111
+ "text": cleaned,
112
+ "page": page_num,
113
+ "line": line_no,
114
+ }
115
+ )
116
+ clause_id += 1
117
+
118
+ return clauses
119
+
120
+
121
+ def _normalize_person_name(raw: str) -> str:
122
+ import re
123
+
124
+ if not raw:
125
+ return ""
126
+
127
+ cleaned = " ".join(str(raw).split())
128
+ cleaned = re.sub(r"[^A-Za-z.\s]", " ", cleaned)
129
+ cleaned = re.sub(r"\s+", " ", cleaned).strip()
130
+ cleaned = re.sub(r"\b(mr|mrs|ms|miss|shri|smt)\.?\b", "", cleaned, flags=re.IGNORECASE)
131
+ cleaned = re.sub(r"\s+", " ", cleaned).strip()
132
+
133
+ stop_words = {
134
+ "the",
135
+ "vendor",
136
+ "vendee",
137
+ "party",
138
+ "agreement",
139
+ "hereinafter",
140
+ "called",
141
+ "referred",
142
+ "to",
143
+ "as",
144
+ "and",
145
+ "or",
146
+ "by",
147
+ "of",
148
+ }
149
+ parts = [p for p in cleaned.split(" ") if p and p.lower() not in stop_words]
150
+ if not parts:
151
+ return ""
152
+
153
+ parts = parts[:4]
154
+ name = " ".join(p.capitalize() for p in parts if len(p) > 1)
155
+ return name[:80].strip()
156
+
157
+
158
def _extract_party_name(text: str, role: str) -> str:
    """Heuristically pull a personal name for *role* (e.g. 'vendor'/'vendee').

    Tries several regex shapes in priority order; falls back to a
    role-mentioned placeholder, then "Not found".
    """
    import re

    if not text:
        return "Not found"

    # Collapse all whitespace so multi-line documents match single-line regexes.
    compact = " ".join(str(text).split())
    role_l = role.lower()

    patterns = [
        # Role -> Name (e.g., "vendor: suresh kumar"); the lookahead stops
        # the capture at punctuation or biographical phrases.
        rf"\b{role_l}\b\s*[:,-]?\s*(?:is\s+)?(?:mr\.?|mrs\.?|ms\.?|shri|smt\.?)?\s*([A-Za-z][A-Za-z.\s]{{1,80}}?)(?=,|\.|;|\bson of\b|\bwife of\b|\bresiding\b|\baged\b|$)",
        rf"\bthe\s+{role_l}\b\s*[:,-]?\s*(?:is\s+)?(?:mr\.?|mrs\.?|ms\.?|shri|smt\.?)?\s*([A-Za-z][A-Za-z.\s]{{1,80}}?)(?=,|\.|;|\bson of\b|\bwife of\b|\bresiding\b|\baged\b|$)",
        # Name -> role via legal wording ("X hereinafter called the vendor")
        rf"(?:mr\.?|mrs\.?|ms\.?|shri|smt\.?)?\s*([A-Za-z][A-Za-z.\s]{{1,80}}?)\s+(?:hereinafter\s+(?:called|referred\s+to\s+as)|called)\s+(?:the\s+)?{role_l}\b",
        # Name (role)
        rf"\b([A-Za-z][A-Za-z.\s]{{1,60}}?)\s*\(\s*{role_l}\s*\)",
    ]

    for pattern in patterns:
        match = re.search(pattern, compact, flags=re.IGNORECASE)
        if not match:
            continue
        # Normalize the captured candidate; reject when nothing name-like
        # survives (titles/stop words only) and try the next pattern.
        candidate = _normalize_person_name(match.group(1))
        if candidate:
            return candidate

    # The role word appears but no parseable name was found near it.
    if re.search(rf"\b{role_l}\b", compact, flags=re.IGNORECASE):
        return f"{role.title()} mentioned (name not parsed)"
    return "Not found"
188
+
189
+
190
def _extract_document_parties(text_data):
    """Extract the vendor and vendee names from the full document text."""
    combined = "\n".join(chunk.get("text", "") for chunk in (text_data or []))
    return {
        "vendor": _extract_party_name(combined, "vendor"),
        "vendee": _extract_party_name(combined, "vendee"),
    }
195
+
196
+
197
+ def _similarity(a: str, b: str) -> float:
198
+ return SequenceMatcher(None, a.lower(), b.lower()).ratio()
199
+
200
+
201
+ def _threshold_for_mode(scan_mode: str) -> float:
202
+ mode = (scan_mode or "").lower()
203
+ if "deep" in mode:
204
+ return 0.50
205
+ if "strict" in mode:
206
+ return 0.85
207
+ return 0.60
208
+
209
+
210
+ def _normalized_clause_text(text: str) -> str:
211
+ import re
212
+
213
+ return re.sub(r"\s+", " ", str(text or "").strip().lower())
214
+
215
+
216
def _token_set(text: str) -> set[str]:
    """Alphabetic tokens (length >= 3) from the normalized clause text."""
    import re

    normalized = _normalized_clause_text(text)
    return set(re.findall(r"[a-z]{3,}", normalized))
220
+
221
+
222
+ def _numeric_tokens(text: str) -> set[str]:
223
+ import re
224
+
225
+ return set(re.findall(r"\b\d+(?:[.,]\d+)?%?\b", str(text or "")))
226
+
227
+
228
def _rule_based_category(text_a: str, text_b: str, similarity: float):
    """Deterministic pre-check run before the ML analyzer for a clause pair.

    Returns ``(category, label, confidence, reason)``; *category* is None
    when no rule fires and the pair should fall through to ``analyze_pair``.
    Checks, in order: exact duplication, near-duplication (difflib similarity
    plus token Jaccard), numeric mismatch on overlapping clauses, and opposite
    obligation/negation polarity.

    Bug fix: the polarity keywords were matched with plain substring tests,
    so e.g. "not" fired inside "notice"/"notwithstanding" and inflated the
    contradiction count. Keywords are now matched on word boundaries.
    """
    import re

    def _contains_any(normalized: str, phrases) -> bool:
        # Whole-word / whole-phrase matching on already-normalized text.
        return any(
            re.search(rf"\b{re.escape(phrase)}\b", normalized) for phrase in phrases
        )

    a_norm = _normalized_clause_text(text_a)
    b_norm = _normalized_clause_text(text_b)
    tokens_a = _token_set(text_a)
    tokens_b = _token_set(text_b)
    common = len(tokens_a & tokens_b)
    denom = max(len(tokens_a | tokens_b), 1)  # guard against empty union
    jaccard = common / denom

    if a_norm and b_norm and a_norm == b_norm:
        return ("duplication", "DUPLICATION_EXACT", 0.99, "Exact repeated clause text.")

    if similarity >= 0.94 and jaccard >= 0.88:
        return ("duplication", "DUPLICATION_NEAR", 0.94, "Near-duplicate clause wording.")

    nums_a = _numeric_tokens(text_a)
    nums_b = _numeric_tokens(text_b)
    if jaccard >= 0.45 and nums_a and nums_b and nums_a != nums_b:
        return (
            "inconsistency",
            "NUMERIC_INCONSISTENCY",
            0.9,
            f"Numeric mismatch detected: {sorted(nums_a)} vs {sorted(nums_b)}.",
        )

    # Note: a clause containing "shall not" still matches the positive word
    # "shall" as well — same behavior as the original ordering intended.
    neg_words = ("shall not", "will not", "not", "never", "prohibited", "forbidden")
    pos_words = ("shall", "will", "must", "required", "permitted", "allowed")
    a_has_neg = _contains_any(a_norm, neg_words)
    b_has_neg = _contains_any(b_norm, neg_words)
    a_has_pos = _contains_any(a_norm, pos_words)
    b_has_pos = _contains_any(b_norm, pos_words)
    if jaccard >= 0.5 and ((a_has_neg and b_has_pos) or (b_has_neg and a_has_pos)):
        return ("contradiction", "LEGAL_CONFLICT", 0.9, "Opposite obligation/negation polarity.")

    return (None, None, 0.0, "")
263
+
264
+
265
def _analyze_clauses(clauses, threshold: float):
    """Compare every clause pair and classify issues.

    Each clause dict must carry "text", "page" and "line". Pairs are first
    run through the fast rule-based check; only undecided pairs hit the ML
    ``analyze_pair``. Returns ``(findings, line_issues, counts,
    compared_pairs)`` where findings are pair-level records sorted by
    confidence and line_issues are per-line records sorted by position.

    Raises RuntimeError when the analyzer module cannot be imported.
    """
    # Make the project root importable so `analysis.common_analyzer` resolves
    # regardless of how the server was started.
    if str(PROJECT_ROOT) not in sys.path:
        sys.path.append(str(PROJECT_ROOT))

    try:
        from analysis.common_analyzer import analyze_pair
    except Exception as exc:
        raise RuntimeError(f"Analyzer import failed: {exc}") from exc

    findings = []
    line_issues = []
    counts = {"duplication": 0, "inconsistency": 0, "contradiction": 0}
    compared_pairs = 0
    # Hard cap on O(n^2) pair comparisons so huge documents stay bounded.
    max_pairs = 15000
    seen_findings = set()
    seen_line_issues = set()

    def normalize_category(label: str, reason: str, similarity: float) -> str | None:
        # Map the analyzer's free-form labels onto the three UI categories;
        # None means "ignore this pair".
        lbl = (label or "").upper()
        rsn = (reason or "").lower()
        if lbl in {"NUMERIC_INCONSISTENCY"}:
            return "inconsistency"
        if lbl in {"LEGAL_CONFLICT", "CONTRADICTION"}:
            return "contradiction"
        if lbl in {"DUPLICATION", "ENTAILMENT"}:
            return "duplication"
        if lbl in {"CANDIDATE", "QUALIFICATION"} and similarity >= 0.92:
            return "duplication"
        if "negation" in rsn or "conflict" in rsn:
            return "contradiction"
        return None

    for i in range(len(clauses)):
        for j in range(i + 1, len(clauses)):
            compared_pairs += 1
            if compared_pairs > max_pairs:
                break

            clause_a = clauses[i]
            clause_b = clauses[j]
            similarity = _similarity(clause_a["text"], clause_b["text"])

            # Cheap deterministic rules first; the ML analyzer only sees
            # pairs the rules could not decide.
            category, label, confidence, reason = _rule_based_category(
                clause_a["text"], clause_b["text"], similarity
            )

            if category is None:
                # analyze_pair presumably returns (label, confidence, reason)
                # — see analysis.common_analyzer.
                label, confidence, reason = analyze_pair(
                    clause_a["text"],
                    clause_b["text"],
                    similarity,
                    threshold=threshold,
                )
                if not label or label == "NO_CONFLICT":
                    continue

                category = normalize_category(label, reason, similarity)
                if category is None:
                    continue

            # De-duplicate findings per (category, locations, label).
            finding_key = (
                category,
                clause_a["page"],
                clause_a["line"],
                clause_b["page"],
                clause_b["line"],
                label,
            )
            if finding_key in seen_findings:
                continue
            seen_findings.add(finding_key)

            findings.append(
                {
                    "issueType": label,
                    "category": category,
                    "confidence": round(float(confidence), 4),
                    "reason": reason,
                    "clause1": clause_a["text"],
                    "clause2": clause_b["text"],
                    "location1": f"Pg {clause_a['page']}, Ln {clause_a['line']}",
                    "location2": f"Pg {clause_b['page']}, Ln {clause_b['line']}",
                    "page1": clause_a["page"],
                    "line1": clause_a["line"],
                    "page2": clause_b["page"],
                    "line2": clause_b["line"],
                }
            )
            counts[category] += 1
            # Fan the pair finding out to one record per involved line,
            # de-duplicated independently of the pair records.
            for clause in (clause_a, clause_b):
                line_key = (category, clause["page"], clause["line"], label)
                if line_key in seen_line_issues:
                    continue
                seen_line_issues.add(line_key)
                line_issues.append(
                    {
                        "category": category,
                        "issueType": label,
                        "confidence": round(float(confidence), 4),
                        "page": clause["page"],
                        "line": clause["line"],
                        "location": f"Pg {clause['page']}, Ln {clause['line']}",
                        "reason": reason,
                    }
                )

        if compared_pairs > max_pairs:
            break

    findings.sort(key=lambda item: item["confidence"], reverse=True)
    line_issues.sort(key=lambda item: (item["page"], item["line"]))
    return findings, line_issues, counts, compared_pairs
377
+
378
+
379
+ def _build_page_summaries(clauses, line_issues, text_data):
380
+ pages = {}
381
+ page_text_map = {}
382
+
383
+ for chunk in text_data or []:
384
+ page = int(chunk.get("page", 1))
385
+ if page in page_text_map:
386
+ continue
387
+ raw = str(chunk.get("text", "") or "")
388
+ lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
389
+ page_text_map[page] = " ".join(lines[:2])[:260]
390
+
391
+ for clause in clauses:
392
+ page = int(clause.get("page", 1))
393
+ pages.setdefault(
394
+ page,
395
+ {
396
+ "page": page,
397
+ "clauseCount": 0,
398
+ "duplicationCount": 0,
399
+ "inconsistencyCount": 0,
400
+ "contradictionCount": 0,
401
+ "issueCount": 0,
402
+ "keyLines": [],
403
+ "pageSnippet": page_text_map.get(page, ""),
404
+ },
405
+ )
406
+ pages[page]["clauseCount"] += 1
407
+
408
+ for issue in line_issues:
409
+ page = int(issue.get("page", 1))
410
+ pages.setdefault(
411
+ page,
412
+ {
413
+ "page": page,
414
+ "clauseCount": 0,
415
+ "duplicationCount": 0,
416
+ "inconsistencyCount": 0,
417
+ "contradictionCount": 0,
418
+ "issueCount": 0,
419
+ "keyLines": [],
420
+ "pageSnippet": page_text_map.get(page, ""),
421
+ },
422
+ )
423
+ category = issue.get("category")
424
+ if category in {"duplication", "inconsistency", "contradiction"}:
425
+ pages[page][f"{category}Count"] += 1
426
+ pages[page]["issueCount"] += 1
427
+ if len(pages[page]["keyLines"]) < 6:
428
+ line_ref = f"Ln {issue.get('line', '-')}: {issue.get('issueType', '-')}"
429
+ if line_ref not in pages[page]["keyLines"]:
430
+ pages[page]["keyLines"].append(line_ref)
431
+
432
+ page_summaries = []
433
+ for page in sorted(pages.keys()):
434
+ item = pages[page]
435
+ item["summaryText"] = (
436
+ f"Page {page} contains {item['clauseCount']} clauses and {item['issueCount']} flagged lines "
437
+ f"(duplication: {item['duplicationCount']}, inconsistency: {item['inconsistencyCount']}, "
438
+ f"contradiction: {item['contradictionCount']})."
439
+ )
440
+ page_summaries.append(item)
441
+
442
+ return page_summaries
443
+
444
+
445
+ def _shorten_text(text: str, limit: int = 220) -> str:
446
+ s = " ".join(str(text or "").split())
447
+ if len(s) <= limit:
448
+ return s
449
+ return s[: limit - 3].rstrip() + "..."
450
+
451
+
452
+ def _clause_label(text: str, fallback_id: int) -> str:
453
+ import re
454
+
455
+ raw = str(text or "")
456
+ m = re.search(r"\bclause\s*(\d+)\s*(?:\(([^)]+)\))?", raw, flags=re.IGNORECASE)
457
+ if m:
458
+ num = m.group(1)
459
+ title = (m.group(2) or "").strip()
460
+ return f"Clause {num}" + (f" ({title})" if title else "")
461
+ return f"Clause {fallback_id}"
462
+
463
+
464
+ def _build_detailed_summary(clauses, page_summaries, findings):
465
+ from collections import defaultdict
466
+
467
+ clauses_by_page = defaultdict(list)
468
+ for clause in clauses:
469
+ clauses_by_page[int(clause.get("page", 1))].append(clause)
470
+
471
+ lines = ["Here is the detailed summary of the document content:", ""]
472
+
473
+ for page_item in page_summaries:
474
+ page = int(page_item.get("page", 1))
475
+ page_clauses = sorted(clauses_by_page.get(page, []), key=lambda c: (c.get("line", 0), c.get("id", 0)))
476
+ lines.append(f"Page {page} Summary:")
477
+ if not page_clauses:
478
+ lines.append(f"- No clauses extracted for Page {page}.")
479
+ lines.append("")
480
+ continue
481
+
482
+ for idx, clause in enumerate(page_clauses[:12], start=1):
483
+ label = _clause_label(clause.get("text", ""), idx)
484
+ summary = _shorten_text(clause.get("text", ""), 210)
485
+ lines.append(f"- {label}: {summary} (Page {page}, Line {clause.get('line', '-')})")
486
+
487
+ if len(page_clauses) > 12:
488
+ lines.append(f"- Additional clauses on this page: {len(page_clauses) - 12}")
489
+ lines.append("")
490
+
491
+ contradictions = [f for f in findings if f.get("category") == "contradiction"]
492
+ inconsistencies = [f for f in findings if f.get("category") == "inconsistency"]
493
+ duplicates = [f for f in findings if f.get("category") == "duplication"]
494
+
495
+ lines.append("Summary of Key Contradictions Noted:")
496
+ if contradictions:
497
+ for idx, item in enumerate(contradictions[:10], start=1):
498
+ lines.append(
499
+ f"- {idx}. {item.get('issueType', 'LEGAL_CONFLICT')}: "
500
+ f"{_shorten_text(item.get('reason', ''), 170)} "
501
+ f"({item.get('location1', '-') } vs {item.get('location2', '-')})"
502
+ )
503
+ else:
504
+ lines.append("- No strong contradiction pair detected.")
505
+ lines.append("")
506
+
507
+ lines.append("Summary of Key Inconsistencies Noted:")
508
+ if inconsistencies:
509
+ for idx, item in enumerate(inconsistencies[:10], start=1):
510
+ lines.append(
511
+ f"- {idx}. {item.get('issueType', 'INCONSISTENCY')}: "
512
+ f"{_shorten_text(item.get('reason', ''), 170)} "
513
+ f"({item.get('location1', '-') } vs {item.get('location2', '-')})"
514
+ )
515
+ else:
516
+ lines.append("- No strong inconsistency pair detected.")
517
+ lines.append("")
518
+
519
+ lines.append("Summary of Key Duplications Noted:")
520
+ if duplicates:
521
+ for idx, item in enumerate(duplicates[:10], start=1):
522
+ lines.append(
523
+ f"- {idx}. {item.get('issueType', 'DUPLICATION')}: "
524
+ f"{_shorten_text(item.get('reason', ''), 170)} "
525
+ f"({item.get('location1', '-') } vs {item.get('location2', '-')})"
526
+ )
527
+ else:
528
+ lines.append("- No major duplication pair detected.")
529
+
530
+ return "\n".join(lines)
531
+
532
+
533
# Ensure schema exists even when started via `flask run` (in which case the
# `if __name__ == "__main__"` block at the bottom never executes).
init_db()
535
+
536
+
537
@app.get("/api/health")
def health_check():
    """Liveness probe; always answers {"status": "ok"} with HTTP 200."""
    body = {"status": "ok"}
    return jsonify(body), 200
540
+
541
+
542
@app.get("/")
def root():
    """Landing route: confirms the backend is up and lists the endpoints,
    including the unprefixed aliases kept for older frontend builds."""
    endpoints = [
        "GET /api/health",
        "POST /api/register",
        "POST /api/login",
        "POST /api/analyze",
        "GET /health",
        "POST /register",
        "POST /login",
        "POST /analyze",
    ]
    payload = {
        "message": "Backend is running.",
        "endpoints": endpoints,
    }
    return jsonify(payload), 200
562
+
563
+
564
@app.get("/health")
def health_check_alias():
    """Unprefixed alias for /api/health; delegates to health_check()."""
    return health_check()
567
+
568
+
569
@app.post("/api/register")
def register():
    """Create a new user account.

    Expects JSON {"fullName", "email", "password"}. Returns 201 on success,
    400 on validation failure, 409 when the email is already registered.
    """
    data = request.get_json(silent=True) or {}

    full_name = str(data.get("fullName", "")).strip()
    email = str(data.get("email", "")).strip().lower()  # emails stored lowercase
    password = str(data.get("password", ""))

    if not full_name or not email or not password:
        return jsonify({"error": "fullName, email, and password are required."}), 400

    if len(password) < 6:
        return jsonify({"error": "Password must be at least 6 characters."}), 400

    # Only the salted hash is persisted, never the raw password.
    password_hash = generate_password_hash(password)
    created_at = datetime.now(timezone.utc).isoformat()

    try:
        with get_db_connection() as conn:
            conn.execute(
                "INSERT INTO users (full_name, email, password_hash, created_at) VALUES (?, ?, ?, ?)",
                (full_name, email, password_hash, created_at),
            )
            conn.commit()
    except sqlite3.IntegrityError:
        # Presumably a UNIQUE constraint on email — confirm against the
        # schema created in init_db().
        return jsonify({"error": "Email already registered."}), 409

    return jsonify({"message": "User created successfully."}), 201
597
+
598
+
599
@app.post("/register")
def register_alias():
    """Unprefixed alias for /api/register; delegates to register()."""
    return register()
602
+
603
+
604
@app.post("/api/login")
def login():
    """Verify credentials and return the user record.

    Expects JSON {"email", "password"}. Returns 200 with the user on
    success, 400 on missing fields, 401 on bad credentials.
    """
    data = request.get_json(silent=True) or {}

    email = str(data.get("email", "")).strip().lower()  # normalized like register()
    password = str(data.get("password", ""))

    if not email or not password:
        return jsonify({"error": "email and password are required."}), 400

    with get_db_connection() as conn:
        user = conn.execute(
            "SELECT id, full_name, email, password_hash FROM users WHERE email = ?",
            (email,),
        ).fetchone()

    # One generic error for both "unknown email" and "wrong password" so the
    # endpoint does not leak which accounts exist.
    if user is None or not check_password_hash(user["password_hash"], password):
        return jsonify({"error": "Invalid email or password."}), 401

    return (
        jsonify(
            {
                "message": "Login successful.",
                "user": {
                    "id": user["id"],
                    "fullName": user["full_name"],
                    "email": user["email"],
                },
            }
        ),
        200,
    )
636
+
637
+
638
@app.post("/api/analyze")
def analyze():
    """Run the full document-analysis pipeline on an uploaded file.

    Form fields: "file" (pdf/docx/txt) and optional "scanMode" (selects the
    similarity threshold via _threshold_for_mode). Responds with summary
    counts, per-page summaries, a detailed text report, the top findings,
    and flagged lines. Returns 400 on input problems, 500 on any pipeline
    failure.
    """
    uploaded = request.files.get("file")
    scan_mode = request.form.get("scanMode", "Standard Scan (Recommended)")
    threshold = _threshold_for_mode(scan_mode)

    if uploaded is None or uploaded.filename is None or uploaded.filename.strip() == "":
        return jsonify({"error": "Please upload a file."}), 400

    # Extension check only — content sniffing is left to the extractor.
    file_ext = uploaded.filename.rsplit(".", 1)[-1].lower() if "." in uploaded.filename else ""
    if file_ext not in {"pdf", "docx", "txt"}:
        return jsonify({"error": "Unsupported file type. Use PDF, DOCX, or TXT."}), 400

    try:
        file_bytes = uploaded.read()
        text_data = _extract_text_data(file_bytes=file_bytes, file_ext=file_ext)
        if not text_data:
            return jsonify({"error": "Could not extract text from file."}), 400

        clauses = _extract_clauses(text_data)
        if len(clauses) < 2:
            # Pairwise analysis needs at least two clauses.
            return jsonify({"error": "Not enough clauses found for analysis."}), 400

        parties = _extract_document_parties(text_data)
        findings, line_issues, counts, compared_pairs = _analyze_clauses(
            clauses=clauses, threshold=threshold
        )
        page_summaries = _build_page_summaries(
            clauses=clauses, line_issues=line_issues, text_data=text_data
        )
        detailed_summary = _build_detailed_summary(
            clauses=clauses,
            page_summaries=page_summaries,
            findings=findings,
        )
    except Exception as exc:
        # Catch-all boundary: any pipeline failure becomes a JSON 500.
        return jsonify({"error": f"Analysis failed: {exc}"}), 500

    return (
        jsonify(
            {
                "message": "Analysis completed.",
                "summary": {
                    "scanMode": scan_mode,
                    "threshold": threshold,
                    "vendor": parties["vendor"],
                    "vendee": parties["vendee"],
                    "clauses": len(clauses),
                    "pairsCompared": compared_pairs,
                    "issuesFound": len(findings),
                    "duplicationCount": counts["duplication"],
                    "inconsistencyCount": counts["inconsistency"],
                    "contradictionCount": counts["contradiction"],
                },
                "pageSummaries": page_summaries,
                "detailedSummary": detailed_summary,
                # Caps keep the JSON payload bounded for the frontend.
                "findings": findings[:50],
                "lineIssues": line_issues[:200],
            }
        ),
        200,
    )
700
+
701
+
702
@app.post("/login")
def login_alias():
    """Unprefixed alias for /api/login; delegates to login()."""
    return login()
705
+
706
+
707
@app.post("/analyze")
def analyze_alias():
    """Unprefixed alias for /api/analyze; delegates to analyze()."""
    return analyze()
710
+
711
+
712
if __name__ == "__main__":
    # Keep defaults production-safe and compatible with restricted environments.
    # Debug mode only when FLASK_DEBUG=1; the reloader is disabled explicitly
    # so the process does not fork a watcher.
    debug_mode = os.getenv("FLASK_DEBUG", "0") == "1"
    host = os.getenv("HOST", "127.0.0.1")
    port = int(os.getenv("PORT", "5000"))
    app.run(host=host, port=port, debug=debug_mode, use_reloader=False)
backend/requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Flask==3.1.0
2
+ Flask-Cors==5.0.0
3
+ Werkzeug==3.1.3
4
+ pdfplumber==0.11.5
5
+ python-docx==1.1.2
domain_rules/belongings_check.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
def belongings_conflict(text1, text2):
    """True when one clause marks belongings "included" and the other "excluded".

    Case-insensitive substring check in both directions.
    """
    first = text1.lower()
    second = text2.lower()
    forward = "included" in first and "excluded" in second
    reverse = "excluded" in first and "included" in second
    return forward or reverse
domain_rules/belongings_keywords.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Vocabulary of belongings/fixtures terms for the domain rules.
# NOTE(review): usage is not visible in this module — presumably matched
# against clause text by the rule engine; confirm at call sites.
BELONGINGS_KEYWORDS = [
    "fixture", "fitting", "belonging", "movable",
    "immovable", "furniture", "appliance",
    "electrical", "plumbing", "included", "excluded"
]
domain_rules/legal_rules.py ADDED
File without changes
embeddings/sbert_encoder.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from sentence_transformers import SentenceTransformer
3
+
4
# Module-level cache: the SentenceTransformer is loaded once by get_model()
# and reused for every subsequent call.
_model = None
5
+
6
def get_model():
    """Return the shared SentenceTransformer, loading it on first use.

    Tries a normal (possibly online) load first and falls back to the local
    Hugging Face cache; raises RuntimeError when both attempts fail.
    """
    global _model
    if _model is not None:
        return _model

    model_name = "all-MiniLM-L6-v2"
    try:
        print(f"Loading {model_name}...")
        _model = SentenceTransformer(model_name)
    except Exception as e:
        print(f"Failed to load {model_name} online: {e}")
        print("Attempting to load from local cache...")
        try:
            _model = SentenceTransformer(model_name, local_files_only=True)
        except Exception as e2:
            raise RuntimeError(f"Could not load model {model_name} (Online or Offline). Check connection.") from e2
    return _model
21
+
22
def generate_embeddings(clauses):
    """Encode each clause's "text" field and return a numpy embedding matrix."""
    texts = [clause["text"] for clause in clauses]
    return get_model().encode(texts, convert_to_numpy=True)
frontend/README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Frontend (Multi-Page Flow)
2
+
3
+ This frontend now uses a strict page flow:
4
+
5
+ 1. `index.html` -> Login/Signup
6
+ 2. `upload.html` -> Upload document and run analysis
7
+ 3. `issues.html` -> Line-level issue page (duplication, inconsistency, contradiction)
8
+ 4. `summary.html` -> Final full-document summary
9
+
10
+ ## Run
11
+
12
+ Serve this folder using any static server from `frontend/`:
13
+
14
+ ```bash
15
+ python -m http.server 8080
16
+ ```
17
+
18
+ Open:
19
+
20
+ - `http://127.0.0.1:8080/index.html`
21
+
22
+ ## Backend dependency
23
+
24
+ Frontend expects Flask backend endpoints:
25
+
26
+ - `POST /api/register`
27
+ - `POST /api/login`
28
+ - `POST /api/analyze`
29
+
30
+ Fallback aliases are also supported in client code (`/register`, `/login`, `/analyze`) across ports `5000` and `5001`.
31
+
32
+ ## Notes
33
+
34
+ - Login state and analysis payload are stored in `sessionStorage`.
35
+ - If user session is missing, `upload.html`, `issues.html`, and `summary.html` redirect to `index.html`.
36
+ - If analysis payload is missing, `issues.html` and `summary.html` redirect to `upload.html`.
frontend/app.js ADDED
@@ -0,0 +1,509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Host the page was served from; falls back to loopback for file:// usage.
const currentHost = window.location.hostname || "127.0.0.1";

// Candidate API roots for auth calls — tried in order until one responds.
const API_BASES = [
  `http://${currentHost}:5000/api`,
  `http://${currentHost}:5001/api`,
  "http://127.0.0.1:5000/api",
  "http://localhost:5000/api",
  "http://127.0.0.1:5001/api",
  "http://localhost:5001/api",
];

// Candidate analyze endpoints: both /api-prefixed and bare paths, across
// ports 5000 and 5001, current host first.
const ANALYZE_URLS = [
  `http://${currentHost}:5000/api/analyze`,
  `http://${currentHost}:5000/analyze`,
  `http://${currentHost}:5001/api/analyze`,
  `http://${currentHost}:5001/analyze`,
  "http://127.0.0.1:5000/api/analyze",
  "http://127.0.0.1:5000/analyze",
  "http://localhost:5000/api/analyze",
  "http://localhost:5000/analyze",
  "http://127.0.0.1:5001/api/analyze",
  "http://127.0.0.1:5001/analyze",
  "http://localhost:5001/api/analyze",
  "http://localhost:5001/analyze",
];

// Current page filename — drives which init*Page() runs.
const page = (window.location.pathname.split("/").pop() || "index.html").toLowerCase();
+
29
function escapeHtml(value) {
  // Escape the five HTML-special characters so untrusted values are inert
  // when interpolated into innerHTML templates.
  const map = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#039;",
  };
  return String(value).replace(/[&<>"']/g, (ch) => map[ch]);
}
37
+
38
function setText(el, text, type = null) {
  // Write a status message into an element, resetting any previous
  // success/error styling; no-op when the element is absent.
  if (!el) {
    return;
  }
  el.textContent = text;
  const { classList } = el;
  classList.remove("success", "error");
  if (type) {
    classList.add(type);
  }
}
44
+
45
function getUser() {
  // Session-scoped login record; absent or corrupt JSON yields null.
  const raw = sessionStorage.getItem("lsi_user");
  if (!raw) {
    return null;
  }
  try {
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
54
+
55
// Persist the logged-in user for the lifetime of the browser tab.
function setUser(user) {
  sessionStorage.setItem("lsi_user", JSON.stringify(user));
}
58
+
59
// Logout path: drop both the login record and any cached analysis result.
function clearSession() {
  sessionStorage.removeItem("lsi_user");
  sessionStorage.removeItem("lsi_analysis_payload");
}
63
+
64
function getAnalysisPayload() {
  // Analysis result stashed by upload.html; null when absent or unparsable.
  const stored = sessionStorage.getItem("lsi_analysis_payload");
  if (!stored) {
    return null;
  }
  try {
    return JSON.parse(stored);
  } catch {
    return null;
  }
}
73
+
74
// Stash the analysis response so issues.html / summary.html can render it.
function setAnalysisPayload(payload) {
  sessionStorage.setItem("lsi_analysis_payload", JSON.stringify(payload));
}
77
+
78
// Guard for authenticated pages: redirects to the login page when no user is
// stored; otherwise fills the header badge and wires the logout button.
// Returns the user object, or null after scheduling the redirect.
function ensureAuth() {
  const user = getUser();
  if (!user) {
    window.location.href = "index.html#home";
    return null;
  }

  const badge = document.getElementById("userBadge");
  if (badge) {
    badge.textContent = `${user.fullName || user.email || "User"}`;
  }

  const logoutBtn = document.getElementById("logoutBtn");
  if (logoutBtn) {
    logoutBtn.addEventListener("click", () => {
      clearSession();
      window.location.href = "index.html#home";
    });
  }

  return user;
}
100
+
101
// POST a JSON payload to `${base}${endpoint}` for each candidate API base
// until one yields a usable response. Fix: like runDocumentAnalysis, a
// reachable base that answers 404 (wrong prefix/port for this server) now
// falls through to the next candidate instead of being reported as the
// final answer; the error is thrown only when no base produced any response.
// Returns { response, data } where data is the parsed JSON body or null.
async function postAuth(endpoint, payload) {
  let response = null;
  let data = null;

  for (const base of API_BASES) {
    try {
      response = await fetch(`${base}${endpoint}`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(payload),
      });
      data = await response.json().catch(() => null);
      // 404 means "this base doesn't serve the endpoint" — keep probing.
      if (response.status !== 404) break;
    } catch (error) {
      // Network-level failure for this base; try the next one.
    }
  }

  if (!response) {
    throw new Error(`Cannot reach backend at ${API_BASES.join(", ")}.`);
  }

  return { response, data };
}
+ }
127
+
128
// Upload the analysis form data to the first analyze endpoint that answers
// with a non-404 status. Throws on total network failure or a non-OK
// response; returns the parsed JSON payload otherwise.
async function runDocumentAnalysis(formData) {
  let response = null;
  let data = null;
  let lastNetworkError = null;
  let status = null;

  for (const url of ANALYZE_URLS) {
    try {
      response = await fetch(url, { method: "POST", body: formData });
      data = await response.json().catch(() => null);
      status = response.status;
      lastNetworkError = null;
      // 404 means "wrong base/prefix for this server" — try the next URL.
      if (response.status !== 404) break;
    } catch (error) {
      lastNetworkError = error;
    }
  }

  if (lastNetworkError) {
    // NOTE(review): if an earlier URL returned 404 and a *later* one failed
    // at the network level, the 404 response is discarded and this throws —
    // confirm that is the intended precedence.
    throw new Error("Cannot connect to backend for analysis.");
  }

  if (!response.ok) {
    throw new Error(data?.error || `Analysis request failed with HTTP ${status || response.status}.`);
  }

  return data;
}
156
+
157
// Render the line-issue table for one category as an HTML string.
// Rows are capped at 80; every interpolated value goes through escapeHtml
// first. Returns a muted placeholder paragraph when the category is empty.
function buildIssueRows(lineIssues, category) {
  const rows = lineIssues
    .filter((item) => item.category === category)
    .slice(0, 80)
    .map(
      (item) => `
        <tr>
          <td>${escapeHtml(item.location || `Pg ${item.page}, Ln ${item.line}`)}</td>
          <td>${escapeHtml(item.issueType || "-")}</td>
          <td>${escapeHtml(item.confidence ?? "-")}</td>
        </tr>
      `
    )
    .join("");

  if (!rows) {
    return `<p class="result-muted">No ${category} lines detected.</p>`;
  }

  return `
    <div class="table-wrap">
      <table class="result-table">
        <thead>
          <tr>
            <th>Page/Line</th>
            <th>Issue Type</th>
            <th>Confidence</th>
          </tr>
        </thead>
        <tbody>${rows}</tbody>
      </table>
    </div>
  `;
}
+ }
191
+
192
// Login/signup page: toggles between the two modes, validates the form,
// calls the auth API, and redirects to upload.html once a user is stored.
function initIndexPage() {
  const loginTab = document.getElementById("loginTab");
  const signupTab = document.getElementById("signupTab");
  const authForm = document.getElementById("authForm");
  const nameField = document.getElementById("nameField");
  const fullNameInput = document.getElementById("fullName");
  const emailInput = document.getElementById("email");
  const passwordInput = document.getElementById("password");
  const submitBtn = document.getElementById("submitBtn");
  const formSubtitle = document.getElementById("formSubtitle");
  const message = document.getElementById("message");

  let mode = "login";

  // Switch the shared form between "login" and "signup" presentations.
  function setMode(nextMode) {
    mode = nextMode;
    const isSignup = mode === "signup";
    signupTab.classList.toggle("active", isSignup);
    loginTab.classList.toggle("active", !isSignup);
    nameField.classList.toggle("hidden", !isSignup);
    submitBtn.textContent = isSignup ? "Create Account" : "Login";
    formSubtitle.textContent = isSignup
      ? "Create your account to start securely."
      : "Enter your credentials to access your account.";
    fullNameInput.required = isSignup;
    setText(message, "", null);
  }

  async function handleAuthSubmit(event) {
    event.preventDefault();
    setText(message, "", null);

    const email = emailInput.value.trim();
    const password = passwordInput.value;
    const fullName = fullNameInput.value.trim();

    if (!email || !password || (mode === "signup" && !fullName)) {
      setText(message, "Please fill all required fields.", "error");
      return;
    }

    // Prevent double submits while the request is in flight.
    submitBtn.disabled = true;

    try {
      const endpoint = mode === "signup" ? "/register" : "/login";
      const payload = mode === "signup" ? { fullName, email, password } : { email, password };
      const { response, data } = await postAuth(endpoint, payload);

      if (!response.ok) {
        throw new Error(data?.error || `Request failed with HTTP ${response.status}.`);
      }

      if (mode === "signup") {
        // Account created — flip back to login rather than auto-logging in.
        setText(message, "Account created. Please login now.", "success");
        authForm.reset();
        setMode("login");
        return;
      }

      // Fall back to form values if the server omitted the user record.
      const user = data?.user || { fullName: fullName || email, email };
      setUser(user);
      window.location.href = "upload.html";
    } catch (error) {
      setText(message, error.message || "Something went wrong.", "error");
    } finally {
      submitBtn.disabled = false;
    }
  }

  loginTab.addEventListener("click", () => setMode("login"));
  signupTab.addEventListener("click", () => setMode("signup"));
  authForm.addEventListener("submit", handleAuthSubmit);
  setMode("login");

  // Already logged in — skip the auth screen entirely.
  if (getUser()) {
    window.location.href = "upload.html";
  }
}
+ }
270
+
271
// Upload page: previews the chosen file, submits it for analysis, stores the
// resulting payload in sessionStorage, and navigates to issues.html.
function initUploadPage() {
  if (!ensureAuth()) return;

  const uploadForm = document.getElementById("uploadForm");
  const legalFile = document.getElementById("legalFile");
  const scanMode = document.getElementById("scanMode");
  const uploadMessage = document.getElementById("uploadMessage");
  const loadingState = document.getElementById("loadingState");
  const analysisInputSummary = document.getElementById("analysisInputSummary");

  // Show a small summary card as soon as a file is picked.
  legalFile.addEventListener("change", () => {
    if (!legalFile.files || !legalFile.files[0]) return;
    const selectedFile = legalFile.files[0];
    analysisInputSummary.classList.remove("hidden");
    analysisInputSummary.innerHTML = `
      <p><strong>File:</strong> ${escapeHtml(selectedFile.name)}</p>
      <p><strong>Type:</strong> ${escapeHtml(selectedFile.type || "unknown")}</p>
      <p><strong>Size:</strong> ${escapeHtml((selectedFile.size / 1024).toFixed(2))} KB</p>
      <p><strong>Scan Mode:</strong> ${escapeHtml(scanMode.value)}</p>
    `;
    setText(uploadMessage, `Selected: ${selectedFile.name}`, "success");
  });

  uploadForm.addEventListener("submit", async (event) => {
    event.preventDefault();
    setText(uploadMessage, "", null);

    if (!legalFile.files || legalFile.files.length === 0) {
      setText(uploadMessage, "Please choose a file to continue.", "error");
      return;
    }

    const selectedFile = legalFile.files[0];
    const selectedScanMode = scanMode.value;

    const formData = new FormData();
    formData.append("file", selectedFile);
    formData.append("scanMode", selectedScanMode);

    // Swap the form out for the loading indicator while analysis runs.
    uploadForm.classList.add("hidden");
    loadingState.classList.remove("hidden");

    try {
      const payload = await runDocumentAnalysis(formData);
      // Attach client-side file metadata for the summary page.
      payload._meta = {
        fileName: selectedFile.name,
        fileType: selectedFile.type || "unknown",
        fileSizeKb: Number((selectedFile.size / 1024).toFixed(2)),
      };
      setAnalysisPayload(payload);
      window.location.href = "issues.html";
    } catch (error) {
      // Restore the form so the user can retry.
      loadingState.classList.add("hidden");
      uploadForm.classList.remove("hidden");
      setText(uploadMessage, error.message || "Analysis failed.", "error");
    }
  });
}
329
+
330
// Issues page: renders the three per-category stat cards and the per-line
// issue tables from the stored analysis payload; redirects when state is
// missing.
function initIssuesPage() {
  if (!ensureAuth()) return;

  const payload = getAnalysisPayload();
  if (!payload) {
    // No analysis has been run in this session — send back to upload.
    window.location.href = "upload.html";
    return;
  }

  const summary = payload.summary || {};
  const lineIssues = Array.isArray(payload.lineIssues) ? payload.lineIssues : [];

  const issueStats = document.getElementById("issueStats");
  issueStats.innerHTML = `
    <article class="stat-card stat-dup">
      <h3>Duplication</h3>
      <p>${escapeHtml(summary.duplicationCount ?? 0)}</p>
    </article>
    <article class="stat-card stat-inc">
      <h3>Inconsistency</h3>
      <p>${escapeHtml(summary.inconsistencyCount ?? 0)}</p>
    </article>
    <article class="stat-card stat-con">
      <h3>Contradiction</h3>
      <p>${escapeHtml(summary.contradictionCount ?? 0)}</p>
    </article>
  `;

  const lineIssueTables = document.getElementById("lineIssueTables");
  lineIssueTables.innerHTML = `
    <section class="result-card">
      <h4>Duplication Lines</h4>
      ${buildIssueRows(lineIssues, "duplication")}
    </section>
    <section class="result-card">
      <h4>Inconsistency Lines</h4>
      ${buildIssueRows(lineIssues, "inconsistency")}
    </section>
    <section class="result-card">
      <h4>Contradiction Lines</h4>
      ${buildIssueRows(lineIssues, "contradiction")}
    </section>
  `;
}
374
+
375
// Render the "Final Summary" page: header metadata, the detailed summary,
// per-page summaries, the line-error dashboard, and the top findings.
//
// FIX: the previous version returned early when `findings` was empty (and
// again after rendering the dashboard's empty state), so the line-error
// dashboard was never populated whenever there were no major findings —
// even if line-level issues existed. The dashboard is now rendered
// unconditionally, before the findings guard.
function initSummaryPage() {
  if (!ensureAuth()) return;

  const payload = getAnalysisPayload();
  if (!payload) {
    // No stored analysis — send the user back to the upload step.
    window.location.href = "upload.html";
    return;
  }

  const summary = payload.summary || {};
  const findings = Array.isArray(payload.findings) ? payload.findings : [];
  const pageSummaries = Array.isArray(payload.pageSummaries) ? payload.pageSummaries : [];
  const lineIssues = Array.isArray(payload.lineIssues) ? payload.lineIssues : [];
  const detailedSummary = String(payload.detailedSummary || "").trim();
  const meta = payload._meta || {};

  // Document/scan metadata strip.
  const summaryDetails = document.getElementById("summaryDetails");
  summaryDetails.innerHTML = `
    <article class="summary-item"><span>File</span><strong>${escapeHtml(meta.fileName || "-")}</strong></article>
    <article class="summary-item"><span>Scan Mode</span><strong>${escapeHtml(summary.scanMode || "-")}</strong></article>
    <article class="summary-item"><span>Threshold</span><strong>${escapeHtml(summary.threshold ?? "-")}</strong></article>
    <article class="summary-item"><span>Vendor</span><strong>${escapeHtml(summary.vendor || "Not found")}</strong></article>
    <article class="summary-item"><span>Vendee</span><strong>${escapeHtml(summary.vendee || "Not found")}</strong></article>
    <article class="summary-item"><span>Clauses</span><strong>${escapeHtml(summary.clauses ?? 0)}</strong></article>
    <article class="summary-item"><span>Pairs Compared</span><strong>${escapeHtml(summary.pairsCompared ?? 0)}</strong></article>
    <article class="summary-item"><span>Total Issues</span><strong>${escapeHtml(summary.issuesFound ?? 0)}</strong></article>
  `;

  const findingsBoard = document.getElementById("findingsBoard");
  const pageSummaryBoard = document.getElementById("pageSummaryBoard");
  const detailedSummaryText = document.getElementById("detailedSummaryText");
  const lineErrorDashboard = document.getElementById("lineErrorDashboard");

  if (detailedSummaryText) {
    detailedSummaryText.textContent = detailedSummary || "Detailed summary is not available for this document.";
  }

  // Per-page cards: clause/issue counts, snippet, summary text, flagged lines.
  if (pageSummaryBoard) {
    if (pageSummaries.length === 0) {
      pageSummaryBoard.innerHTML =
        `<article class="result-card"><p class="result-muted">No page-wise summary available for this document.</p></article>`;
    } else {
      pageSummaryBoard.innerHTML = pageSummaries
        .map((item) => {
          const keyLines = Array.isArray(item.keyLines) ? item.keyLines : [];
          const keyLineHtml = keyLines.length
            ? keyLines.map((k) => `<li>${escapeHtml(k)}</li>`).join("")
            : "<li>No flagged lines on this page.</li>";
          return `
            <article class="result-card">
              <h4>Page ${escapeHtml(item.page)}</h4>
              <p><strong>Clauses:</strong> ${escapeHtml(item.clauseCount ?? 0)}</p>
              <p><strong>Issues:</strong> ${escapeHtml(item.issueCount ?? 0)} (Duplication: ${escapeHtml(item.duplicationCount ?? 0)}, Inconsistency: ${escapeHtml(item.inconsistencyCount ?? 0)}, Contradiction: ${escapeHtml(item.contradictionCount ?? 0)})</p>
              <p><strong>Page Snippet:</strong> ${escapeHtml(item.pageSnippet || "-")}</p>
              <p><strong>Summary:</strong> ${escapeHtml(item.summaryText || "-")}</p>
              <p><strong>Key Lines:</strong></p>
              <ul>${keyLineHtml}</ul>
            </article>
          `;
        })
        .join("");
    }
  }

  // Line-error dashboard — rendered regardless of whether findings exist
  // (previously unreachable when `findings` was empty).
  if (lineErrorDashboard) {
    if (lineIssues.length === 0) {
      lineErrorDashboard.innerHTML = `<p class="result-muted">No line-level errors detected.</p>`;
    } else {
      // Cap at 200 rows to keep the DOM responsive on large documents.
      const rows = lineIssues
        .slice(0, 200)
        .map(
          (item) => `
            <tr>
              <td>${escapeHtml(item.location || `Pg ${item.page}, Ln ${item.line}`)}</td>
              <td>${escapeHtml(item.category || "-")}</td>
              <td>${escapeHtml(item.issueType || "-")}</td>
              <td>${escapeHtml(item.confidence ?? "-")}</td>
              <td>${escapeHtml(item.reason || "-")}</td>
            </tr>
          `
        )
        .join("");

      lineErrorDashboard.innerHTML = `
        <div class="table-wrap">
          <table class="result-table">
            <thead>
              <tr>
                <th>Page/Line</th>
                <th>Category</th>
                <th>Issue Type</th>
                <th>Confidence</th>
                <th>Reason</th>
              </tr>
            </thead>
            <tbody>${rows}</tbody>
          </table>
        </div>
      `;
    }
  }

  // Top findings (capped at 20). Empty state short-circuits the map.
  if (findings.length === 0) {
    findingsBoard.innerHTML = `<article class="result-card"><p class="result-muted">No major findings detected for this document.</p></article>`;
    return;
  }

  const topFindings = findings.slice(0, 20);
  findingsBoard.innerHTML = topFindings
    .map(
      (item) => `
        <article class="result-card">
          <h4>${escapeHtml(item.category || "issue")} - ${escapeHtml(item.issueType || "-")}</h4>
          <p><strong>Confidence:</strong> ${escapeHtml(item.confidence ?? "-")}</p>
          <p><strong>Location A:</strong> ${escapeHtml(item.location1 || "-")}</p>
          <p><strong>Location B:</strong> ${escapeHtml(item.location2 || "-")}</p>
          <p><strong>Reason:</strong> ${escapeHtml(item.reason || "-")}</p>
        </article>
      `
    )
    .join("");
}
498
+
499
// Route the current page name (computed earlier in this file) to its
// initializer. Unknown pages are deliberately left untouched.
switch (page) {
  case "index.html":
  case "":
    initIndexPage();
    break;
  case "upload.html":
    initUploadPage();
    break;
  case "issues.html":
    initIssuesPage();
    break;
  case "summary.html":
    initSummaryPage();
    break;
  case "workflow.html":
    // Legacy workflow page — redirect into the new upload flow.
    window.location.href = "upload.html";
    break;
}
frontend/assets/legal-tech-bg.svg ADDED
frontend/index.html ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Semantix • Legal Semantic Intelligence</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
10
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=Space+Grotesk:wght@500;600;700&display=swap" rel="stylesheet" />
11
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.1/css/all.min.css" />
12
+ <style>
13
+ :root {
14
+ --navy: #0f172a;
15
+ }
16
+
17
+ .tail-container {
18
+ font-family: "Inter", system-ui, sans-serif;
19
+ }
20
+
21
+ .heading-font {
22
+ font-family: "Space Grotesk", sans-serif;
23
+ }
24
+
25
+ .hero-bg {
26
+ background: linear-gradient(135deg, #0f172a 0%, #1e293b 100%);
27
+ }
28
+
29
+ .card {
30
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
31
+ }
32
+
33
+ .card:hover {
34
+ transform: translateY(-4px);
35
+ box-shadow: 0 20px 25px -5px rgb(15 23 42 / 0.1), 0 8px 10px -6px rgb(15 23 42 / 0.1);
36
+ }
37
+
38
+ .document-3d {
39
+ perspective: 1200px;
40
+ transition: transform 0.6s cubic-bezier(0.23, 1, 0.32, 1);
41
+ }
42
+
43
+ .document-3d:hover {
44
+ transform: rotateX(12deg) rotateY(12deg) scale(1.03);
45
+ }
46
+
47
+ .document-inner {
48
+ box-shadow: 0 25px 50px -12px rgb(0 0 0 / 0.4), 0 0 80px -20px rgb(129 140 248 / 0.6), inset 0 4px 12px rgba(255, 255, 255, 0.3);
49
+ }
50
+
51
+ .scan-line {
52
+ position: absolute;
53
+ top: 0;
54
+ left: 0;
55
+ width: 100%;
56
+ height: 4px;
57
+ background: linear-gradient(90deg, transparent, #a5b4fc, transparent);
58
+ animation: scan 4s linear infinite;
59
+ opacity: 0.6;
60
+ }
61
+
62
+ .switcher button.active {
63
+ background: #ffffff;
64
+ color: #111827;
65
+ box-shadow: 0 4px 10px rgba(15, 23, 42, 0.12);
66
+ }
67
+
68
+ @keyframes scan {
69
+ 0% {
70
+ transform: translateY(-100%);
71
+ }
72
+
73
+ 100% {
74
+ transform: translateY(380px);
75
+ }
76
+ }
77
+
78
+ @media (max-width: 768px) {
79
+ .nav-mobile-hide {
80
+ display: none;
81
+ }
82
+ }
83
+ </style>
84
+ </head>
85
+ <body class="tail-container bg-zinc-50 text-slate-900">
86
+ <header class="bg-white border-b border-slate-200 sticky top-0 z-50">
87
+ <div class="max-w-7xl mx-auto px-6 md:px-8 py-5 flex items-center justify-between gap-4">
88
+ <div class="flex items-center gap-x-3">
89
+ <div class="w-9 h-9 bg-gradient-to-br from-indigo-600 to-violet-600 rounded-2xl flex items-center justify-center text-white font-bold text-2xl leading-none pt-0.5">S</div>
90
+ <a href="#home" class="heading-font text-3xl font-semibold tracking-tighter text-slate-900">Semantix</a>
91
+ </div>
92
+
93
+ <nav class="nav-mobile-hide md:flex items-center gap-x-10 text-sm font-medium">
94
+ <a href="#home" class="hover:text-indigo-600 transition-colors">Home</a>
95
+ <a href="#about" class="hover:text-indigo-600 transition-colors">About</a>
96
+ <a href="#service" class="hover:text-indigo-600 transition-colors">Service</a>
97
+ <a href="#contact" class="hover:text-indigo-600 transition-colors">Contact</a>
98
+ </nav>
99
+
100
+ <a href="#authView" class="px-5 py-2.5 text-sm font-semibold bg-indigo-600 hover:bg-indigo-700 text-white rounded-2xl transition-colors">Get Started</a>
101
+ </div>
102
+ </header>
103
+
104
+ <main>
105
+ <section id="home" class="hero-bg min-h-screen flex items-center relative overflow-hidden">
106
+ <div class="absolute inset-0 bg-[radial-gradient(at_50%_30%,rgba(129,140,248,0.15),transparent)]"></div>
107
+
108
+ <div class="max-w-7xl mx-auto px-6 md:px-8 grid md:grid-cols-12 gap-14 items-center relative z-10 py-16">
109
+ <div class="md:col-span-7">
110
+ <div class="inline-flex items-center gap-x-2 bg-white/10 backdrop-blur-md border border-white/20 text-white text-xs font-medium px-4 py-2 rounded-3xl mb-6">
111
+ <span class="relative flex h-3 w-3">
112
+ <span class="animate-ping absolute inline-flex h-full w-full rounded-full bg-emerald-400 opacity-75"></span>
113
+ <span class="relative inline-flex rounded-full h-3 w-3 bg-emerald-500"></span>
114
+ </span>
115
+ AI LEGAL INTELLIGENCE
116
+ </div>
117
+
118
+ <h1 class="heading-font text-5xl md:text-7xl leading-none font-semibold tracking-tighter text-white max-w-2xl">
119
+ Legal Documents,<br />Deeply Understood
120
+ </h1>
121
+
122
+ <p class="mt-8 text-lg md:text-xl text-slate-300 max-w-xl">
123
+ Advanced semantic analysis that uncovers hidden risks and delivers crystal-clear clarity in every contract.
124
+ </p>
125
+
126
+ <div class="mt-12 flex justify-center md:justify-start">
127
+ <div class="document-3d relative inline-block">
128
+ <div class="document-inner w-[300px] md:w-[320px] h-[360px] md:h-[380px] bg-white rounded-3xl overflow-hidden border border-white/40 relative">
129
+ <div class="h-12 bg-gradient-to-r from-indigo-600 to-violet-600 flex items-center px-6 text-white text-sm font-medium">
130
+ CONTRACT • PAGE 1
131
+ </div>
132
+
133
+ <div class="p-6 space-y-3 text-[10px] leading-tight text-slate-700 font-mono">
134
+ <div class="h-2.5 bg-slate-200 rounded w-3/4"></div>
135
+ <div class="h-2.5 bg-slate-200 rounded w-11/12"></div>
136
+ <div class="h-2.5 bg-slate-200 rounded w-5/6"></div>
137
+ <div class="h-2.5 bg-slate-200 rounded w-full"></div>
138
+ <div class="h-2.5 bg-slate-200 rounded w-3/4"></div>
139
+ <div class="h-2.5 bg-slate-200 rounded w-10/12"></div>
140
+ </div>
141
+
142
+ <div class="absolute inset-0 bg-gradient-to-br from-indigo-400/10 to-violet-400/10 flex items-center justify-center">
143
+ <i class="fa-solid fa-wand-magic-sparkles text-white text-[120px] opacity-30"></i>
144
+ </div>
145
+ <div class="scan-line"></div>
146
+ </div>
147
+ </div>
148
+ </div>
149
+ </div>
150
+
151
+ <div class="md:col-span-5">
152
+ <section id="authView" class="bg-white rounded-3xl shadow-2xl p-8 md:p-10 card">
153
+ <div class="form-header mb-8">
154
+ <div class="switcher grid grid-cols-2 bg-slate-100 p-1 rounded-2xl mb-3" role="tablist" aria-label="Auth mode">
155
+ <button id="loginTab" class="active px-7 py-3 text-sm font-semibold rounded-[14px]" type="button">Login</button>
156
+ <button id="signupTab" class="px-7 py-3 text-sm font-semibold rounded-[14px]" type="button">Sign Up</button>
157
+ </div>
158
+ <p id="formSubtitle" class="text-slate-500 text-sm">Enter your credentials to access your account.</p>
159
+ </div>
160
+
161
+ <form id="authForm" class="space-y-5" novalidate>
162
+ <div id="nameField" class="hidden">
163
+ <label class="text-xs uppercase tracking-widest text-slate-500 block mb-1" for="fullName">Full Name</label>
164
+ <input id="fullName" name="fullName" type="text" placeholder="Jayasree" class="w-full bg-zinc-50 border border-slate-200 focus:border-indigo-500 rounded-2xl px-5 py-4 outline-none" />
165
+ </div>
166
+ <div>
167
+ <label class="text-xs uppercase tracking-widest text-slate-500 block mb-1" for="email">Email</label>
168
+ <input id="email" name="email" type="email" placeholder="you@lawfirm.in" autocomplete="email" required class="w-full bg-zinc-50 border border-slate-200 focus:border-indigo-500 rounded-2xl px-5 py-4 outline-none" />
169
+ </div>
170
+ <div>
171
+ <label class="text-xs uppercase tracking-widest text-slate-500 block mb-1" for="password">Password</label>
172
+ <input id="password" name="password" type="password" placeholder="Minimum 6 characters" autocomplete="current-password" required class="w-full bg-zinc-50 border border-slate-200 focus:border-indigo-500 rounded-2xl px-5 py-4 outline-none" />
173
+ </div>
174
+ <button id="submitBtn" type="submit" class="w-full bg-indigo-600 hover:bg-indigo-700 transition-colors text-white font-semibold py-4 rounded-3xl">Login</button>
175
+ </form>
176
+ <p id="message" class="text-center text-sm mt-6 text-slate-500"></p>
177
+ </section>
178
+ </div>
179
+ </div>
180
+ </section>
181
+
182
+ <section id="about" class="py-20 bg-white">
183
+ <div class="max-w-4xl mx-auto px-8 text-center">
184
+ <h2 class="heading-font text-4xl md:text-5xl font-semibold tracking-tighter mb-6">Reliable. Precise. Intelligent.</h2>
185
+ <p class="text-lg text-slate-600 max-w-2xl mx-auto">
186
+ Semantix delivers clear, accurate semantic analysis of legal documents, helping you catch issues instantly and work with confidence.
187
+ </p>
188
+ </div>
189
+ </section>
190
+
191
+ <section id="service" class="py-24 bg-slate-50">
192
+ <div class="max-w-7xl mx-auto px-8">
193
+ <h2 class="heading-font text-center text-4xl md:text-5xl font-semibold tracking-tighter mb-16">Built for serious legal work</h2>
194
+ <div class="grid md:grid-cols-3 gap-8">
195
+ <div class="bg-white p-10 rounded-3xl card text-center">
196
+ <div class="text-5xl mb-6">🔐</div>
197
+ <h3 class="font-semibold text-xl">Enterprise Security</h3>
198
+ <p class="text-slate-500 mt-3">Your documents stay private and protected.</p>
199
+ </div>
200
+ <div class="bg-white p-10 rounded-3xl card text-center">
201
+ <div class="text-5xl mb-6">🧠</div>
202
+ <h3 class="font-semibold text-xl">Smart Analysis</h3>
203
+ <p class="text-slate-500 mt-3">Understands legal language like a senior counsel.</p>
204
+ </div>
205
+ <div class="bg-white p-10 rounded-3xl card text-center">
206
+ <div class="text-5xl mb-6">📈</div>
207
+ <h3 class="font-semibold text-xl">Instant Insights</h3>
208
+ <p class="text-slate-500 mt-3">Visual dashboard with line-level clarity.</p>
209
+ </div>
210
+ </div>
211
+ </div>
212
+ </section>
213
+
214
+ <section id="contact" class="py-20 bg-slate-900 text-white text-center">
215
+ <div class="max-w-7xl mx-auto px-8">
216
+ <p class="text-sm uppercase tracking-widest text-slate-400">Made for legal professionals</p>
217
+ <h2 class="heading-font text-4xl mt-4">Ready for flawless contracts?</h2>
218
+ <a href="mailto:hello@semantix.ai" class="inline-block mt-10 px-10 py-4 bg-white text-slate-900 font-semibold rounded-3xl hover:bg-indigo-50 transition-colors">Contact Us</a>
219
+ <p class="mt-20 text-xs text-slate-500">© 2026 Semantix • Legal Semantic Intelligence</p>
220
+ </div>
221
+ </section>
222
+ </main>
223
+
224
+ <script src="app.js"></script>
225
+ </body>
226
+ </html>
frontend/issues.html ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Issue Analysis | LegalSI</title>
7
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
8
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
9
+ <link
10
+ href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700;800&family=Space+Grotesk:wght@500;700&display=swap"
11
+ rel="stylesheet"
12
+ />
13
+ <link rel="stylesheet" href="styles.css" />
14
+ </head>
15
+ <body>
16
+ <header class="topbar">
17
+ <div class="container topbar-inner">
18
+ <a class="brand" href="index.html#home">LegalSI</a>
19
+ <div class="page-links">
20
+ <a class="page-link" href="upload.html">Upload</a>
21
+ <a class="page-link active" href="issues.html">Issue Analysis</a>
22
+ <a class="page-link" href="summary.html">Final Summary</a>
23
+ <button id="logoutBtn" class="logout-btn" type="button">Logout</button>
24
+ </div>
25
+ </div>
26
+ </header>
27
+
28
+ <main class="flow-main">
29
+ <section class="container flow-card">
30
+ <div class="upload-header">
31
+ <h1>Line-Level Issue Analysis</h1>
32
+ <span id="userBadge" class="user-badge"></span>
33
+ </div>
34
+ <p class="upload-subtitle">Inconsistencies, contradictions, and duplications with page and line references.</p>
35
+
36
+ <div id="issueStats" class="stats-grid"></div>
37
+ <div id="lineIssueTables"></div>
38
+
39
+ <div class="workflow-actions">
40
+ <a class="secondary-btn as-link" href="upload.html">Back to Upload</a>
41
+ <a class="submit-btn as-link submit-link" href="summary.html">Next: Final Summary</a>
42
+ </div>
43
+ </section>
44
+ </main>
45
+
46
+ <script src="app.js"></script>
47
+ </body>
48
+ </html>
frontend/styles.css ADDED
@@ -0,0 +1,957 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --bg: #f3f5f8;
3
+ --surface: #ffffff;
4
+ --surface-soft: #f8fafc;
5
+ --ink: #0e2238;
6
+ --muted: #5b6f85;
7
+ --border: #d3dee9;
8
+ --navy: #12385f;
9
+ --navy-2: #1f4d79;
10
+ --gold: #b78a28;
11
+ --primary: #1f5fa6;
12
+ --primary-2: #2e79c8;
13
+ --teal: #1f8a75;
14
+ --danger: #b93f4f;
15
+ --ok: #166a47;
16
+ }
17
+
18
+ * {
19
+ box-sizing: border-box;
20
+ }
21
+
22
+ html {
23
+ scroll-behavior: smooth;
24
+ }
25
+
26
+ body {
27
+ margin: 0;
28
+ font-family: "Manrope", sans-serif;
29
+ color: var(--ink);
30
+ background:
31
+ radial-gradient(1000px 450px at -10% -8%, #dfe9f5 0%, rgba(223, 233, 245, 0) 60%),
32
+ radial-gradient(900px 420px at 110% -10%, #ece5d5 0%, rgba(236, 229, 213, 0) 58%),
33
+ linear-gradient(180deg, #eff3f7 0%, #f8fafd 42%, #ffffff 100%);
34
+ line-height: 1.45;
35
+ }
36
+
37
+ .container {
38
+ width: min(1180px, 92%);
39
+ margin: 0 auto;
40
+ }
41
+
42
+ .topbar {
43
+ position: sticky;
44
+ top: 0;
45
+ z-index: 20;
46
+ background: rgba(249, 251, 253, 0.94);
47
+ backdrop-filter: blur(6px);
48
+ border-bottom: 1px solid #cfd9e4;
49
+ box-shadow: 0 4px 18px rgba(14, 34, 56, 0.06);
50
+ }
51
+
52
+ .topbar-inner {
53
+ display: flex;
54
+ align-items: center;
55
+ justify-content: space-between;
56
+ min-height: 68px;
57
+ }
58
+
59
+ .brand {
60
+ font-family: "Space Grotesk", sans-serif;
61
+ font-size: 24px;
62
+ font-weight: 700;
63
+ color: var(--navy);
64
+ text-decoration: none;
65
+ }
66
+
67
+ .nav-links {
68
+ display: flex;
69
+ gap: 20px;
70
+ }
71
+
72
+ .nav-links a {
73
+ color: #264868;
74
+ text-decoration: none;
75
+ font-weight: 700;
76
+ font-size: 14px;
77
+ padding: 6px 8px;
78
+ border-radius: 8px;
79
+ }
80
+
81
+ .nav-links a:hover {
82
+ background: #e9f0f8;
83
+ color: var(--navy);
84
+ }
85
+
86
+ .hero {
87
+ position: relative;
88
+ padding: 48px 0 42px;
89
+ overflow: hidden;
90
+ }
91
+
92
+ .hero-bg {
93
+ position: absolute;
94
+ inset: 0;
95
+ background:
96
+ linear-gradient(120deg, rgba(18, 56, 95, 0.1), rgba(183, 138, 40, 0.09)),
97
+ url("assets/legal-tech-bg.svg") right center / cover no-repeat;
98
+ opacity: 0.95;
99
+ pointer-events: none;
100
+ }
101
+
102
+ .hero-grid {
103
+ position: relative;
104
+ display: grid;
105
+ grid-template-columns: 1.1fr 0.95fr;
106
+ gap: 24px;
107
+ align-items: start;
108
+ }
109
+
110
+ .hero-copy {
111
+ background: rgba(255, 255, 255, 0.82);
112
+ border: 1px solid var(--border);
113
+ border-radius: 20px;
114
+ padding: 24px;
115
+ box-shadow: 0 14px 34px rgba(15, 38, 66, 0.11);
116
+ animation: fadeInUp 0.45s ease-out;
117
+ }
118
+
119
+ .eyebrow {
120
+ margin: 0 0 10px;
121
+ font-size: 13px;
122
+ letter-spacing: 0.05em;
123
+ text-transform: uppercase;
124
+ color: var(--navy-2);
125
+ font-weight: 800;
126
+ }
127
+
128
+ .hero-copy h1 {
129
+ margin: 0;
130
+ font-size: clamp(30px, 4.6vw, 50px);
131
+ line-height: 1.08;
132
+ font-family: "Space Grotesk", sans-serif;
133
+ }
134
+
135
+ .hero-text {
136
+ margin: 14px 0 18px;
137
+ color: var(--muted);
138
+ line-height: 1.6;
139
+ max-width: 66ch;
140
+ }
141
+
142
+ .hero-cta-row {
143
+ display: flex;
144
+ gap: 10px;
145
+ flex-wrap: wrap;
146
+ margin: 8px 0 14px;
147
+ }
148
+
149
+ .hero-cta-primary,
150
+ .hero-cta-secondary {
151
+ text-decoration: none;
152
+ border-radius: 11px;
153
+ font-size: 14px;
154
+ font-weight: 800;
155
+ padding: 10px 14px;
156
+ }
157
+
158
+ .hero-cta-primary {
159
+ color: #ffffff;
160
+ background: linear-gradient(92deg, var(--navy), var(--primary-2) 58%, var(--teal));
161
+ box-shadow: 0 10px 18px rgba(17, 62, 110, 0.22);
162
+ }
163
+
164
+ .hero-cta-secondary {
165
+ color: #1c446b;
166
+ background: #ecf4ff;
167
+ border: 1px solid #bfd6f2;
168
+ }
169
+
170
+ .trust-strip {
171
+ display: flex;
172
+ flex-wrap: wrap;
173
+ gap: 8px;
174
+ margin: 0 0 14px;
175
+ }
176
+
177
+ .trust-strip span {
178
+ border: 1px solid #d0dded;
179
+ border-radius: 999px;
180
+ padding: 5px 10px;
181
+ font-size: 12px;
182
+ font-weight: 700;
183
+ color: #315579;
184
+ background: #f5f9ff;
185
+ }
186
+
187
+ .hero-metrics {
188
+ display: grid;
189
+ grid-template-columns: repeat(3, 1fr);
190
+ gap: 10px;
191
+ }
192
+
193
+ .hero-metrics > div {
194
+ border: 1px solid #d5e2f0;
195
+ background: #ffffff;
196
+ border-radius: 12px;
197
+ padding: 12px;
198
+ transition: transform 0.18s ease, box-shadow 0.18s ease;
199
+ }
200
+
201
+ .hero-metrics > div:hover {
202
+ transform: translateY(-2px);
203
+ box-shadow: 0 10px 18px rgba(16, 43, 74, 0.09);
204
+ }
205
+
206
+ .hero-metrics h3 {
207
+ margin: 0;
208
+ font-size: 14px;
209
+ }
210
+
211
+ .hero-metrics p {
212
+ margin: 6px 0 0;
213
+ color: var(--muted);
214
+ font-size: 12px;
215
+ }
216
+
217
+ .preview-card {
218
+ margin-top: 12px;
219
+ border: 1px solid #ccdaea;
220
+ border-radius: 14px;
221
+ padding: 12px;
222
+ background: linear-gradient(160deg, #f7fbff 0%, #edf5ff 100%);
223
+ }
224
+
225
+ .preview-card h3 {
226
+ margin: 0 0 10px;
227
+ font-size: 14px;
228
+ color: #163a60;
229
+ }
230
+
231
+ .preview-grid {
232
+ display: grid;
233
+ grid-template-columns: repeat(4, 1fr);
234
+ gap: 8px;
235
+ }
236
+
237
+ .preview-grid div {
238
+ border: 1px solid #c8d9ec;
239
+ border-radius: 10px;
240
+ background: #ffffff;
241
+ padding: 8px;
242
+ display: grid;
243
+ gap: 3px;
244
+ }
245
+
246
+ .preview-grid span {
247
+ font-size: 11px;
248
+ color: #5a7090;
249
+ }
250
+
251
+ .preview-grid strong {
252
+ font-size: 19px;
253
+ color: #15395e;
254
+ }
255
+
256
+ .panel {
257
+ background: var(--surface);
258
+ border: 1px solid var(--border);
259
+ border-radius: 18px;
260
+ box-shadow: 0 14px 30px rgba(12, 31, 53, 0.12);
261
+ animation: fadeInUp 0.5s ease-out;
262
+ }
263
+
264
+ .auth-panel {
265
+ padding: 22px;
266
+ }
267
+
268
+ .form-header {
269
+ margin-bottom: 18px;
270
+ }
271
+
272
+ .switcher {
273
+ display: grid;
274
+ grid-template-columns: 1fr 1fr;
275
+ background: #e9eff7;
276
+ border-radius: 12px;
277
+ padding: 4px;
278
+ margin-bottom: 12px;
279
+ }
280
+
281
+ .switcher button {
282
+ border: 0;
283
+ background: transparent;
284
+ border-radius: 9px;
285
+ padding: 10px;
286
+ font-weight: 800;
287
+ cursor: pointer;
288
+ color: #315579;
289
+ transition: background 0.2s ease, color 0.2s ease, transform 0.12s ease;
290
+ }
291
+
292
+ .switcher button.active {
293
+ color: #112a48;
294
+ background: #ffffff;
295
+ box-shadow: 0 6px 14px rgba(8, 26, 49, 0.08);
296
+ }
297
+
298
+ .switcher button:active {
299
+ transform: scale(0.98);
300
+ }
301
+
302
+ #formSubtitle {
303
+ margin: 0;
304
+ color: var(--muted);
305
+ font-size: 14px;
306
+ }
307
+
308
+ .auth-form {
309
+ display: grid;
310
+ gap: 14px;
311
+ }
312
+
313
+ .field {
314
+ display: grid;
315
+ gap: 7px;
316
+ }
317
+
318
+ .field label {
319
+ font-size: 14px;
320
+ font-weight: 700;
321
+ }
322
+
323
+ .field input,
324
+ .control {
325
+ border: 1px solid var(--border);
326
+ border-radius: 12px;
327
+ padding: 12px 13px;
328
+ font: inherit;
329
+ background: #ffffff;
330
+ outline: none;
331
+ width: 100%;
332
+ }
333
+
334
+ .field input:focus,
335
+ .control:focus {
336
+ border-color: var(--primary);
337
+ box-shadow: 0 0 0 4px rgba(31, 95, 166, 0.16);
338
+ }
339
+
340
+ .hidden {
341
+ display: none;
342
+ }
343
+
344
+ .submit-btn {
345
+ margin-top: 8px;
346
+ border: 0;
347
+ border-radius: 12px;
348
+ padding: 12px;
349
+ background: linear-gradient(92deg, var(--navy), var(--primary-2) 58%, var(--teal));
350
+ color: #ffffff;
351
+ font-weight: 800;
352
+ font-size: 15px;
353
+ cursor: pointer;
354
+ transition: transform 0.16s ease, box-shadow 0.16s ease, filter 0.16s ease;
355
+ }
356
+
357
+ .submit-btn:hover {
358
+ filter: brightness(1.03);
359
+ transform: translateY(-1px);
360
+ box-shadow: 0 10px 18px rgba(17, 62, 110, 0.22);
361
+ }
362
+
363
+ .message {
364
+ min-height: 22px;
365
+ margin: 14px 0 0;
366
+ font-size: 14px;
367
+ font-weight: 700;
368
+ }
369
+
370
+ .message.success {
371
+ color: var(--ok);
372
+ }
373
+
374
+ .message.error {
375
+ color: var(--danger);
376
+ }
377
+
378
+ .upload-header {
379
+ display: flex;
380
+ align-items: center;
381
+ justify-content: space-between;
382
+ }
383
+
384
+ .upload-header h2 {
385
+ margin: 0;
386
+ font-family: "Space Grotesk", sans-serif;
387
+ }
388
+
389
+ .upload-subtitle {
390
+ margin: 10px 0 18px;
391
+ color: var(--muted);
392
+ }
393
+
394
+ .stepper {
395
+ display: grid;
396
+ grid-template-columns: repeat(3, 1fr);
397
+ gap: 8px;
398
+ margin: 10px 0 16px;
399
+ }
400
+
401
+ .step-chip {
402
+ text-align: center;
403
+ border: 1px solid var(--border);
404
+ border-radius: 10px;
405
+ padding: 8px 10px;
406
+ font-size: 13px;
407
+ font-weight: 800;
408
+ color: #5d7190;
409
+ background: #f3f6fb;
410
+ transition: all 0.2s ease;
411
+ }
412
+
413
+ .step-chip.active {
414
+ color: #0f2d4e;
415
+ border-color: #b7cde7;
416
+ background: #e8f1fc;
417
+ box-shadow: inset 0 0 0 1px rgba(38, 97, 166, 0.15);
418
+ }
419
+
420
+ .workflow-step {
421
+ margin-top: 6px;
422
+ }
423
+
424
+ .summary-box {
425
+ border: 1px solid var(--border);
426
+ border-radius: 12px;
427
+ background: var(--surface-soft);
428
+ padding: 12px;
429
+ color: #25496f;
430
+ box-shadow: inset 0 0 0 1px rgba(255, 255, 255, 0.6);
431
+ }
432
+
433
+ .summary-box p {
434
+ margin: 5px 0;
435
+ font-size: 14px;
436
+ }
437
+
438
+ .workflow-actions {
439
+ display: flex;
440
+ gap: 10px;
441
+ margin-top: 12px;
442
+ flex-wrap: wrap;
443
+ }
444
+
445
+ .upload-zone-wrap {
446
+ margin-top: 2px;
447
+ }
448
+
449
+ .upload-zone {
450
+ border: 1.5px dashed #b8cbe0;
451
+ border-radius: 14px;
452
+ background: linear-gradient(180deg, #f8fbff 0%, #f3f8ff 100%);
453
+ min-height: 132px;
454
+ display: grid;
455
+ place-content: center;
456
+ text-align: center;
457
+ gap: 6px;
458
+ cursor: pointer;
459
+ padding: 14px;
460
+ transition: border-color 0.2s ease, background 0.2s ease, transform 0.18s ease;
461
+ }
462
+
463
+ .upload-zone:hover {
464
+ border-color: #7ca4cf;
465
+ background: linear-gradient(180deg, #fafdff 0%, #eef5ff 100%);
466
+ transform: translateY(-1px);
467
+ }
468
+
469
+ .upload-icon {
470
+ width: 34px;
471
+ height: 34px;
472
+ border-radius: 999px;
473
+ margin: 0 auto;
474
+ display: grid;
475
+ place-content: center;
476
+ font-size: 22px;
477
+ font-weight: 700;
478
+ color: #21507f;
479
+ background: #e5eef9;
480
+ }
481
+
482
+ .upload-title {
483
+ font-size: 14px;
484
+ font-weight: 800;
485
+ color: #1f4469;
486
+ }
487
+
488
+ .upload-hint {
489
+ font-size: 12px;
490
+ color: #5f7691;
491
+ }
492
+
493
+ .file-input-hidden {
494
+ position: absolute;
495
+ left: -10000px;
496
+ width: 1px;
497
+ height: 1px;
498
+ opacity: 0;
499
+ }
500
+
501
+ .chat-panel {
502
+ border: 1px solid var(--border);
503
+ border-radius: 12px;
504
+ background: #f7fbff;
505
+ padding: 12px;
506
+ margin-top: 10px;
507
+ display: grid;
508
+ gap: 10px;
509
+ max-height: 220px;
510
+ overflow-y: auto;
511
+ }
512
+
513
+ .chat-bubble {
514
+ padding: 10px 12px;
515
+ border-radius: 12px;
516
+ font-size: 13px;
517
+ line-height: 1.5;
518
+ }
519
+
520
+ .chat-bubble.user {
521
+ justify-self: end;
522
+ max-width: 92%;
523
+ background: #e8f1ff;
524
+ border: 1px solid #bfd6f4;
525
+ color: #1f4268;
526
+ }
527
+
528
+ .chat-bubble.bot {
529
+ justify-self: start;
530
+ max-width: 96%;
531
+ background: #ffffff;
532
+ border: 1px solid #d4e0ee;
533
+ color: #274968;
534
+ }
535
+
536
+ .logout-btn {
537
+ border: 1px solid var(--border);
538
+ background: #ffffff;
539
+ border-radius: 10px;
540
+ padding: 8px 12px;
541
+ font-weight: 700;
542
+ cursor: pointer;
543
+ }
544
+
545
+ .secondary-btn {
546
+ border: 1px solid #b8cbe0;
547
+ background: #f1f6fc;
548
+ color: #1f4469;
549
+ border-radius: 12px;
550
+ padding: 12px 14px;
551
+ font-weight: 800;
552
+ font-size: 14px;
553
+ cursor: pointer;
554
+ transition: background 0.18s ease, transform 0.14s ease;
555
+ }
556
+
557
+ .secondary-btn:hover {
558
+ background: #e7f0fa;
559
+ }
560
+
561
+ .secondary-btn:active {
562
+ transform: scale(0.98);
563
+ }
564
+
565
+ .section {
566
+ padding: 20px 0 26px;
567
+ }
568
+
569
+ .section-card {
570
+ background: var(--surface);
571
+ border: 1px solid var(--border);
572
+ border-radius: 18px;
573
+ padding: 24px;
574
+ box-shadow: 0 10px 24px rgba(12, 34, 58, 0.09);
575
+ transition: box-shadow 0.2s ease, transform 0.2s ease;
576
+ }
577
+
578
+ .section-card:hover {
579
+ box-shadow: 0 14px 26px rgba(12, 34, 58, 0.13);
580
+ transform: translateY(-1px);
581
+ }
582
+
583
+ .section-card h2 {
584
+ margin: 0 0 10px;
585
+ font-family: "Space Grotesk", sans-serif;
586
+ }
587
+
588
+ .section-card p {
589
+ margin: 0;
590
+ color: var(--muted);
591
+ line-height: 1.7;
592
+ }
593
+
594
+ .service-grid {
595
+ margin-top: 14px;
596
+ display: grid;
597
+ grid-template-columns: repeat(3, 1fr);
598
+ gap: 12px;
599
+ }
600
+
601
+ .service-grid article {
602
+ border: 1px solid var(--border);
603
+ border-radius: 12px;
604
+ padding: 14px;
605
+ background: var(--surface-soft);
606
+ }
607
+
608
+ .service-grid h3 {
609
+ margin: 0 0 8px;
610
+ font-size: 16px;
611
+ }
612
+
613
+ .contact-grid {
614
+ margin-top: 14px;
615
+ display: grid;
616
+ gap: 8px;
617
+ color: #193b61;
618
+ }
619
+
620
+ .analysis-result {
621
+ margin-top: 16px;
622
+ border-top: 1px solid var(--border);
623
+ padding-top: 14px;
624
+ }
625
+
626
+ .result-summary h3 {
627
+ margin: 0 0 8px;
628
+ font-family: "Space Grotesk", sans-serif;
629
+ }
630
+
631
+ .result-summary p {
632
+ margin: 4px 0;
633
+ color: #1d3352;
634
+ }
635
+
636
+ .result-visual {
637
+ margin-top: 12px;
638
+ border: 1px solid var(--border);
639
+ border-radius: 12px;
640
+ padding: 12px;
641
+ background: linear-gradient(180deg, #f8fbff 0%, #f4f8fd 100%);
642
+ }
643
+
644
+ .result-visual h3 {
645
+ margin: 0 0 10px;
646
+ }
647
+
648
+ .bar-row {
649
+ display: grid;
650
+ grid-template-columns: 170px 1fr 52px;
651
+ align-items: center;
652
+ gap: 8px;
653
+ margin-bottom: 8px;
654
+ }
655
+
656
+ .bar-label,
657
+ .bar-value {
658
+ font-size: 13px;
659
+ font-weight: 700;
660
+ }
661
+
662
+ .bar-track {
663
+ width: 100%;
664
+ height: 12px;
665
+ border-radius: 999px;
666
+ background: #dde5f1;
667
+ overflow: hidden;
668
+ }
669
+
670
+ .bar-fill {
671
+ height: 100%;
672
+ border-radius: 999px;
673
+ }
674
+
675
+ .bar-fill.dup {
676
+ background: #2d6ec8;
677
+ }
678
+
679
+ .bar-fill.inc {
680
+ background: #d08f28;
681
+ }
682
+
683
+ .bar-fill.con {
684
+ background: #bd4b58;
685
+ }
686
+
687
+ .result-list {
688
+ margin-top: 12px;
689
+ display: grid;
690
+ gap: 10px;
691
+ }
692
+
693
+ .result-card {
694
+ border: 1px solid var(--border);
695
+ border-radius: 12px;
696
+ padding: 10px 12px;
697
+ background: #f9fbfe;
698
+ transition: box-shadow 0.16s ease;
699
+ }
700
+
701
+ .result-card:hover {
702
+ box-shadow: 0 10px 20px rgba(12, 34, 58, 0.09);
703
+ }
704
+
705
+ .result-card h4 {
706
+ margin: 0 0 6px;
707
+ }
708
+
709
+ .result-muted {
710
+ color: var(--muted);
711
+ }
712
+
713
+ .table-wrap {
714
+ width: 100%;
715
+ overflow-x: auto;
716
+ }
717
+
718
+ .result-table {
719
+ width: 100%;
720
+ border-collapse: collapse;
721
+ margin-top: 8px;
722
+ }
723
+
724
+ .result-table th,
725
+ .result-table td {
726
+ border: 1px solid var(--border);
727
+ padding: 8px;
728
+ text-align: left;
729
+ font-size: 13px;
730
+ vertical-align: top;
731
+ }
732
+
733
+ .result-table th {
734
+ background: #eef4ff;
735
+ }
736
+
737
+ @keyframes fadeInUp {
738
+ from {
739
+ opacity: 0;
740
+ transform: translateY(8px);
741
+ }
742
+ to {
743
+ opacity: 1;
744
+ transform: translateY(0);
745
+ }
746
+ }
747
+
748
+ @media (max-width: 980px) {
749
+ .hero-grid {
750
+ grid-template-columns: 1fr;
751
+ }
752
+
753
+ .hero-metrics {
754
+ grid-template-columns: 1fr;
755
+ }
756
+
757
+ .preview-grid {
758
+ grid-template-columns: repeat(2, 1fr);
759
+ }
760
+
761
+ .service-grid {
762
+ grid-template-columns: 1fr;
763
+ }
764
+
765
+ .bar-row {
766
+ grid-template-columns: 1fr;
767
+ gap: 6px;
768
+ }
769
+
770
+ .nav-links {
771
+ gap: 12px;
772
+ flex-wrap: wrap;
773
+ justify-content: flex-end;
774
+ }
775
+
776
+ .topbar-inner {
777
+ padding-block: 8px;
778
+ }
779
+ }
780
+
781
+ .page-links {
782
+ display: flex;
783
+ align-items: center;
784
+ gap: 10px;
785
+ }
786
+
787
+ .page-link {
788
+ border: 1px solid #bfd0e3;
789
+ border-radius: 10px;
790
+ padding: 6px 10px;
791
+ font-size: 13px;
792
+ font-weight: 700;
793
+ color: #23496f;
794
+ text-decoration: none;
795
+ background: #f4f8fd;
796
+ }
797
+
798
+ .page-link.active {
799
+ background: #e7f1ff;
800
+ border-color: #98b9dc;
801
+ color: #14395f;
802
+ }
803
+
804
+ .flow-main {
805
+ padding: 28px 0 36px;
806
+ }
807
+
808
+ .flow-card {
809
+ background: var(--surface);
810
+ border: 1px solid var(--border);
811
+ border-radius: 18px;
812
+ box-shadow: 0 14px 30px rgba(12, 31, 53, 0.12);
813
+ padding: 22px;
814
+ }
815
+
816
+ .flow-card h1 {
817
+ margin: 0;
818
+ font-family: "Space Grotesk", sans-serif;
819
+ font-size: clamp(28px, 4vw, 40px);
820
+ }
821
+
822
+ .user-badge {
823
+ border: 1px solid #c6d9ee;
824
+ border-radius: 999px;
825
+ padding: 8px 12px;
826
+ background: #f2f8ff;
827
+ color: #24486d;
828
+ font-weight: 700;
829
+ font-size: 13px;
830
+ }
831
+
832
+ .loading-panel {
833
+ margin-top: 16px;
834
+ border: 1px solid var(--border);
835
+ border-radius: 12px;
836
+ padding: 18px;
837
+ background: #f5f9ff;
838
+ display: grid;
839
+ justify-items: center;
840
+ gap: 10px;
841
+ }
842
+
843
+ .spinner {
844
+ width: 30px;
845
+ height: 30px;
846
+ border: 3px solid #c8d8eb;
847
+ border-top-color: #1f5fa6;
848
+ border-radius: 50%;
849
+ animation: spin 0.8s linear infinite;
850
+ }
851
+
852
+ .stats-grid {
853
+ display: grid;
854
+ grid-template-columns: repeat(3, minmax(0, 1fr));
855
+ gap: 10px;
856
+ margin: 12px 0 14px;
857
+ }
858
+
859
+ .stat-card {
860
+ border: 1px solid var(--border);
861
+ border-radius: 12px;
862
+ padding: 12px;
863
+ background: #f9fbfe;
864
+ }
865
+
866
+ .stat-card h3 {
867
+ margin: 0;
868
+ font-size: 14px;
869
+ }
870
+
871
+ .stat-card p {
872
+ margin: 6px 0 0;
873
+ font-size: 28px;
874
+ font-weight: 800;
875
+ }
876
+
877
+ .stat-dup p {
878
+ color: #2d6ec8;
879
+ }
880
+
881
+ .stat-inc p {
882
+ color: #d08f28;
883
+ }
884
+
885
+ .stat-con p {
886
+ color: #bd4b58;
887
+ }
888
+
889
+ .summary-grid {
890
+ display: grid;
891
+ grid-template-columns: repeat(2, minmax(0, 1fr));
892
+ gap: 10px;
893
+ margin-bottom: 14px;
894
+ }
895
+
896
+ .summary-item {
897
+ border: 1px solid var(--border);
898
+ border-radius: 12px;
899
+ padding: 10px;
900
+ background: #f9fbfe;
901
+ display: grid;
902
+ gap: 4px;
903
+ }
904
+
905
+ .summary-item span {
906
+ color: var(--muted);
907
+ font-size: 13px;
908
+ }
909
+
910
+ .summary-item strong {
911
+ color: #1b3d63;
912
+ font-size: 14px;
913
+ }
914
+
915
+ .section-subtitle {
916
+ margin: 6px 0 10px;
917
+ font-family: "Space Grotesk", sans-serif;
918
+ font-size: 20px;
919
+ color: #183d62;
920
+ }
921
+
922
+ .detailed-summary-text {
923
+ white-space: pre-wrap;
924
+ line-height: 1.65;
925
+ color: #1d3552;
926
+ font-size: 14px;
927
+ }
928
+
929
+ .as-link {
930
+ text-decoration: none;
931
+ display: inline-flex;
932
+ align-items: center;
933
+ justify-content: center;
934
+ }
935
+
936
+ .submit-link {
937
+ min-width: 220px;
938
+ }
939
+
940
+ @keyframes spin {
941
+ to {
942
+ transform: rotate(360deg);
943
+ }
944
+ }
945
+
946
+ @media (max-width: 980px) {
947
+ .page-links {
948
+ gap: 6px;
949
+ flex-wrap: wrap;
950
+ justify-content: flex-end;
951
+ }
952
+
953
+ .stats-grid,
954
+ .summary-grid {
955
+ grid-template-columns: 1fr;
956
+ }
957
+ }
frontend/summary.html ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Final Summary | LegalSI</title>
7
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
8
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
9
+ <link
10
+ href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700;800&family=Space+Grotesk:wght@500;700&display=swap"
11
+ rel="stylesheet"
12
+ />
13
+ <link rel="stylesheet" href="styles.css" />
14
+ </head>
15
+ <body>
16
+ <header class="topbar">
17
+ <div class="container topbar-inner">
18
+ <a class="brand" href="index.html#home">LegalSI</a>
19
+ <div class="page-links">
20
+ <a class="page-link" href="upload.html">Upload</a>
21
+ <a class="page-link" href="issues.html">Issue Analysis</a>
22
+ <a class="page-link active" href="summary.html">Final Summary</a>
23
+ <button id="logoutBtn" class="logout-btn" type="button">Logout</button>
24
+ </div>
25
+ </div>
26
+ </header>
27
+
28
+ <main class="flow-main">
29
+ <section class="container flow-card">
30
+ <div class="upload-header">
31
+ <h1>Final Document Summary</h1>
32
+ <span id="userBadge" class="user-badge"></span>
33
+ </div>
34
+ <p class="upload-subtitle">Overall analysis result for the entire uploaded legal document.</p>
35
+
36
+ <div id="summaryDetails" class="summary-grid"></div>
37
+ <h3 class="section-subtitle">Detailed Document Summary</h3>
38
+ <article class="result-card">
39
+ <div id="detailedSummaryText" class="detailed-summary-text"></div>
40
+ </article>
41
+ <h3 class="section-subtitle">Page-wise Summary</h3>
42
+ <div id="pageSummaryBoard" class="result-list"></div>
43
+ <h3 class="section-subtitle">Top Findings</h3>
44
+ <div id="findingsBoard" class="result-list"></div>
45
+ <h3 class="section-subtitle">Line Error Dashboard</h3>
46
+ <article class="result-card">
47
+ <div id="lineErrorDashboard"></div>
48
+ </article>
49
+
50
+ <div class="workflow-actions">
51
+ <a class="secondary-btn as-link" href="issues.html">Back to Issue Analysis</a>
52
+ <a class="submit-btn as-link submit-link" href="upload.html">Analyze New Document</a>
53
+ </div>
54
+ </section>
55
+ </main>
56
+
57
+ <script src="app.js"></script>
58
+ </body>
59
+ </html>
frontend/upload.html ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Upload Document | LegalSI</title>
7
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
8
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
9
+ <link
10
+ href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700;800&family=Space+Grotesk:wght@500;700&display=swap"
11
+ rel="stylesheet"
12
+ />
13
+ <link rel="stylesheet" href="styles.css" />
14
+ </head>
15
+ <body>
16
+ <header class="topbar">
17
+ <div class="container topbar-inner">
18
+ <a class="brand" href="index.html#home">LegalSI</a>
19
+ <div class="page-links">
20
+ <a class="page-link active" href="upload.html">Upload</a>
21
+ <a class="page-link" href="issues.html">Issue Analysis</a>
22
+ <a class="page-link" href="summary.html">Final Summary</a>
23
+ <button id="logoutBtn" class="logout-btn" type="button">Logout</button>
24
+ </div>
25
+ </div>
26
+ </header>
27
+
28
+ <main class="flow-main">
29
+ <section class="container flow-card">
30
+ <div class="upload-header">
31
+ <h1>Upload Document</h1>
32
+ <span id="userBadge" class="user-badge"></span>
33
+ </div>
34
+ <p class="upload-subtitle">Upload a legal document, then continue to the issue analysis and final summary pages.</p>
35
+
36
+ <form id="uploadForm" class="auth-form" novalidate>
37
+ <div class="field">
38
+ <label for="scanMode">Scan Mode</label>
39
+ <select id="scanMode" class="control">
40
+ <option>Standard Scan (Recommended)</option>
41
+ <option>Deep Search (Fuzzy)</option>
42
+ <option>Strict (Duplicates Only)</option>
43
+ </select>
44
+ </div>
45
+
46
+ <div class="field upload-zone-wrap">
47
+ <label for="legalFile">Upload File (PDF/DOCX/TXT)</label>
48
+ <label class="upload-zone" for="legalFile">
49
+ <span class="upload-icon">+</span>
50
+ <span class="upload-title">Drop your document or click to browse</span>
51
+ <span class="upload-hint">Supported: PDF, DOCX, TXT</span>
52
+ </label>
53
+ <input id="legalFile" class="control file-input-hidden" type="file" accept=".pdf,.docx,.txt" required />
54
+ </div>
55
+
56
+ <div id="analysisInputSummary" class="summary-box hidden"></div>
57
+
58
+ <div class="workflow-actions">
59
+ <a class="secondary-btn as-link" href="index.html#home">Back to Home</a>
60
+ <button id="runUploadBtn" class="submit-btn" type="submit">Upload and Analyze</button>
61
+ </div>
62
+ </form>
63
+
64
+ <div id="loadingState" class="loading-panel hidden" aria-live="polite">
65
+ <div class="spinner"></div>
66
+ <p>Analyzing document. Please wait...</p>
67
+ </div>
68
+
69
+ <p id="uploadMessage" class="message" aria-live="polite"></p>
70
+ </section>
71
+ </main>
72
+
73
+ <script src="app.js"></script>
74
+ </body>
75
+ </html>
frontend/workflow.html ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta http-equiv="refresh" content="0;url=upload.html" />
6
+ <title>Redirecting</title>
7
+ </head>
8
+ <body>
9
+ <p>Redirecting to upload page...</p>
10
+ </body>
11
+ </html>
ingestion/docx_reader.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from docx import Document
2
+
3
def extract_text_from_docx(path):
    """Return the full text of the .docx file at *path*.

    Every paragraph's text is taken in document order and the pieces are
    joined with newlines (empty paragraphs contribute empty lines).
    """
    document = Document(path)
    paragraph_texts = [paragraph.text for paragraph in document.paragraphs]
    return "\n".join(paragraph_texts)
ingestion/pdf_reader.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import pdfplumber
2
+
3
def extract_text_from_pdf(path):
    """Extract plain text from every page of the PDF at *path*.

    Returns the page texts concatenated in order, each followed by a
    newline; pages with no extractable text are skipped. Returns "" for a
    PDF with no extractable text at all (same as the original behavior).
    """
    pages = []
    with pdfplumber.open(path) as pdf:
        for page in pdf.pages:
            # BUG FIX: extract_text() re-parses the page layout, so call it
            # once per page instead of twice (the original called it in the
            # condition and again in the body).
            page_text = page.extract_text()
            if page_text:
                pages.append(page_text + "\n")
    # join() instead of repeated "+=" avoids quadratic string building on
    # large documents; the resulting string is byte-identical.
    return "".join(pages)
main.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Command-line pipeline: extract clauses from a sample PDF, find
semantically similar clause pairs via FAISS, classify each pair with the
common analyzer, and emit a report."""

from ingestion.pdf_reader import extract_text_from_pdf
from preprocessing.clause_extraction import extract_clauses
from embeddings.sbert_encoder import generate_embeddings
from storage.faiss_index import create_faiss_index
from analysis.similarity_search import get_similar
from analysis.common_analyzer import analyze_pair
from output.report_generator import generate_report
import numpy as np

# Load document.
text = extract_text_from_pdf("data/sample_docs/policy.pdf")

# Clause extraction.
# BUG FIX: extract_text_from_pdf returns a plain string, but extract_clauses
# expects a list of {'text', 'page'} chunks (see its docstring in
# preprocessing/clause_extraction.py); the old call crashed on chunk.get().
clauses = extract_clauses([{"text": text, "page": 1}])

# Embeddings + exact L2 index over them.
embeddings = generate_embeddings(clauses)
index = create_faiss_index(embeddings)

results = []

for i, emb in enumerate(embeddings):
    idxs, dists = get_similar(index, emb)
    for j, dist in zip(idxs, dists):
        if i == j:
            # A clause is trivially similar to itself; skip self-pairs.
            continue

        # Map L2 distance into a (0, 1] similarity score.
        similarity = 1 / (1 + dist)

        # Use new Common Analyzer (Centralized Logic).
        # BUG FIX: analyze_pair returns (label, confidence, reason) — see
        # its use in reproduce_issue.py — so the old 2-value unpack raised
        # ValueError on every call. The reason is not used in this report.
        issue_type, score, _reason = analyze_pair(clauses[i]["text"], clauses[j]["text"], similarity)

        if issue_type:
            results.append({
                "type": issue_type,
                "confidence": score,
                "clause_1": clauses[i]["text"],
                "clause_2": clauses[j]["text"]
            })

generate_report(results)
print("✅ Analysis completed. Report generated.")
preprocessing/clause_extraction.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
# Sentences at or below this length are treated as headings/noise, not clauses.
_MIN_CLAUSE_CHARS = 30


def _line_of(substring, source_text):
    """Return the 1-based line number where *substring* first occurs in
    *source_text*; defaults to 1 when it cannot be located."""
    idx = source_text.find(substring)
    if idx == -1:
        return 1
    return source_text[:idx].count('\n') + 1


def extract_clauses(text_data):
    """
    Extracts clauses from text chunks with location data.
    Args:
        text_data: List[Dict] with 'text' and 'page' keys.
    Returns:
        List[Dict]: [{'id', 'text', 'page', 'line'}]
    """
    unique_clauses = []
    seen = set()  # normalized sentences already emitted (dedup across pages)
    clause_id = 0

    for chunk in text_data:
        raw_text = chunk.get("text", "")
        page_num = chunk.get("page", 1)

        # Split on sentence-ending punctuation followed by whitespace; the
        # line number is then estimated from the sentence's offset inside
        # the chunk. (Helper hoisted to module scope — the original
        # re-created a closure on every chunk iteration.)
        for sentence in re.split(r'(?<=[.!?])\s+', raw_text):
            s_clean = sentence.strip()
            if len(s_clean) > _MIN_CLAUSE_CHARS and s_clean not in seen:
                seen.add(s_clean)
                unique_clauses.append({
                    "id": clause_id,
                    "text": s_clean,
                    "page": page_num,
                    "line": _line_of(s_clean, raw_text),
                })
                clause_id += 1

    return unique_clauses
preprocessing/text_extractor.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pdfplumber
3
+ import docx
4
+ import io
5
+
6
def extract_text_from_file(file_obj, file_type):
    """
    Extracts text from various file formats with page/location tracking.
    Args:
        file_obj: The uploaded file object (bytes).
        file_type: 'pdf', 'docx', or 'txt'.
    Returns:
        List[Dict]: List of {'text': str, 'page': int}; empty list for
        unsupported types or when extraction raises.
    """
    chunks = []
    try:
        if file_type == "pdf":
            with pdfplumber.open(file_obj) as pdf:
                for page_no, page in enumerate(pdf.pages, start=1):
                    page_text = page.extract_text()
                    if page_text:
                        chunks.append({"text": page_text, "page": page_no})

        elif file_type == "docx":
            document = docx.Document(file_obj)
            # DOCX has no strict pagination; expose the whole document as a
            # single page-1 chunk of newline-terminated paragraphs.
            body = "".join(para.text + "\n" for para in document.paragraphs)
            chunks.append({"text": body, "page": 1})

        elif file_type == "txt":
            # Assuming utf-8 encoding
            chunks.append({
                "text": file_obj.read().decode("utf-8"),
                "page": 1
            })

    except Exception as e:
        print(f"Error extracting text: {e}")
        return []

    return chunks
reproduce_issue.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import numpy as np
4
+ from sentence_transformers import SentenceTransformer, util
5
+
6
+ sys.path.append(os.getcwd())
7
+ try:
8
+ from analysis.common_analyzer import analyze_pair
9
+ from preprocessing.clause_extraction import extract_clauses
10
+ except ImportError:
11
+ # Handle case where run from root
12
+ sys.path.append(os.path.join(os.getcwd(), 'analysis'))
13
+ sys.path.append(os.path.join(os.getcwd(), 'preprocessing'))
14
+ from analysis.common_analyzer import analyze_pair
15
+ from preprocessing.clause_extraction import extract_clauses
16
+
17
def test_reproduction():
    """Manual smoke test for the analysis pipeline.

    Section 1 pushes one deliberately contradictory clause pair
    (retain 3 years vs delete after 1 year) through embedding similarity
    and ``analyze_pair``, and — only when the pair is accepted as a
    CANDIDATE — through the phase-2 NLI verifier.
    Section 2 checks that ``extract_clauses`` preserves page/line metadata
    for structured chunk input. Results are printed, not asserted.
    """
    print("--- Section 1: Core Logic Test ---")
    t1 = "Audit reports must be retained for a minimum of three (3) years."
    t2 = "Audit reports shall be deleted after one (1) year to reduce storage overhead."

    print(f"Text 1: {t1}")
    print(f"Text 2: {t2}")

    # 1. Calculate Similarity
    print("Loading embedding model...")
    model = SentenceTransformer('all-MiniLM-L6-v2')
    e1 = model.encode(t1)
    e2 = model.encode(t2)

    sim = util.cos_sim(e1, e2).item()
    print(f"Similarity Score: {sim:.4f}")

    # 2. Test analyze_pair
    # analyze_pair returns a (label, confidence, reason) triple.
    print("Running analyze_pair...")
    label, conf, reason = analyze_pair(t1, t2, sim)
    print(f"Result: Label={label}, Conf={conf}, Reason={reason}")

    if label == "CANDIDATE":
        print("!!! PASSED Phase 1: ACCEPTED as CANDIDATE")

        # 3. Test NLI
        # Imported lazily so the heavy NLI model only loads when a
        # candidate pair actually reaches phase 2.
        from analysis.nli_verifier import NLIVerifier
        print("\nRunning NLI Verification (Phase 2)...")
        verifier = NLIVerifier()
        is_contra, nli_conf, nli_label = verifier.predict(t1, t2)
        print(f"NLI Result: IsContra={is_contra}, Conf={nli_conf}, Label={nli_label}")

    elif label:
        print(f"!!! PASSED Phase 1: ACCEPTED as {label} (No NLI needed usually, but logic might vary)")
    else:
        print("!!! PASSED Phase 1: REJECTED (None)")

    print("\n--- Section 2: Pipeline & Metadata Test ---")
    # extract_clauses expects a list of {'text', 'page'} chunks.
    mock_text = [
        {"text": "Section 1. This is a test clause on page 1.", "page": 1},
        {"text": "Section 2. This is another clause on page 2.", "page": 2}
    ]
    print("Testing extract_clauses with structured input...")
    clauses = extract_clauses(mock_text)
    if len(clauses) > 0 and 'page' in clauses[0] and 'line' in clauses[0]:
        print(f"SUCCESS: Extracted {len(clauses)} clauses with metadata.")
        print(f"Sample: {clauses[0]}")
    else:
        print("FAIL: Metadata extraction failed.")
66
+
67
+
68
+ if __name__ == "__main__":
69
+ test_reproduction()
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pdfplumber
2
+ python-docx
3
+ spacy
4
+ sentence-transformers
5
+ faiss-cpu
6
+ numpy
7
+ streamlit
8
+ transformers
9
+ torch
10
+ huggingface_hub
11
+ reportlab
storage/faiss_index.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import faiss
2
+ import numpy as np
3
+
4
def create_faiss_index(embeddings):
    """Build an exact (flat) L2 FAISS index containing *embeddings*.

    Args:
        embeddings: 2-D array-like of shape (n_vectors, dim).
    Returns:
        A populated faiss.IndexFlatL2 over all input vectors.
    """
    dimension = embeddings.shape[1]
    flat_index = faiss.IndexFlatL2(dimension)
    flat_index.add(np.array(embeddings))
    return flat_index
ui/app.py ADDED
@@ -0,0 +1,871 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ import importlib
6
+ import json
7
+ import base64
8
+ import re
9
+
10
+ import pandas as pd
11
+ import plotly.express as px
12
+ import streamlit as st
13
+
14
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
15
+
16
+ from preprocessing.text_extractor import extract_text_from_file
17
+ from preprocessing.clause_extraction import extract_clauses
18
+ from embeddings.sbert_encoder import generate_embeddings
19
+ from storage.faiss_index import create_faiss_index
20
+ from analysis.similarity_search import get_similar
21
+
22
+ import analysis.common_analyzer
23
+ importlib.reload(analysis.common_analyzer)
24
+ from analysis.common_analyzer import analyze_pair
25
+
26
+ from analysis.nli_verifier import NLIVerifier
27
+ from analysis.llama_legal_verifier import LlamaLegalVerifier
28
+ from output.pdf_generator import generate_pdf_report
29
+ from auth.user_store import authenticate_user, create_user
30
+
31
+
32
+ APP_TITLE = "Legal Semantic Integrity"
33
+ DEFAULT_MODEL_PATH = "merged_tinyllama_instruction"
34
+ PROJECT_ROOT = Path(__file__).resolve().parents[1]
35
+
36
+
37
def init_state():
    """Seed st.session_state with every key the app reads, leaving any
    values already set by a previous rerun untouched."""
    defaults = {
        "is_authenticated": False,
        "username": "",
        "analysis_done": False,
        "results": [],
        "line_issues": [],
        "uploaded_name": "",
        "uploaded_ext": "",
        "uploaded_bytes": b"",
    }
    for key, value in defaults.items():
        st.session_state.setdefault(key, value)
46
+
47
+
48
+ def _extract_party_name(text: str, role: str) -> str:
49
+ """
50
+ Try to extract a nearby party name for vendor/vendee from clause text.
51
+ Falls back to role-present markers when exact name is not available.
52
+ """
53
+ if not text:
54
+ return "Not found"
55
+
56
+ t = " ".join(str(text).split())
57
+ role_l = role.lower()
58
+
59
+ # Pattern examples:
60
+ # "Vendor Mr. Ravi Kumar", "Vendee: Sita Devi", "the vendor, John Doe"
61
+ patterns = [
62
+ rf"\b{role_l}\b\s*[:,-]?\s*(?:mr\.?|mrs\.?|ms\.?)?\s*([A-Z][A-Za-z.\s]{{2,60}}?)(?=,|\.|;|\bson of\b|\bwife of\b|\bresiding\b|\baged\b|$)",
63
+ rf"\bthe\s+{role_l}\b\s*[:,-]?\s*(?:is\s+)?(?:mr\.?|mrs\.?|ms\.?)?\s*([A-Z][A-Za-z.\s]{{2,60}}?)(?=,|\.|;|\bson of\b|\bwife of\b|\bresiding\b|\baged\b|$)",
64
+ ]
65
+
66
+ for pat in patterns:
67
+ m = re.search(pat, t, flags=re.IGNORECASE)
68
+ if m:
69
+ name = " ".join(m.group(1).split())
70
+ # Filter generic captures like "hereinafter called"
71
+ if name and not re.search(r"hereinafter|called|referred|party|agreement", name, re.IGNORECASE):
72
+ return name[:80]
73
+
74
+ if re.search(rf"\b{role_l}\b", t, flags=re.IGNORECASE):
75
+ return f"{role.title()} mentioned (name not parsed)"
76
+ return "Not found"
77
+
78
+
79
+ def _clean_candidate_name(name: str) -> str:
80
+ name = re.sub(r"\s+", " ", str(name)).strip(" ,.;:-")
81
+ if not name:
82
+ return ""
83
+ banned = r"hereinafter|called|referred|party|agreement|vendor|vendee|purchaser|buyer|seller"
84
+ if re.search(banned, name, flags=re.IGNORECASE):
85
+ return ""
86
+ return name[:80]
87
+
88
+
89
def _extract_document_parties(text_data):
    """Scan the whole document text for Vendor/Vendee names.

    Tries common legal-intro phrasings first; when the role keyword exists
    but no clean name was captured, a role-present marker is used instead.
    """
    joined = "\n".join(chunk.get("text", "") for chunk in (text_data or []))
    compact = " ".join(joined.split())
    parties = {"Vendor": "Not found", "Vendee": "Not found"}

    # Typical intros:
    # "Mr. X ... hereinafter called the VENDOR"
    # "Y ... hereinafter called the VENDEE"
    role_patterns = {
        "Vendor": [
            r"(Mr\.?|Mrs\.?|Ms\.?)?\s*([A-Z][A-Za-z.\s]{2,80}?)\s+(?:son of|wife of|daughter of|residing at|aged about|hereinafter)\b[^.]{0,120}\bvendor\b",
            r"\bvendor\b\s*[:,-]?\s*(?:is\s+)?(?:Mr\.?|Mrs\.?|Ms\.?)?\s*([A-Z][A-Za-z.\s]{2,80})(?=,|\.|;|\bson of\b|\bwife of\b|\bresiding\b|\baged\b|$)",
        ],
        "Vendee": [
            r"(Mr\.?|Mrs\.?|Ms\.?)?\s*([A-Z][A-Za-z.\s]{2,80}?)\s+(?:son of|wife of|daughter of|residing at|aged about|hereinafter)\b[^.]{0,120}\bvendee\b",
            r"\bvendee\b\s*[:,-]?\s*(?:is\s+)?(?:Mr\.?|Mrs\.?|Ms\.?)?\s*([A-Z][A-Za-z.\s]{2,80})(?=,|\.|;|\bson of\b|\bwife of\b|\bresiding\b|\baged\b|$)",
        ],
    }

    for role, patterns in role_patterns.items():
        for pattern in patterns:
            hit = re.search(pattern, compact, flags=re.IGNORECASE)
            if hit is None:
                continue
            # Patterns differ in group count: take the name group, which is
            # group 2 when the honorific group participated in the match.
            raw = hit.group(2) if (hit.lastindex or 0) >= 2 else hit.group(1)
            cleaned = _clean_candidate_name(raw)
            if cleaned:
                parties[role] = cleaned
                break
        # Secondary fallback: explicit role in text without a parsable name.
        if parties[role] == "Not found" and re.search(rf"\b{role.lower()}\b", compact, flags=re.IGNORECASE):
            parties[role] = f"{role} mentioned (name not parsed)"

    return parties
123
+
124
+
125
def _extract_parties(text1: str, text2: str, doc_parties=None):
    """Resolve (vendor, vendee) names for a clause pair.

    Prefers names found in *text1*, then *text2*; when only a weak
    role-present marker was found, falls back to the document-level
    parties in *doc_parties* (as produced by _extract_document_parties).
    """
    def resolve(role: str) -> str:
        # Try the first clause, then the second.
        found = _extract_party_name(text1, role)
        if found != "Not found":
            return found
        return _extract_party_name(text2, role)

    vendor = resolve("vendor")
    vendee = resolve("vendee")

    if doc_parties:
        if vendor in ("Not found", "Vendor mentioned (name not parsed)") and doc_parties.get("Vendor"):
            vendor = doc_parties.get("Vendor")
        if vendee in ("Not found", "Vendee mentioned (name not parsed)") and doc_parties.get("Vendee"):
            vendee = doc_parties.get("Vendee")

    return vendor, vendee
141
+
142
+
143
@st.cache_resource
def load_verifier(backend: str, llama_model_path: str):
    """Construct — and cache across Streamlit reruns — the clause-pair
    verifier: 'llama' selects LlamaLegalVerifier, any other backend falls
    back to the cross-encoder NLI model."""
    if backend != "llama":
        return NLIVerifier(model_name="cross-encoder/nli-distilroberta-base")
    return LlamaLegalVerifier(model_path=llama_model_path)
148
+
149
+
150
def apply_theme():
    """Inject the app-wide CSS theme into the Streamlit page.

    Defines the font imports, color palette (:root variables), hero/card/
    tag/table styling and the fadeIn animation used throughout the UI.
    Call once per rerun before rendering themed markup; relies on
    unsafe_allow_html to pass the raw <style> block through st.markdown.
    """
    # The CSS is a single runtime string literal — editing it changes
    # presentation only, never app logic.
    st.markdown(
        """
        <style>
        @import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;600;700&display=swap');
        @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500&display=swap');

        :root {
            --bg-soft: #f6fbff;
            --ink-900: #0b2f4a;
            --ink-700: #21506f;
            --accent-500: #0a84c6;
            --accent-700: #005b88;
            --mint-500: #2aa198;
            --warn-500: #c57b00;
            --danger-500: #c44736;
            --card-border: #dbeaf4;
        }

        html, body, [class*="css"] {
            font-family: 'Space Grotesk', sans-serif;
        }

        .stApp {
            background:
                radial-gradient(900px 420px at -15% -25%, #d7f0ff 0%, rgba(215,240,255,0) 62%),
                radial-gradient(900px 420px at 115% -20%, #fff2d8 0%, rgba(255,242,216,0) 62%),
                linear-gradient(180deg, #f8fcff 0%, #ffffff 55%);
        }

        .hero {
            border: 1px solid var(--card-border);
            background: linear-gradient(145deg, #f0f8ff 0%, #fffdf8 95%);
            border-radius: 18px;
            padding: 20px 22px;
            margin-bottom: 14px;
            box-shadow: 0 10px 24px rgba(9, 59, 102, 0.07);
            animation: fadeIn .45s ease-out;
        }

        .hero h2 {
            margin: 0;
            color: var(--ink-900);
            letter-spacing: .2px;
            font-weight: 700;
        }

        .hero p {
            margin: 8px 0 0 0;
            color: var(--ink-700);
        }

        .step {
            border-left: 4px solid var(--accent-500);
            background: #ffffff;
            border-radius: 8px;
            padding: 8px 12px;
            margin-bottom: 8px;
            font-weight: 500;
            color: #12344d;
            box-shadow: 0 6px 16px rgba(12, 53, 88, 0.05);
        }

        .mini-card {
            border: 1px solid var(--card-border);
            border-radius: 14px;
            background: #ffffff;
            padding: 14px 14px;
            margin-bottom: 10px;
            box-shadow: 0 6px 16px rgba(12, 53, 88, 0.04);
            animation: fadeIn .55s ease-out;
        }

        .mini-label {
            color: #43627c;
            font-size: 0.78rem;
            letter-spacing: .02em;
            text-transform: uppercase;
            margin-bottom: 6px;
        }

        .mini-value {
            color: #082d48;
            font-size: 1.45rem;
            font-weight: 700;
            line-height: 1.2;
        }

        .mono {
            font-family: 'IBM Plex Mono', monospace;
        }

        .tag {
            display: inline-block;
            border-radius: 999px;
            padding: 5px 10px;
            font-size: 0.75rem;
            font-weight: 600;
            margin-right: 6px;
            margin-top: 5px;
            border: 1px solid;
        }

        .tag-info { color: var(--accent-700); border-color: #b7def4; background: #ecf7ff; }
        .tag-ok { color: #186b64; border-color: #bceae5; background: #ecfffc; }
        .tag-warn { color: #8c5c00; border-color: #f2d9a4; background: #fff7e8; }
        .tag-risk { color: #9f3124; border-color: #efb5ad; background: #fff1ee; }

        [data-testid="stDataFrame"] div[role="table"] {
            border-radius: 12px;
            border: 1px solid #d6e8f4;
            overflow: hidden;
        }

        @keyframes fadeIn {
            from { opacity: 0; transform: translateY(8px); }
            to { opacity: 1; transform: translateY(0); }
        }
        </style>
        """,
        unsafe_allow_html=True,
    )
272
+
273
+
274
def login_page():
    """Render the landing screen: product intro on the left, auth forms on the right.

    On successful login this sets ``st.session_state.is_authenticated`` /
    ``username`` and triggers a rerun so ``main()`` routes to the upload page.
    """
    col_intro, col_auth = st.columns([1.15, 1], gap="large")
    with col_intro:
        st.markdown(
            """
            <div class="hero">
            <h2>Legal Semantic Integrity Portal</h2>
            <p>Interactive contract diagnostics with line-level visibility and legal conflict tracing.</p>
            <div>
            <span class="tag tag-info">Step 1: Secure Login</span>
            <span class="tag tag-ok">Step 2: Upload & Analyze</span>
            <span class="tag tag-warn">Step 3: Error-Line Dashboard</span>
            </div>
            </div>
            <div class="mini-card">
            <div class="mini-label">What You Get</div>
            <div class="mono">Duplicate clauses, legal contradictions, and exact page/line issue map.</div>
            </div>
            """,
            unsafe_allow_html=True,
        )

    with col_auth:
        st.markdown('<div class="step">Step 1 of 3: Login</div>', unsafe_allow_html=True)
        tab_login, tab_signup = st.tabs(["Sign In", "Create Account"])

        with tab_login:
            with st.form("login_form", clear_on_submit=False):
                username = st.text_input("Username")
                password = st.text_input("Password", type="password")
                submit = st.form_submit_button("Login")

            if submit:
                # authenticate_user returns (ok, message); message is shown either way.
                ok, message = authenticate_user(username, password)
                if ok:
                    st.session_state.is_authenticated = True
                    # Usernames are normalized (trimmed, lowercased) for session storage.
                    st.session_state.username = username.strip().lower()
                    st.success(message)
                    st.rerun()
                else:
                    st.error(message)

        with tab_signup:
            with st.form("signup_form", clear_on_submit=True):
                new_username = st.text_input("New Username")
                new_password = st.text_input("New Password", type="password")
                confirm_password = st.text_input("Confirm Password", type="password")
                create_submit = st.form_submit_button("Create Account")

            if create_submit:
                # Client-side confirmation check before hitting the user store.
                if new_password != confirm_password:
                    st.error("Passwords do not match.")
                else:
                    ok, message = create_user(new_username, new_password)
                    if ok:
                        st.success(message)
                    else:
                        st.error(message)

        st.caption("Local accounts are saved in data/users.db")
334
+
335
+
336
def run_analysis(uploaded_file, sensitivity: float, backend: str, llama_model_path: str):
    """Run the full clause-pair analysis pipeline over an uploaded document.

    Steps: extract text -> split into clauses -> embed + FAISS index ->
    compare each clause against its 5 nearest neighbours -> reconcile the
    rule-based label with the verifier (LLM or NLI) verdict.

    Args:
        uploaded_file: Streamlit UploadedFile; its name's extension selects the extractor.
        sensitivity: similarity threshold forwarded to analyze_pair.
        backend: "llama" for the local Llama verifier, otherwise NLI.
        llama_model_path: model path; resolved against PROJECT_ROOT when relative.

    Returns:
        (results, line_issues): per-pair finding dicts, and per-line issue rows
        (two rows per non-NO_CONFLICT pair, sorted by page then line).
        Returns ([], []) when no text or no clauses could be extracted.
    """
    file_ext = uploaded_file.name.split(".")[-1].lower()

    with st.spinner("Extracting text..."):
        text_data = extract_text_from_file(uploaded_file, file_ext)

    if not text_data:
        st.error("Could not extract text from this file.")
        return [], []

    with st.spinner("Extracting clauses..."):
        clauses = extract_clauses(text_data)
        # Document-level Vendor/Vendee fallback used when a clause pair
        # does not name the parties itself.
        doc_parties = _extract_document_parties(text_data)

    if not clauses:
        st.warning("No valid clauses were detected.")
        return [], []

    with st.spinner("Building semantic index..."):
        embeddings = generate_embeddings(clauses)
        index = create_faiss_index(embeddings)

    resolved_model_path = Path(llama_model_path)
    if not resolved_model_path.is_absolute():
        resolved_model_path = PROJECT_ROOT / resolved_model_path
    verifier = load_verifier(backend=backend, llama_model_path=str(resolved_model_path))

    results = []
    seen_pairs = set()  # (i, j) pairs already analyzed, i < j

    progress = st.progress(0)
    total = len(embeddings)

    for i, emb in enumerate(embeddings):
        # k=5 nearest neighbours per clause; returned as (indices, distances).
        idxs, dists = get_similar(index, emb, k=5)

        for j, dist in zip(idxs, dists):
            # Keep only i < j so each unordered pair (and self-matches) is
            # considered exactly once.
            if i >= j:
                continue
            if (i, j) in seen_pairs:
                continue
            seen_pairs.add((i, j))

            # Map a non-negative distance into (0, 1]; smaller distance -> higher similarity.
            similarity = 1 / (1 + dist)
            label, confidence, reason = analyze_pair(
                clauses[i]["text"],
                clauses[j]["text"],
                similarity,
                threshold=sensitivity,
            )

            # analyze_pair returning a falsy label means "no finding" for this pair.
            if not label:
                continue

            result = {
                "Label": label,
                "Confidence": float(confidence),
                "Reason": reason,
                "Clause 1": clauses[i]["text"],
                "Clause 2": clauses[j]["text"],
                "Page 1": clauses[i]["page"],
                "Line 1": clauses[i]["line"],
                "Page 2": clauses[j]["page"],
                "Line 2": clauses[j]["line"],
                "Location 1": f"Pg {clauses[i]['page']}, Ln {clauses[i]['line']}",
                "Location 2": f"Pg {clauses[j]['page']}, Ln {clauses[j]['line']}",
            }
            vendor_name, vendee_name = _extract_parties(
                result["Clause 1"], result["Clause 2"], doc_parties=doc_parties
            )
            result["Vendor"] = vendor_name
            result["Vendee"] = vendee_name

            # The llama verifier returns its own reason; the NLI backend only
            # returns a label, so a synthetic reason is built for it.
            if backend == "llama":
                _, llm_conf, llm_label, llm_reason = verifier.predict(result["Clause 1"], result["Clause 2"])
            else:
                _, llm_conf, llm_label = verifier.predict(result["Clause 1"], result["Clause 2"])
                llm_reason = f"NLI label: {llm_label}"

            if llm_label == "Neutral":
                # Do not erase strong rule-based findings just because LLM is neutral.
                if result["Label"] in ["NUMERIC_INCONSISTENCY", "LEGAL_CONFLICT"]:
                    result["Reason"] = f"{result['Reason']} | LLM neutral review"
                else:
                    result["Label"] = "NO_CONFLICT"
                    result["Reason"] = "LLM marked as neutral"
            elif llm_label == "Entailment":
                result["Label"] = "DUPLICATION"
                result["Reason"] = "LLM marked as entailment"
            elif llm_label == "Contradiction":
                # Only provisional rule labels are upgraded; confirmed labels keep
                # their type but adopt the verifier's reasoning.
                if result["Label"] in ["CANDIDATE", "QUALIFICATION"]:
                    result["Label"] = "LEGAL_CONFLICT"
                result["Reason"] = llm_reason

            # Final confidence is always the verifier's, overriding the rule score.
            result["Confidence"] = float(llm_conf)
            results.append(result)

        progress.progress((i + 1) / total)

    progress.empty()

    # Flatten pair findings into one row per involved line (both sides of the
    # pair), skipping pairs the verifier downgraded to NO_CONFLICT.
    line_issues = []
    for r in results:
        if r["Label"] == "NO_CONFLICT":
            continue
        line_issues.append(
            {
                "Issue Type": r["Label"],
                "Confidence": round(r["Confidence"], 4),
                "Page": r["Page 1"],
                "Line": r["Line 1"],
                "Snippet": r["Clause 1"][:160],
                "Reason": r["Reason"],
                "Vendor": r.get("Vendor", "Not found"),
                "Vendee": r.get("Vendee", "Not found"),
            }
        )
        line_issues.append(
            {
                "Issue Type": r["Label"],
                "Confidence": round(r["Confidence"], 4),
                "Page": r["Page 2"],
                "Line": r["Line 2"],
                "Snippet": r["Clause 2"][:160],
                "Reason": r["Reason"],
                "Vendor": r.get("Vendor", "Not found"),
                "Vendee": r.get("Vendee", "Not found"),
            }
        )

    line_issues.sort(key=lambda item: (item["Page"], item["Line"]))

    return results, line_issues
469
+
470
+
471
def upload_page():
    """Render the upload/scan screen (step 2) and kick off analysis.

    The sidebar maps a scan-mode radio to a sensitivity threshold; the verifier
    backend is intentionally pinned to the local llama model. On a successful
    run, results are stored in session state and the app reruns into the
    dashboard page.
    """
    st.markdown(
        """
        <div class="hero">
        <h2>Upload And Scan</h2>
        <p>Drop your legal document, choose model/backend, and run full semantic integrity analysis.</p>
        </div>
        """,
        unsafe_allow_html=True,
    )
    st.markdown('<div class="step">Step 2 of 3: Upload Document</div>', unsafe_allow_html=True)

    with st.sidebar:
        st.header("Scan Settings")
        scan_mode = st.radio(
            "Select scan mode",
            (
                "Standard Scan (Recommended)",
                "Deep Search (Fuzzy)",
                "Strict (Duplicates Only)",
            ),
            index=0,
        )

        # Scan mode -> similarity threshold: deeper scans use a lower
        # threshold (more candidate pairs), strict uses a higher one.
        if "Standard" in scan_mode:
            sensitivity = 0.60
        elif "Deep" in scan_mode:
            sensitivity = 0.50
        else:
            sensitivity = 0.85

        # Locked configuration requested by user:
        # always use local fine-tuned Llama verifier and hide controls.
        model_backend = "llama"
        llama_model_path = DEFAULT_MODEL_PATH
        st.caption("Verifier backend: llama (fixed)")
        st.caption("Local model: merged_tinyllama_instruction (fixed)")
        st.markdown(
            f"""
            <div class="mini-card">
            <div class="mini-label">Active Mode</div>
            <div class="mini-value">{scan_mode.split('(')[0].strip()}</div>
            <div class="mono">Sensitivity: {sensitivity} | Backend: {model_backend}</div>
            </div>
            """,
            unsafe_allow_html=True,
        )

    col_left, col_right = st.columns([1.35, 1], gap="large")
    with col_left:
        uploaded_file = st.file_uploader(
            "Upload a legal document",
            type=["pdf", "docx", "txt"],
            help="Supported files: PDF, DOCX, TXT",
        )
    with col_right:
        st.markdown(
            """
            <div class="mini-card">
            <div class="mini-label">Supported Inputs</div>
            <div class="mono">PDF / DOCX / TXT</div>
            </div>
            <div class="mini-card">
            <div class="mini-label">Output</div>
            <div class="mono">Pair Findings + Error-Line Dashboard + PDF/JSON Export</div>
            </div>
            """,
            unsafe_allow_html=True,
        )

    if uploaded_file is None:
        st.info("Upload a file to continue.")
        return

    # Persist the raw upload so the dashboard's PDF preview can re-render it
    # on later reruns without re-uploading.
    st.session_state.uploaded_name = uploaded_file.name
    st.session_state.uploaded_ext = uploaded_file.name.split(".")[-1].lower()
    st.session_state.uploaded_bytes = uploaded_file.getvalue()
    st.success(f"File ready: {uploaded_file.name}")

    if st.button("Run Full Analysis", type="primary"):
        try:
            results, line_issues = run_analysis(
                uploaded_file=uploaded_file,
                sensitivity=sensitivity,
                backend=model_backend,
                llama_model_path=llama_model_path,
            )
            st.session_state.results = results
            st.session_state.line_issues = line_issues
            st.session_state.analysis_done = True
            st.rerun()
        except Exception as exc:
            # Surface any pipeline failure in the UI instead of crashing the app.
            st.error(f"Analysis failed: {exc}")
564
+
565
+
566
def dashboard_page():
    """Render the findings dashboard (step 3) from session-state results.

    Sections: KPI cards, filterable issue analytics (pie chart + top-risk
    lines), a findings table, a line-level error map with jump/preview, and
    JSON/PDF export. Reads ``st.session_state.results`` / ``line_issues``
    produced by run_analysis.
    """
    st.markdown(
        """
        <div class="hero">
        <h2>Interactive Findings Dashboard</h2>
        <p>Trace conflicts by issue type, confidence, and exact line location.</p>
        </div>
        """,
        unsafe_allow_html=True,
    )
    st.markdown('<div class="step">Step 3 of 3: Dashboard</div>', unsafe_allow_html=True)

    results = st.session_state.results
    line_issues = st.session_state.line_issues

    if not results:
        st.warning("No results found.")
        return

    df = pd.DataFrame(results)
    df["Confidence"] = df["Confidence"].astype(float)

    # Everything except NO_CONFLICT counts as an actionable issue.
    issues_df = df[~df["Label"].isin(["NO_CONFLICT"])].copy()

    # --- KPI cards -------------------------------------------------------
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.markdown(
            f"""
            <div class="mini-card">
            <div class="mini-label">User</div>
            <div class="mini-value">{st.session_state.username or "N/A"}</div>
            </div>
            """,
            unsafe_allow_html=True,
        )
    with col2:
        st.markdown(
            f"""
            <div class="mini-card">
            <div class="mini-label">Pairs Reviewed</div>
            <div class="mini-value">{len(df)}</div>
            </div>
            """,
            unsafe_allow_html=True,
        )
    with col3:
        st.markdown(
            f"""
            <div class="mini-card">
            <div class="mini-label">Detected Issues</div>
            <div class="mini-value">{len(issues_df)}</div>
            </div>
            """,
            unsafe_allow_html=True,
        )
    with col4:
        max_conf = float(df["Confidence"].max()) if not df.empty else 0.0
        st.markdown(
            f"""
            <div class="mini-card">
            <div class="mini-label">Max Confidence</div>
            <div class="mini-value">{max_conf:.2f}</div>
            </div>
            """,
            unsafe_allow_html=True,
        )

    # --- Analytics over line-level issues --------------------------------
    st.subheader("Issue Analytics Dashboard")
    if line_issues:
        line_df = pd.DataFrame(line_issues).copy()
        line_df["Page"] = line_df["Page"].astype(int)
        line_df["Line"] = line_df["Line"].astype(int)
        line_df["Confidence"] = line_df["Confidence"].astype(float)

        filter_col1, filter_col2, filter_col3 = st.columns([1.2, 1, 1], gap="large")
        with filter_col1:
            issue_types = sorted(line_df["Issue Type"].dropna().unique().tolist())
            issue_sel = st.multiselect("Issue Types", issue_types, default=issue_types)
        with filter_col2:
            conf_min = st.slider("Min Confidence (analytics)", 0.0, 1.0, 0.0, 0.01)
            page_min, page_max = int(line_df["Page"].min()), int(line_df["Page"].max())
            # A range slider with equal min and max would error; show a caption instead.
            if page_min == page_max:
                st.caption(f"Single issue page: {page_min}")
                page_sel = (page_min, page_max)
            else:
                page_sel = st.slider("Page Range (analytics)", page_min, page_max, (page_min, page_max))
        with filter_col3:
            vendors = ["All"] + sorted(line_df["Vendor"].dropna().astype(str).unique().tolist())
            vendees = ["All"] + sorted(line_df["Vendee"].dropna().astype(str).unique().tolist())
            vendor_sel = st.selectbox("Vendor", vendors, index=0)
            vendee_sel = st.selectbox("Vendee", vendees, index=0)

        # Apply all selected filters in sequence.
        filtered = line_df.copy()
        if issue_sel:
            filtered = filtered[filtered["Issue Type"].isin(issue_sel)]
        filtered = filtered[filtered["Confidence"] >= conf_min]
        filtered = filtered[(filtered["Page"] >= page_sel[0]) & (filtered["Page"] <= page_sel[1])]
        if vendor_sel != "All":
            filtered = filtered[filtered["Vendor"] == vendor_sel]
        if vendee_sel != "All":
            filtered = filtered[filtered["Vendee"] == vendee_sel]

        total_issues = len(filtered)
        # Conflict rate is over ALL analyzed pairs, not the filtered subset.
        conflict_rate = (len(issues_df) / len(df) * 100.0) if len(df) else 0.0
        top_issue = filtered["Issue Type"].mode().iloc[0] if not filtered.empty else "N/A"
        # Page whose issues have the highest mean confidence.
        highest_risk_page = (
            int(filtered.groupby("Page")["Confidence"].mean().idxmax()) if not filtered.empty else "N/A"
        )
        k1, k2, k3, k4 = st.columns(4)
        k1.metric("Filtered Issues", total_issues)
        k2.metric("Conflict Rate", f"{conflict_rate:.1f}%")
        k3.metric("Top Issue Type", top_issue)
        k4.metric("Highest Risk Page", highest_risk_page)

        if filtered.empty:
            st.warning("No analytics data for current filter.")
        else:
            pie_df = filtered["Issue Type"].value_counts().reset_index()
            pie_df.columns = ["Issue Type", "Count"]
            pie_fig = px.pie(
                pie_df,
                names="Issue Type",
                values="Count",
                title="Issue Type Split",
                hole=0.35,
            )
            pie_fig.update_layout(margin=dict(l=10, r=10, t=50, b=10))
            st.plotly_chart(pie_fig, use_container_width=True)

            top_lines = filtered.sort_values(by=["Confidence"], ascending=False).head(10)
            st.markdown("**Top 10 High-Risk Lines**")
            st.dataframe(
                top_lines[["Issue Type", "Confidence", "Page", "Line", "Vendor", "Vendee", "Snippet", "Reason"]],
                use_container_width=True,
            )
    else:
        st.info("No issue analytics data available.")

    tab_findings, tab_line_map, tab_export = st.tabs(
        ["Findings Table", "Error Line Map", "Export"]
    )

    # --- Findings table --------------------------------------------------
    with tab_findings:
        st.subheader("Detected Issues")
        left, right = st.columns([1, 1.1])
        with left:
            display_mode = st.radio(
                "Display mode",
                ["Issues Only", "All Analyzed Pairs"],
                horizontal=True,
            )
        with right:
            conf_threshold = st.slider("Minimum confidence", 0.0, 1.0, 0.0, 0.01)

        display_df = issues_df if display_mode == "Issues Only" else df
        display_df = display_df[display_df["Confidence"] >= conf_threshold]

        if display_mode == "Issues Only" and display_df.empty:
            st.warning("No issues match this filter.")
            st.info("Try lower confidence or switch to 'All Analyzed Pairs'.")
        elif display_df.empty:
            st.info("No analyzed pairs match this filter.")
        else:
            display_df = display_df.copy().reset_index(drop=True)
            display_df.insert(0, "S.No", range(1, len(display_df) + 1))
            cols = [
                "S.No",
                "Label",
                "Confidence",
                "Reason",
                "Location 1",
                "Location 2",
                "Clause 1",
                "Clause 2",
            ]
            st.dataframe(display_df[cols], use_container_width=True)

    # --- Line-level error map -------------------------------------------
    with tab_line_map:
        st.subheader("Error Line Dashboard")
        if line_issues:
            line_df = pd.DataFrame(line_issues)
            labels = sorted(line_df["Issue Type"].dropna().unique().tolist())
            selected = st.multiselect("Filter issue types", labels, default=labels)
            page_min = int(line_df["Page"].min()) if not line_df.empty else 1
            page_max = int(line_df["Page"].max()) if not line_df.empty else 1
            if page_min == page_max:
                st.caption(f"Only one page with issues: Page {page_min}")
                page_range = (page_min, page_max)
            else:
                page_range = st.slider("Page range", page_min, page_max, (page_min, page_max))

            if selected:
                line_df = line_df[line_df["Issue Type"].isin(selected)]
            line_df = line_df[(line_df["Page"] >= page_range[0]) & (line_df["Page"] <= page_range[1])]

            st.dataframe(line_df, use_container_width=True)

            st.markdown("**Issue Occurrence By Line With Parties**")
            by_line = line_df.copy()
            by_line = by_line.sort_values(by=["Page", "Line", "Confidence"], ascending=[True, True, False])
            st.dataframe(
                by_line[["Issue Type", "Page", "Line", "Vendor", "Vendee", "Confidence", "Reason"]],
                use_container_width=True,
            )

            st.subheader("Jump To Error Line")
            if not line_df.empty:
                line_df = line_df.reset_index(drop=True)
                line_df.insert(0, "Item", range(1, len(line_df) + 1))
                # Human-readable option label used as the selectbox key.
                line_df["Jump"] = line_df.apply(
                    lambda r: f"#{r['Item']} | Pg {int(r['Page'])}, Ln {int(r['Line'])} | {r['Issue Type']}",
                    axis=1,
                )
                selected_jump = st.selectbox("Select issue line", line_df["Jump"].tolist())
                chosen = line_df[line_df["Jump"] == selected_jump].iloc[0]

                c1, c2 = st.columns([1.1, 1], gap="large")
                with c1:
                    st.markdown(
                        f"""
                        <div class="mini-card">
                        <div class="mini-label">Selected Line</div>
                        <div class="mini-value">Pg {int(chosen['Page'])} · Ln {int(chosen['Line'])}</div>
                        <div class="mono">{chosen['Issue Type']} | Confidence: {float(chosen['Confidence']):.2f}</div>
                        </div>
                        """,
                        unsafe_allow_html=True,
                    )
                    st.caption("Snippet")
                    st.code(str(chosen["Snippet"]), language="text")
                    st.caption("Reason")
                    st.write(str(chosen["Reason"]))

                with c2:
                    is_pdf = st.session_state.uploaded_ext == "pdf"
                    if is_pdf and st.session_state.uploaded_bytes:
                        st.caption("PDF Preview (jumped to selected page)")
                        page_number = int(chosen["Page"])
                        # Embed the stored upload as a base64 data URI; the
                        # #page fragment asks the browser viewer to open there.
                        pdf_b64 = base64.b64encode(st.session_state.uploaded_bytes).decode("utf-8")
                        pdf_html = f"""
                        <iframe
                        src="data:application/pdf;base64,{pdf_b64}#page={page_number}&zoom=110"
                        width="100%"
                        height="520"
                        style="border:1px solid #d6e8f4; border-radius: 10px;"
                        ></iframe>
                        """
                        st.markdown(pdf_html, unsafe_allow_html=True)
                    else:
                        st.info("Inline PDF preview is available for PDF uploads. Current file is not PDF.")
        else:
            st.info("No line-level issues to display.")

    # --- Export ----------------------------------------------------------
    with tab_export:
        st.subheader("Download Reports")
        json_payload = json.dumps(results, indent=2)
        st.download_button(
            label="Download JSON Report",
            data=json_payload,
            file_name="semantic_integrity_report.json",
            mime="application/json",
        )
        # The PDF report only includes confirmed issues.
        pdf_bytes = generate_pdf_report([r for r in results if r["Label"] != "NO_CONFLICT"])
        st.download_button(
            label="Download PDF Report",
            data=pdf_bytes,
            file_name="semantic_integrity_report.pdf",
            mime="application/pdf",
        )

    # Reset the analysis portion of the session to return to the upload page.
    if st.button("Analyze Another Document"):
        st.session_state.analysis_done = False
        st.session_state.results = []
        st.session_state.line_issues = []
        st.rerun()
841
+
842
+
843
def main():
    """App entry point: configure the page, then route between the login,
    upload, and dashboard screens based on session state."""
    st.set_page_config(page_title=APP_TITLE, layout="wide")
    apply_theme()
    init_state()

    title_col, logout_col = st.columns([5, 1])
    with title_col:
        st.title(APP_TITLE)
    with logout_col:
        # Logout is only offered once authenticated; it wipes the whole
        # session back to the pre-login state and reruns.
        logged_in = st.session_state.is_authenticated
        if logged_in and st.button("Logout"):
            st.session_state.is_authenticated = False
            st.session_state.username = ""
            st.session_state.analysis_done = False
            st.session_state.results = []
            st.session_state.line_issues = []
            st.rerun()

    if not st.session_state.is_authenticated:
        login_page()
        return

    # Authenticated: show results if an analysis has completed, else upload.
    if st.session_state.analysis_done:
        dashboard_page()
    else:
        upload_page()
868
+
869
+
870
+ if __name__ == "__main__":
871
+ main()