Spaces:
Running
Running
first commit
Browse files- .gitignore +58 -0
- analysis/common_analyzer.py +288 -0
- analysis/consistency_check.py +4 -0
- analysis/contradiction_check.py +23 -0
- analysis/duplication_check.py +2 -0
- analysis/llama_legal_verifier.py +93 -0
- analysis/nli_validator.py +20 -0
- analysis/nli_verifier.py +56 -0
- analysis/similarity_search.py +3 -0
- auth/user_store.py +90 -0
- backend/README.md +22 -0
- backend/app.py +717 -0
- backend/requirements.txt +5 -0
- domain_rules/belongings_check.py +7 -0
- domain_rules/belongings_keywords.py +5 -0
- domain_rules/legal_rules.py +0 -0
- embeddings/sbert_encoder.py +25 -0
- frontend/README.md +36 -0
- frontend/app.js +509 -0
- frontend/assets/legal-tech-bg.svg +31 -0
- frontend/index.html +226 -0
- frontend/issues.html +48 -0
- frontend/styles.css +957 -0
- frontend/summary.html +59 -0
- frontend/upload.html +75 -0
- frontend/workflow.html +11 -0
- ingestion/docx_reader.py +5 -0
- ingestion/pdf_reader.py +9 -0
- main.py +43 -0
- preprocessing/clause_extraction.py +47 -0
- preprocessing/text_extractor.py +54 -0
- reproduce_issue.py +69 -0
- requirements.txt +11 -0
- storage/faiss_index.py +8 -0
- ui/app.py +871 -0
.gitignore
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# OS / editor
|
| 2 |
+
.DS_Store
|
| 3 |
+
Thumbs.db
|
| 4 |
+
.idea/
|
| 5 |
+
.vscode/
|
| 6 |
+
|
| 7 |
+
# Python
|
| 8 |
+
__pycache__/
|
| 9 |
+
*.py[cod]
|
| 10 |
+
*$py.class
|
| 11 |
+
.pytest_cache/
|
| 12 |
+
.mypy_cache/
|
| 13 |
+
.ruff_cache/
|
| 14 |
+
.coverage
|
| 15 |
+
.coverage.*
|
| 16 |
+
htmlcov/
|
| 17 |
+
dist/
|
| 18 |
+
build/
|
| 19 |
+
*.egg-info/
|
| 20 |
+
|
| 21 |
+
# Virtual environments
|
| 22 |
+
venv/
|
| 23 |
+
.venv/
|
| 24 |
+
backend/.venv/
|
| 25 |
+
ENV/
|
| 26 |
+
env/
|
| 27 |
+
|
| 28 |
+
# Environment files
|
| 29 |
+
.env
|
| 30 |
+
.env.*
|
| 31 |
+
!.env.example
|
| 32 |
+
|
| 33 |
+
# Logs
|
| 34 |
+
*.log
|
| 35 |
+
logs/
|
| 36 |
+
|
| 37 |
+
# Databases / local state
|
| 38 |
+
*.db
|
| 39 |
+
*.sqlite
|
| 40 |
+
*.sqlite3
|
| 41 |
+
|
| 42 |
+
# Runtime artifacts
|
| 43 |
+
output/
|
| 44 |
+
tmp/
|
| 45 |
+
*.tmp
|
| 46 |
+
|
| 47 |
+
# Frontend
|
| 48 |
+
node_modules/
|
| 49 |
+
.next/
|
| 50 |
+
coverage/
|
| 51 |
+
|
| 52 |
+
# Local model/checkpoint artifacts (large)
|
| 53 |
+
merged_tinyllama_instruction/
|
| 54 |
+
*.bin
|
| 55 |
+
*.pt
|
| 56 |
+
*.ckpt
|
| 57 |
+
*.safetensors
|
| 58 |
+
|
analysis/common_analyzer.py
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Strict Domain Analyzer for Legal Documents.
|
| 3 |
+
Implements specific checks for:
|
| 4 |
+
- Entity Roles (Vendor vs Vendee)
|
| 5 |
+
- Domain Categories (Financial, Possession, Ownership, etc.)
|
| 6 |
+
- Timeline Logic (Agreement vs Registration)
|
| 7 |
+
- Numeric Consistency within context
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import re
|
| 11 |
+
|
| 12 |
+
# =========================
|
| 13 |
+
# 1. STRICT CLASSIFICATION
|
| 14 |
+
# =========================
|
| 15 |
+
|
| 16 |
+
def is_legal_boilerplate(text):
    """Return True when *text* looks like a legal header, footer or witness block.

    Two independent rules apply (case-insensitively):
    - any mention of "signed by" or "witness" marks the text as boilerplate,
      regardless of its length;
    - a fragment of fewer than five words is boilerplate when it contains one
      of the standard marker phrases (schedule, jurisdiction, boundaries, ...).
    """
    lowered = text.lower()

    # Signature / attestation wording is decisive on its own.
    if "witness" in lowered or "signed by" in lowered:
        return True

    boilerplate_markers = (
        "in witness whereof", "signed and delivered", "witnesses:",
        "schedule", "jurisdiction", "arbitration", "notice",
        "all that piece and parcel", "north by", "south by",
    )
    is_short_fragment = len(lowered.split()) < 5
    return is_short_fragment and any(
        marker in lowered for marker in boilerplate_markers
    )
|
| 34 |
+
|
| 35 |
+
def get_clause_domain(text):
    """
    Assign a clause to one legal domain using case-insensitive substring rules.

    Rules are evaluated in a fixed order and the first hit wins:
    RECITAL, DEFINITION, FINANCIAL, POSSESSION, OWNERSHIP, ENCUMBRANCE,
    ADMINISTRATIVE, OPERATIVE. 'GENERAL' is returned when nothing matches.
    """
    lowered = text.lower()

    # Recitals (background) are recognised by their "whereas" opener.
    if lowered.startswith("whereas") or "and whereas" in lowered:
        return "RECITAL"

    definition_markers = ("shall mean", "expression vendor", "expression vendee")
    if any(marker in lowered for marker in definition_markers):
        return "DEFINITION"

    # Keyword-driven domains, listed in precedence order.
    keyword_domains = (
        ("FINANCIAL", ("rs.", "rupees", "paid", "consideration", "sum of",
                       "amount", "price", "cheque", "bank")),
        ("POSSESSION", ("possession", "handed over", "delivered", "vacant")),
        ("OWNERSHIP", ("owner", "title", "interest", "rights", "absolute",
                       "fee simple")),
        ("ENCUMBRANCE", ("encumbrance", "mortgage", "loan", "charge", "lien",
                         "litigation")),
        ("ADMINISTRATIVE", ("witness", "signed", "schedule", "jurisdiction",
                            "arbitration", "notice")),
    )
    for domain, keywords in keyword_domains:
        if any(keyword in lowered for keyword in keywords):
            return domain

    # Operative (action) clauses.
    if lowered.startswith("that") or "hereby" in lowered or "now this deed" in lowered:
        return "OPERATIVE"

    return "GENERAL"
|
| 75 |
+
|
| 76 |
+
def get_entities(text):
    """
    Report which party roles a clause mentions.

    Returns a set containing "Vendor" and/or "Vendee" depending on whether the
    corresponding word occurs (case-insensitively) in *text*; the set is empty
    when neither is mentioned.
    """
    lowered = text.lower()
    role_labels = (("vendor", "Vendor"), ("vendee", "Vendee"))
    return {label for needle, label in role_labels if needle in lowered}
|
| 85 |
+
|
| 86 |
+
# =========================
|
| 87 |
+
# 2. EXTRACTION HELPERS
|
| 88 |
+
# =========================
|
| 89 |
+
|
| 90 |
+
def extract_numbers(text):
    """Extract the integer values that appear in *text*, in order.

    Commas are treated as digit-group separators and removed, so both western
    ("1,000,000") and Indian ("1,00,000") grouping yield a single value, and
    plain digit runs of any length ("5000") are captured as well.

    Returns:
        list[int]: one entry per number found; empty when there are none.
        A decimal such as "1.25" still yields two integers (1 and 25).
    """
    # The previous pattern required groups of exactly three digits, which
    # dropped un-grouped numbers with 4+ digits and split "1,00,000" into
    # 1 and 0. Accept comma-separated groups of any width instead.
    return [
        int(token.replace(",", ""))
        for token in re.findall(r'\b\d+(?:,\d+)*\b', text)
    ]
|
| 94 |
+
|
| 95 |
+
def has_negation(text):
    """Return True when *text* contains a negation word.

    Recognised words: "not", "never", "no", "cannot" (so "must not" and
    "shall not" are covered through "not"). Matching is case-insensitive and
    restricted to whole words, so "notice", "north", "know" or
    "notwithstanding" no longer count as negations.
    """
    # "no" followed by an optional dot and a number is the abbreviation for
    # "number" ("Door No. 12", "Survey No 45"), not a negation, so skip it.
    negation_pattern = r"\b(?:not|never|cannot|no(?!\.?\s*\d))\b"
    return re.search(negation_pattern, text.lower()) is not None
|
| 98 |
+
|
| 99 |
+
def has_exception_language(text):
    """Tell whether *text* contains a legal exception/qualification phrase.

    The check is a case-insensitive substring search for phrases such as
    "subject to" or "notwithstanding".
    """
    lowered = text.lower()
    for phrase in (
        "subject to", "notwithstanding", "except as provided",
        "unless otherwise", "provided however", "without prejudice",
    ):
        if phrase in lowered:
            return True
    return False
|
| 106 |
+
|
| 107 |
+
def is_definition(text):
    """Return True if the clause contains definition wording.

    A clause counts as a definition when it contains "shall mean", "means" or
    "defined as" (case-insensitive substring match).
    """
    lowered = text.lower()
    definition_phrases = ("shall mean", "means", "defined as")
    return any(phrase in lowered for phrase in definition_phrases)
|
| 113 |
+
|
| 114 |
+
def is_party_intro(text):
    """Detect clauses that merely describe a party (a "bio" line).

    Three kinds of indicators are looked for in the lower-cased text:
    an address ("door no", "d.no", "residing at", "post, village"),
    a family relation ("son", "wife", "s/o", ...) and
    an identifier ("aadhaar", "pan no", "id card", "mobile no").
    The clause is a party introduction when at least two kinds are present.
    """
    lowered = text.lower()

    indicator_patterns = (
        # Address wording
        r"(door\s*no|d\.no|residing\s*at|post\s*,\s*village)",
        # Family relations, as whole words
        r"\b(son|wife|daughter|husband|father|mother|s/o|w/o|d/o)\b",
        # Identity documents / contact numbers
        r"(aadhaar|pan\s*no|id\s*card|mobile\s*no)",
    )
    kinds_present = sum(
        1 for pattern in indicator_patterns if re.search(pattern, lowered)
    )
    return kinds_present >= 2
|
| 140 |
+
|
| 141 |
+
# =========================
|
| 142 |
+
# 3. CORE LOGIC GATES
|
| 143 |
+
# =========================
|
| 144 |
+
|
| 145 |
+
def analyze_pair(text1, text2, similarity, threshold=0.75):
    """
    Run two clauses through a sequence of rule-based gates.

    Each gate either rejects the pair (label ``None``), classifies it, or lets
    it fall through to the next gate.

    Args:
        text1: First clause text.
        text2: Second clause text.
        similarity: Similarity score already computed for the pair.
        threshold: Minimum similarity score to consider as CANDIDATE
            (default 0.75).

    Returns:
        A ``(label, score, reason)`` tuple. ``label`` is ``None`` when the pair
        is skipped (score 0.0), otherwise one of "NUMERIC_INCONSISTENCY",
        "QUALIFICATION", "LEGAL_CONFLICT" or "CANDIDATE".
    """
    t1_lower, t2_lower = text1.lower(), text2.lower()

    # --- GATE 0: BOILERPLATE CHECK ---
    if is_legal_boilerplate(text1) or is_legal_boilerplate(text2):
        return None, 0.0, "Boilerplate (Skipped)"

    # --- GATE 1: DOMAIN MISMATCH ---
    d1 = get_clause_domain(text1)
    d2 = get_clause_domain(text2)

    # Two different specific domains are only compared when similarity is
    # very high (which suggests one of them was misclassified).
    if d1 != "GENERAL" and d2 != "GENERAL" and d1 != d2:
        if similarity < 0.85:
            return None, 0.0, "Domain Mismatch"

    # --- HARDENED CHECK: GENERAL vs SPECIFIC ---
    # Common source of noise: "Any other details" matching "The price is Rs 100".
    if (d1 == "GENERAL") != (d2 == "GENERAL"):
        if similarity < 0.80:
            return None, 0.0, "General vs Specific Domain (Skipped)"

    # --- SPECIFIC FILTER: MONEY vs TIMELINE ---
    # Prevents "Price is X" vs "Payment due on Date Y" (confusing numbers/dates).
    date_pattern = r"\d{1,2}[./-]\d{1,2}[./-]\d{2,4}"
    is_financial = d1 == "FINANCIAL" or d2 == "FINANCIAL"
    has_date = re.search(date_pattern, text1) or re.search(date_pattern, text2)

    if is_financial and has_date:
        # Unless the clauses are explicitly about a payment schedule, a
        # price clause and a dated clause are likely about different things.
        if "schedule" not in t1_lower and "schedule" not in t2_lower:
            if similarity < 0.85:
                return None, 0.0, "Financial vs Timeline Mismatch"

    # --- SPECIFIC FILTER: ELIGIBILITY vs ASSISTANCE ---
    # Prevents "Eligibility criteria" vs "Assistance details" (common in schemes).
    eligibility_words = ("eligible", "qualify", "criteria", "requirement")
    assistance_words = ("provide", "grant", "subsidy", "support", "assistance")
    is_eligibility = any(w in t1_lower for w in eligibility_words) or \
        any(w in t2_lower for w in eligibility_words)
    is_assistance = any(w in t1_lower for w in assistance_words) or \
        any(w in t2_lower for w in assistance_words)

    if is_eligibility and is_assistance:
        # Unless there is a precise overlap, these are distinct sections.
        if similarity < 0.85:
            return None, 0.0, "Eligibility vs Assistance Mismatch"

    # --- GATE 1.5: PARTY DESCRIPTION CHECK ---
    # If both clauses are just descriptions of people (addresses, relations), skip.
    if is_party_intro(text1) and is_party_intro(text2):
        return None, 0.0, "Party Description (Skipped)"

    # --- GATE 2: ENTITY MISMATCH ---
    e1 = get_entities(text1)
    e2 = get_entities(text2)
    # One clause is Vendor ONLY and the other is Vendee ONLY -> skip, unless
    # similarity is very high.
    if e1 and e2 and e1.isdisjoint(e2):
        if similarity < 0.85:
            return None, 0.0, "Entity Role Mismatch"

    # --- GATE 2.5: DEFINITION GUARD ---
    # A definition is only compared with another definition (conflicting
    # definitions), never with an operative clause.
    if is_definition(text1) != is_definition(text2):
        return None, 0.0, "Definition vs Operative"

    # --- GATE 3: POSSESSION TIMELINE ---
    # "Possession at agreement" vs "Possession at registration" is NOT a contradiction.
    if d1 == "POSSESSION" and d2 == "POSSESSION":
        start_keywords = ("agreement", "earnest")
        end_keywords = ("registration", "sale deed", "final")

        start1 = any(k in t1_lower for k in start_keywords)
        start2 = any(k in t2_lower for k in start_keywords)
        end1 = any(k in t1_lower for k in end_keywords)
        end2 = any(k in t2_lower for k in end_keywords)

        # If one talks about the start and the other about the end, it is a
        # sequence. Both orderings are checked: the pair may arrive as
        # (start, end) or as (end, start).
        if (start1 and end2) or (end1 and start2):
            return None, 0.0, "Possession Timeline Sequence"

    # --- GATE 4: NUMERIC REASONING ---
    # Only compare numbers if context allows.
    nums1 = extract_numbers(text1)
    nums2 = extract_numbers(text2)

    if nums1 and nums2 and nums1 != nums2:
        # MAGNITUDE CHECK: if the largest numbers differ by > 100x they are
        # likely different units (e.g. price vs area).
        max1, max2 = max(nums1), max(nums2)
        if max1 > 0 and max2 > 0:
            ratio = max1 / max2 if max1 > max2 else max2 / max1
            if ratio > 100:
                return None, 0.0, "Numeric Magnitude Mismatch (Likely Unit Diff)"

        # Same specific domain: the numbers are very likely comparable.
        if d1 == d2 and d1 != "GENERAL":
            return "NUMERIC_INCONSISTENCY", 0.9, f"Mismatch in {d1} values"

        # Otherwise be careful, but a VERY high similarity may still
        # indicate a contradiction.
        if similarity > 0.9:
            return "NUMERIC_INCONSISTENCY", 0.85, "Numeric Mismatch in similar context"

    # --- GATE 4.5: EXCEPTION/HIERARCHY CHECK ---
    # A slightly lower threshold is used for exception detection to be safe.
    exception_threshold = max(0.65, threshold - 0.05)
    if similarity > exception_threshold:
        # Exactly one of the two clauses carries exception language.
        if has_exception_language(text1) != has_exception_language(text2):
            return "QUALIFICATION", similarity, "Legal Exception/Qualification detected (Not a Conflict)"

    # --- GATE 5: LOGICAL NEGATION ---
    if has_negation(text1) != has_negation(text2):
        # Only flag when high similarity implies both clauses talk about the
        # same thing.
        if similarity > 0.85:
            return "LEGAL_CONFLICT", 0.8, "Logical Negation detected"

    # --- FINAL GATE: CANDIDATE FOR NLI ---
    # All blocking gates were passed; if similarity is high, let NLI decide.
    if similarity > threshold:
        return "CANDIDATE", similarity, "High Similarity - Pending NLI"

    return None, 0.0, "Low Similarity"
|
analysis/consistency_check.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def check_inconsistency(text1, text2):
    """Return True when both clauses contain a modal verb.

    The modals looked for are "shall", "must" and "may", matched as
    case-insensitive substrings.
    """
    modal_verbs = ("shall", "must", "may")

    def mentions_modal(clause):
        lowered = clause.lower()
        return any(verb in lowered for verb in modal_verbs)

    return mentions_modal(text1) and mentions_modal(text2)
|
analysis/contradiction_check.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
def extract_number(text):
    """Return the first amount that follows "INR" in *text*, or None.

    Commas inside the amount are treated as digit-group separators and
    removed before conversion ("INR 1,00,000" -> 100000).
    """
    # Require the amount to start with a digit: the previous character class
    # "[\d,]+" also matched a run of commas alone ("INR ,"), which made
    # int("") raise ValueError.
    match = re.search(r'INR\s*(\d[\d,]*)', text)
    if match is None:
        return None
    return int(match.group(1).replace(",", ""))
|
| 8 |
+
|
| 9 |
+
def numeric_contradiction(text1, text2):
    """Return True when both clauses state an INR amount and the amounts differ.

    A clause without an extractable amount never produces a contradiction.
    """
    amounts = (extract_number(text1), extract_number(text2))
    if None in amounts:
        return False
    first_amount, second_amount = amounts
    return first_amount != second_amount
|
| 13 |
+
|
| 14 |
+
def ownership_contradiction(text1, text2):
    """Detect the "must not own" vs "may be eligible" conflict.

    Returns True when one clause contains "must not own" and the other
    contains "may be eligible", in either order (case-insensitive).
    """
    first, second = text1.lower(), text2.lower()
    prohibition, permission = "must not own", "may be eligible"

    forward = prohibition in first and permission in second
    backward = prohibition in second and permission in first
    return forward or backward
|
| 21 |
+
|
| 22 |
+
def check_contradiction(text1, text2):
    """Return True if the pair shows a numeric or an ownership contradiction."""
    # The numeric rule is tried first; the ownership rule is only consulted
    # when it does not fire.
    if numeric_contradiction(text1, text2):
        return True
    return ownership_contradiction(text1, text2)
|
analysis/duplication_check.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def check_duplication(similarity, threshold=0.90):
    """Return True when *similarity* reaches *threshold* (inclusive)."""
    meets_threshold = similarity >= threshold
    return meets_threshold
|
analysis/llama_legal_verifier.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
from typing import Tuple
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class LlamaLegalVerifier:
    """
    Verifies whether two legal clauses are contradictory, entailing, or neutral
    using a local fine-tuned causal language model.

    The model is prompted to answer in a fixed "Label / Confidence / Reason"
    format; the static ``_parse_*`` helpers extract those fields from the
    generated text and fall back to looser heuristics when the format is not
    followed.
    """

    def __init__(self, model_path: str):
        """Load tokenizer and model from the local directory *model_path*.

        Raises:
            FileNotFoundError: if *model_path* is not an existing directory.
        """
        if not os.path.isdir(model_path):
            raise FileNotFoundError(f"Model path not found: {model_path}")

        self.model_path = model_path
        use_cuda = torch.cuda.is_available()
        # Pipeline device index: first GPU when CUDA is available, else CPU (-1).
        self.device = 0 if use_cuda else -1
        dtype = torch.float16 if use_cuda else torch.float32

        tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            local_files_only=True,
            torch_dtype=dtype,
        )
        # Fall back to the EOS token when the tokenizer defines no pad token.
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token_id = tokenizer.eos_token_id

        self.generator = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            device=self.device,
        )

    @staticmethod
    def _parse_label(text: str) -> str:
        """Extract the NLI label from generated *text*.

        An explicit "Label: <value>" field takes precedence. Without it the
        whole text is scanned for label keywords; "Neutral" is the default.
        """
        lowered = text.lower()

        # Prefer the explicit field so that words in the free-text reason
        # (e.g. "there is no contradiction") cannot override the stated label.
        explicit = re.search(
            r"label\s*:\s*(contradiction|entailment|neutral)\b", lowered
        )
        if explicit:
            return explicit.group(1).capitalize()

        if "contradiction" in lowered:
            return "Contradiction"
        if "entailment" in lowered or "duplicate" in lowered or "same meaning" in lowered:
            return "Entailment"
        return "Neutral"

    @staticmethod
    def _parse_confidence(text: str) -> float:
        """Extract a confidence in [0.0, 1.0] from generated *text*.

        An explicit "Confidence: <value>" field takes precedence; otherwise the
        first standalone number between 0 and 1 is used. Returns 0.60 when no
        such number is present.
        """
        number = r"(0(?:\.\d+)?|1(?:\.0+)?)(?!\d)"

        # Prefer the explicit field so that unrelated numbers appearing
        # earlier (e.g. "Clause 1") are not mistaken for the confidence.
        explicit = re.search(r"confidence\s*:\s*" + number, text, flags=re.IGNORECASE)
        if explicit:
            raw_value = explicit.group(1)
        else:
            matches = re.findall(r"(?<!\d)" + number, text)
            if not matches:
                return 0.60
            raw_value = matches[0]

        return max(0.0, min(1.0, float(raw_value)))

    @staticmethod
    def _parse_reason(text: str) -> str:
        """Return the text after "Reason:" (or the whole text), capped at 300 chars."""
        m = re.search(r"reason\s*:\s*(.+)", text, flags=re.IGNORECASE | re.DOTALL)
        if m:
            return m.group(1).strip()[:300]
        return text.strip()[:300]

    def predict(self, text1: str, text2: str) -> Tuple[bool, float, str, str]:
        """Classify the relationship between two clauses.

        Returns:
            ``(is_contradiction, confidence, label, reason)`` where
            ``is_contradiction`` is True only for a "Contradiction" label with
            confidence of at least 0.50.
        """
        prompt = f"""You are a legal NLI verifier.
Classify relationship between Clause A and Clause B.
Allowed labels: Contradiction, Entailment, Neutral.
Return exactly in this format:
Label: <Contradiction|Entailment|Neutral>
Confidence: <0.00-1.00>
Reason: <one short legal reason>

Clause A: {text1}
Clause B: {text2}
"""

        # Greedy decoding; only the newly generated text is returned, so the
        # prompt itself is never parsed.
        output = self.generator(
            prompt,
            max_new_tokens=96,
            do_sample=False,
            return_full_text=False,
            pad_token_id=self.generator.tokenizer.eos_token_id,
        )[0]["generated_text"]

        label = self._parse_label(output)
        confidence = self._parse_confidence(output)
        reason = self._parse_reason(output)
        is_contradiction = label == "Contradiction" and confidence >= 0.50
        return is_contradiction, confidence, label, reason
|
analysis/nli_validator.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import pipeline
|
| 2 |
+
|
| 3 |
+
# Load once (slow only first time)
|
| 4 |
+
# Shared classifier, built once at import time (the first load is slow).
nli_pipeline = pipeline(
    "text-classification",
    model="roberta-large-mnli",
    device=-1,  # CPU
)


def nli_contradiction(text1, text2, threshold=0.8):
    """
    Return True if the NLI model strongly predicts a contradiction.

    The two clauses are joined with the "</s></s>" separator and classified
    as one input; the result is True only when the top label is
    "CONTRADICTION" and its score is at least *threshold*.
    """
    paired_input = f"{text1} </s></s> {text2}"
    top_prediction = nli_pipeline(paired_input)[0]

    if top_prediction["label"] != "CONTRADICTION":
        return False
    return top_prediction["score"] >= threshold
|
analysis/nli_verifier.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import numpy as np
|
| 3 |
+
from sentence_transformers import CrossEncoder
|
| 4 |
+
from huggingface_hub import login
|
| 5 |
+
|
| 6 |
+
class NLIVerifier:
    """Cross-encoder based NLI check for a pair of clauses."""

    def __init__(self, model_name="cross-encoder/nli-distilroberta-base", hf_token=None):
        """
        Initialize the NLI model using CrossEncoder.

        Args:
            model_name: Cross-encoder model to load.
            hf_token: Optional Hugging Face token; when given, a login is
                attempted first and a failure is only reported, not raised.

        If the model cannot be loaded, ``self.model`` is set to None and
        ``predict`` reports "Model Error".
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Loading NLI Model ({self.device})...")

        if hf_token:
            try:
                login(token=hf_token)
                print("Logged in to Hugging Face.")
            except Exception as e:
                # Best effort: continue without a login.
                print(f"HF Login Warning: {e}")

        try:
            self.model = CrossEncoder(model_name, device=self.device)
            print("NLI Model Loaded Successfully.")
        except Exception as e:
            print(f"Error loading model: {e}")
            self.model = None

        # Label mapping for cross-encoder/nli-distilroberta-base
        # 0: Contradiction
        # 1: Entailment
        # 2: Neutral
        self.labels = ["Contradiction", "Entailment", "Neutral"]

    def predict(self, text1, text2):
        """
        Verify if text1 and text2 contradict each other.

        Returns:
            ``(is_contradiction, confidence, label)`` as plain ``bool``,
            ``float`` and ``str``. ``(False, 0.0, "Model Error")`` is returned
            when no model is loaded.
        """
        if not self.model:
            return False, 0.0, "Model Error"

        # CrossEncoder returns logits, one row per input pair.
        logits = np.asarray(self.model.predict([(text1, text2)])[0], dtype=np.float64)

        # Softmax with the maximum subtracted first: mathematically identical,
        # but large logits can no longer overflow to inf and produce NaN.
        exp_scores = np.exp(logits - np.max(logits))
        probs = exp_scores / np.sum(exp_scores)

        pred_label_idx = int(np.argmax(probs))
        confidence = float(probs[pred_label_idx])
        label = self.labels[pred_label_idx]

        # Contradiction (index 0) must be the winner with confidence above 0.5.
        is_contradiction = pred_label_idx == 0 and confidence > 0.5

        return is_contradiction, confidence, label
|
analysis/similarity_search.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def get_similar(index, vector, k=5):
    """Query *index* for the *k* nearest entries to *vector*.

    The vector is reshaped to a single-row 2-D query before calling
    ``index.search``. Returns ``(indices, distances)`` for that single query,
    i.e. the first row of each result array.
    """
    query = vector.reshape(1, -1)
    distances, indices = index.search(query, k)
    first_row_ids = indices[0]
    first_row_distances = distances[0]
    return first_row_ids, first_row_distances
|
auth/user_store.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import hashlib
|
| 2 |
+
import os
|
| 3 |
+
import secrets
|
| 4 |
+
import sqlite3
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Tuple
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
PROJECT_ROOT = Path(__file__).resolve().parents[1]
|
| 10 |
+
DATA_DIR = PROJECT_ROOT / "data"
|
| 11 |
+
DB_PATH = DATA_DIR / "users.db"
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _ensure_db() -> None:
    """Create the data directory and the ``users`` table if they are missing."""
    create_users_table = """
            CREATE TABLE IF NOT EXISTS users (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                username TEXT UNIQUE NOT NULL,
                password_hash TEXT NOT NULL,
                salt TEXT NOT NULL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
            """

    DATA_DIR.mkdir(parents=True, exist_ok=True)
    connection = sqlite3.connect(DB_PATH)
    try:
        connection.execute(create_users_table)
        connection.commit()
    finally:
        # Always release the connection, even if table creation fails.
        connection.close()
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _hash_password(password: str, salt_hex: str) -> str:
|
| 35 |
+
salt = bytes.fromhex(salt_hex)
|
| 36 |
+
digest = hashlib.pbkdf2_hmac("sha256", password.encode("utf-8"), salt, 120_000)
|
| 37 |
+
return digest.hex()
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _normalize_username(username: str) -> str:
|
| 41 |
+
return username.strip().lower()
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def create_user(username: str, password: str) -> Tuple[bool, str]:
    """Register a new account.

    The username is normalized before validation and storage. The password is
    stored as a salted hash together with its freshly generated salt.

    Returns:
        ``(True, message)`` on success, or ``(False, message)`` when the
        username is shorter than 3 characters, the password is shorter than
        8 characters, or the username is already taken.
    """
    _ensure_db()
    clean_name = _normalize_username(username)

    if len(clean_name) < 3:
        return False, "Username must be at least 3 characters."
    if len(password) < 8:
        return False, "Password must be at least 8 characters."

    salt_hex = secrets.token_hex(16)
    new_row = (clean_name, _hash_password(password, salt_hex), salt_hex)

    connection = sqlite3.connect(DB_PATH)
    try:
        connection.execute(
            "INSERT INTO users (username, password_hash, salt) VALUES (?, ?, ?)",
            new_row,
        )
        connection.commit()
    except sqlite3.IntegrityError:
        # The UNIQUE constraint on username was violated.
        return False, "Username already exists."
    else:
        return True, "Account created successfully."
    finally:
        connection.close()
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def authenticate_user(username: str, password: str) -> Tuple[bool, str]:
    """Check a username/password pair against the stored credentials.

    The username is normalized before lookup; the password is hashed with the
    salt stored for that user and compared with the stored hash.

    Returns:
        ``(True, "Login successful.")`` when the credentials match, otherwise
        ``(False, message)`` with "User not found." or "Incorrect password.".
    """
    _ensure_db()
    normalized = _normalize_username(username)

    conn = sqlite3.connect(DB_PATH)
    try:
        row = conn.execute(
            "SELECT password_hash, salt FROM users WHERE username = ?",
            (normalized,),
        ).fetchone()
    finally:
        conn.close()

    if row is None:
        return False, "User not found."

    stored_hash, salt_hex = row
    candidate_hash = _hash_password(password, salt_hex)
    # Constant-time comparison: a plain != can return early on the first
    # differing character, leaking timing information about the stored hash.
    if not secrets.compare_digest(candidate_hash, stored_hash):
        return False, "Incorrect password."
    return True, "Login successful."
|
backend/README.md
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Backend (Flask + SQLite)
|
| 2 |
+
|
| 3 |
+
## Setup
|
| 4 |
+
|
| 5 |
+
```bash
|
| 6 |
+
cd backend
|
| 7 |
+
python3 -m venv .venv
|
| 8 |
+
source .venv/bin/activate
|
| 9 |
+
pip install -r requirements.txt
|
| 10 |
+
python app.py
|
| 11 |
+
```
|
| 12 |
+
|
| 13 |
+
Server runs on `http://127.0.0.1:5000`.
|
| 14 |
+
|
| 15 |
+
## APIs
|
| 16 |
+
|
| 17 |
+
- `GET /api/health`
|
| 18 |
+
- `POST /api/register`
|
| 19 |
+
- `POST /api/login`
|
| 20 |
+
- `POST /api/analyze` (multipart form: `file`, `scanMode`)
|
| 21 |
+
|
| 22 |
+
SQLite database file is created at `backend/app.db`.
|
backend/app.py
ADDED
|
@@ -0,0 +1,717 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import io
|
| 4 |
+
import os
|
| 5 |
+
import sqlite3
|
| 6 |
+
import sys
|
| 7 |
+
from difflib import SequenceMatcher
|
| 8 |
+
from datetime import datetime, timezone
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
from flask import Flask, jsonify, request
|
| 12 |
+
from flask_cors import CORS
|
| 13 |
+
from werkzeug.security import check_password_hash, generate_password_hash
|
| 14 |
+
|
| 15 |
+
BASE_DIR = Path(__file__).resolve().parent
|
| 16 |
+
PROJECT_ROOT = BASE_DIR.parent
|
| 17 |
+
DB_PATH = Path(os.getenv("DB_PATH", BASE_DIR / "app.db"))
|
| 18 |
+
|
| 19 |
+
app = Flask(__name__)
|
| 20 |
+
CORS(app)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def _bootstrap_site_packages() -> None:
    """Append extra ``site-packages`` directories to ``sys.path`` when present.

    Two locations are probed for the running interpreter's ``major.minor``
    version: ``<PROJECT_ROOT>/venv`` and the user-local ``~/.local`` tree.
    A directory is appended only if it exists and is not already listed.
    """
    version_dir = "python{}.{}".format(sys.version_info.major, sys.version_info.minor)
    for root in (PROJECT_ROOT / "venv", Path.home() / ".local"):
        site_dir = root / "lib" / version_dir / "site-packages"
        entry = str(site_dir)
        if entry in sys.path or not site_dir.exists():
            continue
        sys.path.append(entry)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
_bootstrap_site_packages()
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def get_db_connection() -> sqlite3.Connection:
    """Open a SQLite connection to ``DB_PATH`` whose rows allow access by column name."""
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def init_db() -> None:
    """Create the ``users`` table if it does not exist yet.

    The table stores ``id``, ``full_name``, a unique ``email``,
    ``password_hash`` and ``created_at``.
    """
    from contextlib import closing

    # A sqlite3 connection used as a context manager only commits or rolls
    # back; it does not close. closing() is what releases the handle.
    with closing(get_db_connection()) as conn:
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS users (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                full_name TEXT NOT NULL,
                email TEXT NOT NULL UNIQUE,
                password_hash TEXT NOT NULL,
                created_at TEXT NOT NULL
            )
            """
        )
        conn.commit()
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def _extract_text_data(file_bytes: bytes, file_ext: str):
|
| 66 |
+
if file_ext == "txt":
|
| 67 |
+
return [{"text": file_bytes.decode("utf-8", errors="ignore"), "page": 1}]
|
| 68 |
+
|
| 69 |
+
if file_ext == "pdf":
|
| 70 |
+
import pdfplumber
|
| 71 |
+
|
| 72 |
+
extracted = []
|
| 73 |
+
with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
|
| 74 |
+
for i, page in enumerate(pdf.pages):
|
| 75 |
+
text = page.extract_text() or ""
|
| 76 |
+
if text.strip():
|
| 77 |
+
extracted.append({"text": text, "page": i + 1})
|
| 78 |
+
return extracted
|
| 79 |
+
|
| 80 |
+
if file_ext == "docx":
|
| 81 |
+
import docx
|
| 82 |
+
|
| 83 |
+
doc = docx.Document(io.BytesIO(file_bytes))
|
| 84 |
+
text = "\n".join(p.text for p in doc.paragraphs if p.text is not None)
|
| 85 |
+
return [{"text": text, "page": 1}] if text.strip() else []
|
| 86 |
+
|
| 87 |
+
raise ValueError("Unsupported file type. Use PDF, DOCX, or TXT.")
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def _extract_clauses(text_data):
|
| 91 |
+
import re
|
| 92 |
+
|
| 93 |
+
clauses = []
|
| 94 |
+
clause_id = 0
|
| 95 |
+
|
| 96 |
+
for chunk in text_data:
|
| 97 |
+
raw_text = chunk.get("text", "")
|
| 98 |
+
page_num = chunk.get("page", 1)
|
| 99 |
+
pattern = re.compile(r".+?(?:[.!?](?:\s+|$)|$)", re.DOTALL)
|
| 100 |
+
|
| 101 |
+
for match in pattern.finditer(raw_text):
|
| 102 |
+
cleaned = " ".join(match.group(0).split())
|
| 103 |
+
if len(cleaned) < 30:
|
| 104 |
+
continue
|
| 105 |
+
|
| 106 |
+
start_idx = match.start()
|
| 107 |
+
line_no = raw_text[:start_idx].count("\n") + 1
|
| 108 |
+
clauses.append(
|
| 109 |
+
{
|
| 110 |
+
"id": clause_id,
|
| 111 |
+
"text": cleaned,
|
| 112 |
+
"page": page_num,
|
| 113 |
+
"line": line_no,
|
| 114 |
+
}
|
| 115 |
+
)
|
| 116 |
+
clause_id += 1
|
| 117 |
+
|
| 118 |
+
return clauses
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def _normalize_person_name(raw: str) -> str:
|
| 122 |
+
import re
|
| 123 |
+
|
| 124 |
+
if not raw:
|
| 125 |
+
return ""
|
| 126 |
+
|
| 127 |
+
cleaned = " ".join(str(raw).split())
|
| 128 |
+
cleaned = re.sub(r"[^A-Za-z.\s]", " ", cleaned)
|
| 129 |
+
cleaned = re.sub(r"\s+", " ", cleaned).strip()
|
| 130 |
+
cleaned = re.sub(r"\b(mr|mrs|ms|miss|shri|smt)\.?\b", "", cleaned, flags=re.IGNORECASE)
|
| 131 |
+
cleaned = re.sub(r"\s+", " ", cleaned).strip()
|
| 132 |
+
|
| 133 |
+
stop_words = {
|
| 134 |
+
"the",
|
| 135 |
+
"vendor",
|
| 136 |
+
"vendee",
|
| 137 |
+
"party",
|
| 138 |
+
"agreement",
|
| 139 |
+
"hereinafter",
|
| 140 |
+
"called",
|
| 141 |
+
"referred",
|
| 142 |
+
"to",
|
| 143 |
+
"as",
|
| 144 |
+
"and",
|
| 145 |
+
"or",
|
| 146 |
+
"by",
|
| 147 |
+
"of",
|
| 148 |
+
}
|
| 149 |
+
parts = [p for p in cleaned.split(" ") if p and p.lower() not in stop_words]
|
| 150 |
+
if not parts:
|
| 151 |
+
return ""
|
| 152 |
+
|
| 153 |
+
parts = parts[:4]
|
| 154 |
+
name = " ".join(p.capitalize() for p in parts if len(p) > 1)
|
| 155 |
+
return name[:80].strip()
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def _extract_party_name(text: str, role: str) -> str:
|
| 159 |
+
import re
|
| 160 |
+
|
| 161 |
+
if not text:
|
| 162 |
+
return "Not found"
|
| 163 |
+
|
| 164 |
+
compact = " ".join(str(text).split())
|
| 165 |
+
role_l = role.lower()
|
| 166 |
+
|
| 167 |
+
patterns = [
|
| 168 |
+
# Role -> Name (e.g., "vendor: suresh kumar")
|
| 169 |
+
rf"\b{role_l}\b\s*[:,-]?\s*(?:is\s+)?(?:mr\.?|mrs\.?|ms\.?|shri|smt\.?)?\s*([A-Za-z][A-Za-z.\s]{{1,80}}?)(?=,|\.|;|\bson of\b|\bwife of\b|\bresiding\b|\baged\b|$)",
|
| 170 |
+
rf"\bthe\s+{role_l}\b\s*[:,-]?\s*(?:is\s+)?(?:mr\.?|mrs\.?|ms\.?|shri|smt\.?)?\s*([A-Za-z][A-Za-z.\s]{{1,80}}?)(?=,|\.|;|\bson of\b|\bwife of\b|\bresiding\b|\baged\b|$)",
|
| 171 |
+
# Name -> role via legal wording
|
| 172 |
+
rf"(?:mr\.?|mrs\.?|ms\.?|shri|smt\.?)?\s*([A-Za-z][A-Za-z.\s]{{1,80}}?)\s+(?:hereinafter\s+(?:called|referred\s+to\s+as)|called)\s+(?:the\s+)?{role_l}\b",
|
| 173 |
+
# Name (role)
|
| 174 |
+
rf"\b([A-Za-z][A-Za-z.\s]{{1,60}}?)\s*\(\s*{role_l}\s*\)",
|
| 175 |
+
]
|
| 176 |
+
|
| 177 |
+
for pattern in patterns:
|
| 178 |
+
match = re.search(pattern, compact, flags=re.IGNORECASE)
|
| 179 |
+
if not match:
|
| 180 |
+
continue
|
| 181 |
+
candidate = _normalize_person_name(match.group(1))
|
| 182 |
+
if candidate:
|
| 183 |
+
return candidate
|
| 184 |
+
|
| 185 |
+
if re.search(rf"\b{role_l}\b", compact, flags=re.IGNORECASE):
|
| 186 |
+
return f"{role.title()} mentioned (name not parsed)"
|
| 187 |
+
return "Not found"
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def _extract_document_parties(text_data):
    """Return the vendor and vendee names found in the document.

    The ``"text"`` of every chunk is joined with newlines and searched once
    per role via ``_extract_party_name``.

    Returns:
        ``{"vendor": str, "vendee": str}``.
    """
    pieces = [chunk.get("text", "") for chunk in (text_data or [])]
    document_text = "\n".join(pieces)
    return {
        role: _extract_party_name(document_text, role)
        for role in ("vendor", "vendee")
    }
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def _similarity(a: str, b: str) -> float:
|
| 198 |
+
return SequenceMatcher(None, a.lower(), b.lower()).ratio()
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def _threshold_for_mode(scan_mode: str) -> float:
|
| 202 |
+
mode = (scan_mode or "").lower()
|
| 203 |
+
if "deep" in mode:
|
| 204 |
+
return 0.50
|
| 205 |
+
if "strict" in mode:
|
| 206 |
+
return 0.85
|
| 207 |
+
return 0.60
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
def _normalized_clause_text(text: str) -> str:
|
| 211 |
+
import re
|
| 212 |
+
|
| 213 |
+
return re.sub(r"\s+", " ", str(text or "").strip().lower())
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def _token_set(text: str) -> set[str]:
    """Return the distinct lower-case alphabetic words of 3+ letters in ``text``."""
    import re

    normalized = _normalized_clause_text(text)
    words = re.findall(r"[a-z]{3,}", normalized)
    return set(words)
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def _numeric_tokens(text: str) -> set[str]:
|
| 223 |
+
import re
|
| 224 |
+
|
| 225 |
+
return set(re.findall(r"\b\d+(?:[.,]\d+)?%?\b", str(text or "")))
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
def _rule_based_category(text_a: str, text_b: str, similarity: float):
    """Classify a clause pair with cheap lexical rules.

    Rules are tried in this order:

    1. Identical normalised text -> exact duplication.
    2. ``similarity >= 0.94`` and token Jaccard ``>= 0.88`` -> near duplication.
    3. Jaccard ``>= 0.45`` and both clauses contain numbers, but the sets of
       numbers differ -> numeric inconsistency.
    4. Jaccard ``>= 0.5`` and exactly one clause is negated while the other
       states an obligation or permission -> contradiction.

    Args:
        text_a: First clause.
        text_b: Second clause.
        similarity: Precomputed similarity ratio for the pair.

    Returns:
        ``(category, label, confidence, reason)``, or
        ``(None, None, 0.0, "")`` when no rule fires.
    """
    import re

    a_norm = _normalized_clause_text(text_a)
    b_norm = _normalized_clause_text(text_b)
    tokens_a = _token_set(text_a)
    tokens_b = _token_set(text_b)
    common = len(tokens_a & tokens_b)
    denom = max(len(tokens_a | tokens_b), 1)
    jaccard = common / denom

    if a_norm and b_norm and a_norm == b_norm:
        return ("duplication", "DUPLICATION_EXACT", 0.99, "Exact repeated clause text.")

    if similarity >= 0.94 and jaccard >= 0.88:
        return ("duplication", "DUPLICATION_NEAR", 0.94, "Near-duplicate clause wording.")

    nums_a = _numeric_tokens(text_a)
    nums_b = _numeric_tokens(text_b)
    if jaccard >= 0.45 and nums_a and nums_b and nums_a != nums_b:
        return (
            "inconsistency",
            "NUMERIC_INCONSISTENCY",
            0.9,
            f"Numeric mismatch detected: {sorted(nums_a)} vs {sorted(nums_b)}.",
        )

    # Match whole words only: substring tests treated "notice" as a negation
    # and "willing" as an obligation.
    negation_re = re.compile(r"\b(?:not|cannot|never|prohibited|forbidden)\b")
    obligation_re = re.compile(r"\b(?:shall|will|must|required|permitted|allowed)\b")
    a_has_neg = bool(negation_re.search(a_norm))
    b_has_neg = bool(negation_re.search(b_norm))
    a_has_pos = bool(obligation_re.search(a_norm))
    b_has_pos = bool(obligation_re.search(b_norm))
    # Polarity is opposite only when exactly one side is negated. "shall not"
    # also contains "shall", so without the `not x_has_neg` guards two
    # prohibitions would be reported as contradicting each other.
    opposite_polarity = (a_has_neg and not b_has_neg and b_has_pos) or (
        b_has_neg and not a_has_neg and a_has_pos
    )
    if jaccard >= 0.5 and opposite_polarity:
        return ("contradiction", "LEGAL_CONFLICT", 0.9, "Opposite obligation/negation polarity.")

    return (None, None, 0.0, "")
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
def _analyze_clauses(clauses, threshold: float):
    """Compare clause pairs and collect duplication/inconsistency/contradiction findings.

    Every pair ``(i, j)`` with ``i < j`` is visited in order, up to a cap of
    15000 pairs. A pair is first classified by ``_rule_based_category``; if
    no rule fires it is handed to ``analysis.common_analyzer.analyze_pair``
    and the returned label is mapped onto one of the three categories.

    Args:
        clauses: List of clause dicts with ``"text"``, ``"page"`` and ``"line"``.
        threshold: Similarity threshold forwarded to ``analyze_pair``.

    Returns:
        ``(findings, line_issues, counts, compared_pairs)`` where ``findings``
        is sorted by descending confidence, ``line_issues`` by page and line,
        ``counts`` maps each category to its number of findings, and
        ``compared_pairs`` is the number of pairs actually compared.

    Raises:
        RuntimeError: If ``analysis.common_analyzer`` cannot be imported.
    """
    from itertools import combinations, islice

    if str(PROJECT_ROOT) not in sys.path:
        sys.path.append(str(PROJECT_ROOT))

    try:
        from analysis.common_analyzer import analyze_pair
    except Exception as exc:
        raise RuntimeError(f"Analyzer import failed: {exc}") from exc

    findings = []
    line_issues = []
    counts = {"duplication": 0, "inconsistency": 0, "contradiction": 0}
    compared_pairs = 0
    max_pairs = 15000
    seen_findings = set()
    seen_line_issues = set()

    def normalize_category(label: str, reason: str, similarity: float) -> str | None:
        """Map an analyzer label (plus reason text) to a category, or None."""
        lbl = (label or "").upper()
        rsn = (reason or "").lower()
        if lbl in {"NUMERIC_INCONSISTENCY"}:
            return "inconsistency"
        if lbl in {"LEGAL_CONFLICT", "CONTRADICTION"}:
            return "contradiction"
        if lbl in {"DUPLICATION", "ENTAILMENT"}:
            return "duplication"
        if lbl in {"CANDIDATE", "QUALIFICATION"} and similarity >= 0.92:
            return "duplication"
        if "negation" in rsn or "conflict" in rsn:
            return "contradiction"
        return None

    # combinations() yields pairs in the same (i < j) order as a nested index
    # loop. islice() enforces the cap, so compared_pairs never overshoots
    # max_pairs; the previous counter reported max_pairs + 1 when capped.
    for clause_a, clause_b in islice(combinations(clauses, 2), max_pairs):
        compared_pairs += 1
        similarity = _similarity(clause_a["text"], clause_b["text"])

        category, label, confidence, reason = _rule_based_category(
            clause_a["text"], clause_b["text"], similarity
        )

        if category is None:
            label, confidence, reason = analyze_pair(
                clause_a["text"],
                clause_b["text"],
                similarity,
                threshold=threshold,
            )
            if not label or label == "NO_CONFLICT":
                continue

            category = normalize_category(label, reason, similarity)
            if category is None:
                continue

        finding_key = (
            category,
            clause_a["page"],
            clause_a["line"],
            clause_b["page"],
            clause_b["line"],
            label,
        )
        if finding_key in seen_findings:
            continue
        seen_findings.add(finding_key)

        findings.append(
            {
                "issueType": label,
                "category": category,
                "confidence": round(float(confidence), 4),
                "reason": reason,
                "clause1": clause_a["text"],
                "clause2": clause_b["text"],
                "location1": f"Pg {clause_a['page']}, Ln {clause_a['line']}",
                "location2": f"Pg {clause_b['page']}, Ln {clause_b['line']}",
                "page1": clause_a["page"],
                "line1": clause_a["line"],
                "page2": clause_b["page"],
                "line2": clause_b["line"],
            }
        )
        counts[category] += 1

        # Record each affected line once per (category, label).
        for clause in (clause_a, clause_b):
            line_key = (category, clause["page"], clause["line"], label)
            if line_key in seen_line_issues:
                continue
            seen_line_issues.add(line_key)
            line_issues.append(
                {
                    "category": category,
                    "issueType": label,
                    "confidence": round(float(confidence), 4),
                    "page": clause["page"],
                    "line": clause["line"],
                    "location": f"Pg {clause['page']}, Ln {clause['line']}",
                    "reason": reason,
                }
            )

    findings.sort(key=lambda item: item["confidence"], reverse=True)
    line_issues.sort(key=lambda item: (item["page"], item["line"]))
    return findings, line_issues, counts, compared_pairs
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
def _build_page_summaries(clauses, line_issues, text_data):
|
| 380 |
+
pages = {}
|
| 381 |
+
page_text_map = {}
|
| 382 |
+
|
| 383 |
+
for chunk in text_data or []:
|
| 384 |
+
page = int(chunk.get("page", 1))
|
| 385 |
+
if page in page_text_map:
|
| 386 |
+
continue
|
| 387 |
+
raw = str(chunk.get("text", "") or "")
|
| 388 |
+
lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
|
| 389 |
+
page_text_map[page] = " ".join(lines[:2])[:260]
|
| 390 |
+
|
| 391 |
+
for clause in clauses:
|
| 392 |
+
page = int(clause.get("page", 1))
|
| 393 |
+
pages.setdefault(
|
| 394 |
+
page,
|
| 395 |
+
{
|
| 396 |
+
"page": page,
|
| 397 |
+
"clauseCount": 0,
|
| 398 |
+
"duplicationCount": 0,
|
| 399 |
+
"inconsistencyCount": 0,
|
| 400 |
+
"contradictionCount": 0,
|
| 401 |
+
"issueCount": 0,
|
| 402 |
+
"keyLines": [],
|
| 403 |
+
"pageSnippet": page_text_map.get(page, ""),
|
| 404 |
+
},
|
| 405 |
+
)
|
| 406 |
+
pages[page]["clauseCount"] += 1
|
| 407 |
+
|
| 408 |
+
for issue in line_issues:
|
| 409 |
+
page = int(issue.get("page", 1))
|
| 410 |
+
pages.setdefault(
|
| 411 |
+
page,
|
| 412 |
+
{
|
| 413 |
+
"page": page,
|
| 414 |
+
"clauseCount": 0,
|
| 415 |
+
"duplicationCount": 0,
|
| 416 |
+
"inconsistencyCount": 0,
|
| 417 |
+
"contradictionCount": 0,
|
| 418 |
+
"issueCount": 0,
|
| 419 |
+
"keyLines": [],
|
| 420 |
+
"pageSnippet": page_text_map.get(page, ""),
|
| 421 |
+
},
|
| 422 |
+
)
|
| 423 |
+
category = issue.get("category")
|
| 424 |
+
if category in {"duplication", "inconsistency", "contradiction"}:
|
| 425 |
+
pages[page][f"{category}Count"] += 1
|
| 426 |
+
pages[page]["issueCount"] += 1
|
| 427 |
+
if len(pages[page]["keyLines"]) < 6:
|
| 428 |
+
line_ref = f"Ln {issue.get('line', '-')}: {issue.get('issueType', '-')}"
|
| 429 |
+
if line_ref not in pages[page]["keyLines"]:
|
| 430 |
+
pages[page]["keyLines"].append(line_ref)
|
| 431 |
+
|
| 432 |
+
page_summaries = []
|
| 433 |
+
for page in sorted(pages.keys()):
|
| 434 |
+
item = pages[page]
|
| 435 |
+
item["summaryText"] = (
|
| 436 |
+
f"Page {page} contains {item['clauseCount']} clauses and {item['issueCount']} flagged lines "
|
| 437 |
+
f"(duplication: {item['duplicationCount']}, inconsistency: {item['inconsistencyCount']}, "
|
| 438 |
+
f"contradiction: {item['contradictionCount']})."
|
| 439 |
+
)
|
| 440 |
+
page_summaries.append(item)
|
| 441 |
+
|
| 442 |
+
return page_summaries
|
| 443 |
+
|
| 444 |
+
|
| 445 |
+
def _shorten_text(text: str, limit: int = 220) -> str:
|
| 446 |
+
s = " ".join(str(text or "").split())
|
| 447 |
+
if len(s) <= limit:
|
| 448 |
+
return s
|
| 449 |
+
return s[: limit - 3].rstrip() + "..."
|
| 450 |
+
|
| 451 |
+
|
| 452 |
+
def _clause_label(text: str, fallback_id: int) -> str:
|
| 453 |
+
import re
|
| 454 |
+
|
| 455 |
+
raw = str(text or "")
|
| 456 |
+
m = re.search(r"\bclause\s*(\d+)\s*(?:\(([^)]+)\))?", raw, flags=re.IGNORECASE)
|
| 457 |
+
if m:
|
| 458 |
+
num = m.group(1)
|
| 459 |
+
title = (m.group(2) or "").strip()
|
| 460 |
+
return f"Clause {num}" + (f" ({title})" if title else "")
|
| 461 |
+
return f"Clause {fallback_id}"
|
| 462 |
+
|
| 463 |
+
|
| 464 |
+
def _build_detailed_summary(clauses, page_summaries, findings):
    """Render a plain-text report of clauses per page and the key findings.

    For every page in ``page_summaries`` the first 12 clauses (ordered by line
    then id) are listed with a label and a shortened text, followed by a count
    of any remaining clauses. Three sections then list up to 10 findings each
    for contradictions, inconsistencies and duplications.

    Returns:
        The report as a single newline-joined string.
    """
    from collections import defaultdict

    by_page = defaultdict(list)
    for clause in clauses:
        by_page[int(clause.get("page", 1))].append(clause)

    out = ["Here is the detailed summary of the document content:", ""]

    for summary in page_summaries:
        page = int(summary.get("page", 1))
        ordered = sorted(
            by_page.get(page, []), key=lambda c: (c.get("line", 0), c.get("id", 0))
        )
        out.append(f"Page {page} Summary:")
        if not ordered:
            out.extend([f"- No clauses extracted for Page {page}.", ""])
            continue

        for position, clause in enumerate(ordered[:12], start=1):
            label = _clause_label(clause.get("text", ""), position)
            summary_text = _shorten_text(clause.get("text", ""), 210)
            out.append(f"- {label}: {summary_text} (Page {page}, Line {clause.get('line', '-')})")

        hidden = len(ordered) - 12
        if hidden > 0:
            out.append(f"- Additional clauses on this page: {hidden}")
        out.append("")

    # (category, heading, default issue type, text used when nothing matched)
    sections = (
        (
            "contradiction",
            "Summary of Key Contradictions Noted:",
            "LEGAL_CONFLICT",
            "- No strong contradiction pair detected.",
        ),
        (
            "inconsistency",
            "Summary of Key Inconsistencies Noted:",
            "INCONSISTENCY",
            "- No strong inconsistency pair detected.",
        ),
        (
            "duplication",
            "Summary of Key Duplications Noted:",
            "DUPLICATION",
            "- No major duplication pair detected.",
        ),
    )
    last_index = len(sections) - 1
    for index, (category, heading, default_type, empty_note) in enumerate(sections):
        matching = [f for f in findings if f.get("category") == category]
        out.append(heading)
        if matching:
            for rank, item in enumerate(matching[:10], start=1):
                out.append(
                    f"- {rank}. {item.get('issueType', default_type)}: "
                    f"{_shorten_text(item.get('reason', ''), 170)} "
                    f"({item.get('location1', '-')} vs {item.get('location2', '-')})"
                )
        else:
            out.append(empty_note)
        # Sections are separated by a blank line; none follows the last one.
        if index != last_index:
            out.append("")

    return "\n".join(out)
|
| 531 |
+
|
| 532 |
+
|
| 533 |
+
# Ensure schema exists even when started via `flask run`.
|
| 534 |
+
init_db()
|
| 535 |
+
|
| 536 |
+
|
| 537 |
+
@app.get("/api/health")
def health_check():
    """Liveness probe: answers ``{"status": "ok"}`` with HTTP 200."""
    payload = {"status": "ok"}
    return jsonify(payload), 200
|
| 540 |
+
|
| 541 |
+
|
| 542 |
+
@app.get("/")
def root():
    """Return a short banner plus the list of routes this backend serves."""
    routes = [
        "GET /api/health",
        "POST /api/register",
        "POST /api/login",
        "POST /api/analyze",
        "GET /health",
        "POST /register",
        "POST /login",
        "POST /analyze",
    ]
    body = {"message": "Backend is running.", "endpoints": routes}
    return jsonify(body), 200
|
| 562 |
+
|
| 563 |
+
|
| 564 |
+
@app.get("/health")
def health_check_alias():
    """Un-prefixed alias of ``/api/health``."""
    response = health_check()
    return response
|
| 567 |
+
|
| 568 |
+
|
| 569 |
+
@app.post("/api/register")
def register():
    """Create a user account from a JSON body.

    Expected JSON keys: ``fullName``, ``email`` and ``password`` (at least 6
    characters). The e-mail is lower-cased and the password is stored as a
    Werkzeug hash.

    Returns:
        201 on success, 400 for missing/short fields, 409 when the e-mail is
        already registered.
    """
    from contextlib import closing

    data = request.get_json(silent=True)
    # A JSON array/string/number has no .get(); treat it like a missing body.
    if not isinstance(data, dict):
        data = {}

    # `or ""` keeps an explicit JSON null from being stringified to "None",
    # which previously passed the required-field check.
    full_name = str(data.get("fullName") or "").strip()
    email = str(data.get("email") or "").strip().lower()
    password = str(data.get("password") or "")

    if not full_name or not email or not password:
        return jsonify({"error": "fullName, email, and password are required."}), 400

    if len(password) < 6:
        return jsonify({"error": "Password must be at least 6 characters."}), 400

    password_hash = generate_password_hash(password)
    created_at = datetime.now(timezone.utc).isoformat()

    try:
        # closing() releases the connection; sqlite3's own context manager
        # only handles the transaction and would leave the handle open.
        with closing(get_db_connection()) as conn:
            conn.execute(
                "INSERT INTO users (full_name, email, password_hash, created_at) VALUES (?, ?, ?, ?)",
                (full_name, email, password_hash, created_at),
            )
            conn.commit()
    except sqlite3.IntegrityError:
        return jsonify({"error": "Email already registered."}), 409

    return jsonify({"message": "User created successfully."}), 201
|
| 597 |
+
|
| 598 |
+
|
| 599 |
+
@app.post("/register")
def register_alias():
    """Un-prefixed alias of ``/api/register``."""
    response = register()
    return response
|
| 602 |
+
|
| 603 |
+
|
| 604 |
+
@app.post("/api/login")
def login():
    """Verify an e-mail/password pair supplied as JSON.

    Expected JSON keys: ``email`` and ``password``.

    Returns:
        200 with the user's ``id``, ``fullName`` and ``email`` on success,
        400 when a field is missing, 401 when the credentials do not match.
    """
    from contextlib import closing

    data = request.get_json(silent=True)
    # A JSON array/string/number has no .get(); treat it like a missing body.
    if not isinstance(data, dict):
        data = {}

    # `or ""` keeps an explicit JSON null from being stringified to "None".
    email = str(data.get("email") or "").strip().lower()
    password = str(data.get("password") or "")

    if not email or not password:
        return jsonify({"error": "email and password are required."}), 400

    # closing() releases the connection; sqlite3's own context manager only
    # handles the transaction and would leave the handle open.
    with closing(get_db_connection()) as conn:
        user = conn.execute(
            "SELECT id, full_name, email, password_hash FROM users WHERE email = ?",
            (email,),
        ).fetchone()

    if user is None or not check_password_hash(user["password_hash"], password):
        return jsonify({"error": "Invalid email or password."}), 401

    return (
        jsonify(
            {
                "message": "Login successful.",
                "user": {
                    "id": user["id"],
                    "fullName": user["full_name"],
                    "email": user["email"],
                },
            }
        ),
        200,
    )
|
| 636 |
+
|
| 637 |
+
|
| 638 |
+
@app.post("/api/analyze")
def analyze():
    """Analyse an uploaded document for duplicated, inconsistent and contradictory clauses.

    Expects a multipart form with ``file`` (PDF, DOCX or TXT) and an optional
    ``scanMode`` label that selects the similarity threshold.

    Returns:
        200 with a summary block, per-page summaries, a detailed text summary,
        up to 50 findings and up to 200 line issues; 400 for a missing or
        unsupported file or when too little text/clauses were found; 500 when
        the analysis itself fails.
    """
    uploaded = request.files.get("file")
    scan_mode = request.form.get("scanMode", "Standard Scan (Recommended)")
    threshold = _threshold_for_mode(scan_mode)

    if uploaded is None or uploaded.filename is None or uploaded.filename.strip() == "":
        return jsonify({"error": "Please upload a file."}), 400

    file_ext = uploaded.filename.rsplit(".", 1)[-1].lower() if "." in uploaded.filename else ""
    if file_ext not in {"pdf", "docx", "txt"}:
        return jsonify({"error": "Unsupported file type. Use PDF, DOCX, or TXT."}), 400

    try:
        file_bytes = uploaded.read()
        text_data = _extract_text_data(file_bytes=file_bytes, file_ext=file_ext)
        if not text_data:
            return jsonify({"error": "Could not extract text from file."}), 400

        clauses = _extract_clauses(text_data)
        if len(clauses) < 2:
            return jsonify({"error": "Not enough clauses found for analysis."}), 400

        parties = _extract_document_parties(text_data)
        findings, line_issues, counts, compared_pairs = _analyze_clauses(
            clauses=clauses, threshold=threshold
        )
        page_summaries = _build_page_summaries(
            clauses=clauses, line_issues=line_issues, text_data=text_data
        )
        detailed_summary = _build_detailed_summary(
            clauses=clauses,
            page_summaries=page_summaries,
            findings=findings,
        )
    except Exception as exc:
        # Record the traceback server-side; before this the only trace of a
        # failure was the one-line message sent to the client.
        app.logger.exception("Document analysis failed")
        return jsonify({"error": f"Analysis failed: {exc}"}), 500

    return (
        jsonify(
            {
                "message": "Analysis completed.",
                "summary": {
                    "scanMode": scan_mode,
                    "threshold": threshold,
                    "vendor": parties["vendor"],
                    "vendee": parties["vendee"],
                    "clauses": len(clauses),
                    "pairsCompared": compared_pairs,
                    "issuesFound": len(findings),
                    "duplicationCount": counts["duplication"],
                    "inconsistencyCount": counts["inconsistency"],
                    "contradictionCount": counts["contradiction"],
                },
                "pageSummaries": page_summaries,
                "detailedSummary": detailed_summary,
                # Lists are capped to keep the response small; the counts
                # above still reflect everything that was found.
                "findings": findings[:50],
                "lineIssues": line_issues[:200],
            }
        ),
        200,
    )
|
| 700 |
+
|
| 701 |
+
|
| 702 |
+
@app.post("/login")
def login_alias():
    """Un-prefixed alias of ``/api/login``."""
    response = login()
    return response
|
| 705 |
+
|
| 706 |
+
|
| 707 |
+
@app.post("/analyze")
def analyze_alias():
    """Un-prefixed alias of ``/api/analyze``."""
    response = analyze()
    return response
|
| 710 |
+
|
| 711 |
+
|
| 712 |
+
if __name__ == "__main__":
    # Defaults bind to loopback with debug off; FLASK_DEBUG=1, HOST and PORT
    # override them. The reloader stays disabled regardless.
    run_options = {
        "host": os.getenv("HOST", "127.0.0.1"),
        "port": int(os.getenv("PORT", "5000")),
        "debug": os.getenv("FLASK_DEBUG", "0") == "1",
        "use_reloader": False,
    }
    app.run(**run_options)
|
backend/requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Flask==3.1.0
|
| 2 |
+
Flask-Cors==5.0.0
|
| 3 |
+
Werkzeug==3.1.3
|
| 4 |
+
pdfplumber==0.11.5
|
| 5 |
+
python-docx==1.1.2
|
domain_rules/belongings_check.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def belongings_conflict(text1, text2):
    """Report whether one text says "included" while the other says "excluded".

    Matching is a case-insensitive substring test in both directions.
    """
    first, second = text1.lower(), text2.lower()
    forward = "included" in first and "excluded" in second
    backward = "excluded" in first and "included" in second
    return forward or backward
|
domain_rules/belongings_keywords.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Keywords associated with belongings/fixtures clauses.
# NOTE(review): how callers match these (substring vs. token) is not visible here.
BELONGINGS_KEYWORDS = [
    "fixture",
    "fitting",
    "belonging",
    "movable",
    "immovable",
    "furniture",
    "appliance",
    "electrical",
    "plumbing",
    "included",
    "excluded",
]
|
domain_rules/legal_rules.py
ADDED
|
File without changes
|
embeddings/sbert_encoder.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from sentence_transformers import SentenceTransformer
|
| 3 |
+
|
| 4 |
+
_model = None
|
| 5 |
+
|
| 6 |
+
def get_model():
    """Return the module-wide SentenceTransformer, loading it on first use.

    The first call tries a regular load of ``all-MiniLM-L6-v2``; if that
    raises, a second attempt is made with ``local_files_only=True``. The
    loaded model is cached in the module-level ``_model``.

    Raises:
        RuntimeError: If both load attempts fail.
    """
    global _model
    if _model is not None:
        return _model

    model_name = "all-MiniLM-L6-v2"
    try:
        print(f"Loading {model_name}...")
        _model = SentenceTransformer(model_name)
    except Exception as online_error:
        print(f"Failed to load {model_name} online: {online_error}")
        print("Attempting to load from local cache...")
        try:
            _model = SentenceTransformer(model_name, local_files_only=True)
        except Exception as offline_error:
            raise RuntimeError(
                f"Could not load model {model_name} (Online or Offline). Check connection."
            ) from offline_error
    return _model
|
| 21 |
+
|
| 22 |
+
def generate_embeddings(clauses):
    """Encode the ``"text"`` field of every clause with the shared model.

    Args:
        clauses: Iterable of mappings, each providing a ``"text"`` entry.

    Returns:
        Whatever ``model.encode(..., convert_to_numpy=True)`` returns for
        the collected texts.
    """
    # Obtain the model before touching the clauses, matching the original order.
    encoder = get_model()
    clause_texts = [clause["text"] for clause in clauses]
    return encoder.encode(clause_texts, convert_to_numpy=True)
|
frontend/README.md
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Frontend (Multi-Page Flow)
|
| 2 |
+
|
| 3 |
+
This frontend now uses a strict page flow:
|
| 4 |
+
|
| 5 |
+
1. `index.html` -> Login/Signup
|
| 6 |
+
2. `upload.html` -> Upload document and run analysis
|
| 7 |
+
3. `issues.html` -> Line-level issue page (duplication, inconsistency, contradiction)
|
| 8 |
+
4. `summary.html` -> Final full-document summary
|
| 9 |
+
|
| 10 |
+
## Run
|
| 11 |
+
|
| 12 |
+
Serve this folder using any static server from `frontend/`:
|
| 13 |
+
|
| 14 |
+
```bash
|
| 15 |
+
python -m http.server 8080
|
| 16 |
+
```
|
| 17 |
+
|
| 18 |
+
Open:
|
| 19 |
+
|
| 20 |
+
- `http://127.0.0.1:8080/index.html`
|
| 21 |
+
|
| 22 |
+
## Backend dependency
|
| 23 |
+
|
| 24 |
+
Frontend expects Flask backend endpoints:
|
| 25 |
+
|
| 26 |
+
- `POST /api/register`
|
| 27 |
+
- `POST /api/login`
|
| 28 |
+
- `POST /api/analyze`
|
| 29 |
+
|
| 30 |
+
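Client code retries each request across ports `5000` and `5001`. Only analysis additionally falls back to the un-prefixed `/analyze` alias; auth requests (`/register`, `/login`) are sent to the `/api`-prefixed routes only.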
|
| 31 |
+
|
| 32 |
+
## Notes
|
| 33 |
+
|
| 34 |
+
- Login state and analysis payload are stored in `sessionStorage`.
|
| 35 |
+
- If user session is missing, `upload.html`, `issues.html`, and `summary.html` redirect to `index.html`.
|
| 36 |
+
- If analysis payload is missing, `issues.html` and `summary.html` redirect to `upload.html`.
|
frontend/app.js
ADDED
|
@@ -0,0 +1,509 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Hostname the page was served from; falls back to loopback when it is empty.
const currentHost = window.location.hostname || "127.0.0.1";

// Candidate backend origins, listed in the order they are tried.
const BACKEND_ORIGINS = [
  `http://${currentHost}:5000`,
  `http://${currentHost}:5001`,
  "http://127.0.0.1:5000",
  "http://localhost:5000",
  "http://127.0.0.1:5001",
  "http://localhost:5001",
];

// Base URLs for the JSON auth endpoints (one per origin).
const API_BASES = BACKEND_ORIGINS.map((origin) => `${origin}/api`);

// Analysis endpoints: for every origin, the /api route first, then the bare alias.
const ANALYZE_URLS = BACKEND_ORIGINS.flatMap((origin) => [
  `${origin}/api/analyze`,
  `${origin}/analyze`,
]);

// Lower-cased file name of the current page; an empty last path segment means index.html.
const page = (window.location.pathname.split("/").pop() || "index.html").toLowerCase();
|
| 28 |
+
|
| 29 |
+
/**
 * Escape the HTML-significant characters of a value so it can be safely
 * interpolated into markup that is assigned through innerHTML.
 *
 * @param {*} value - Any value; it is coerced with String() first.
 * @returns {string} The text with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(value) {
  // "&" must be handled first, otherwise the entities produced by the
  // later replacements would themselves be escaped a second time.
  return String(value)
    .replaceAll("&", "&amp;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;")
    .replaceAll('"', "&quot;")
    .replaceAll("'", "&#39;");
}
|
| 37 |
+
|
| 38 |
+
/**
 * Write a status message into an element and set its state class.
 *
 * @param {?Element} el - Target element; nothing happens when it is falsy.
 * @param {string} text - Message assigned to textContent.
 * @param {?string} [type=null] - Class to add ("success"/"error"); both are removed first.
 */
function setText(el, text, type = null) {
  if (el) {
    el.textContent = text;
    const { classList } = el;
    classList.remove("success", "error");
    if (type) {
      classList.add(type);
    }
  }
}
|
| 44 |
+
|
| 45 |
+
/**
 * Read the logged-in user from sessionStorage ("lsi_user").
 *
 * @returns {*} The parsed value, or null when the key is missing/empty or is not valid JSON.
 */
function getUser() {
  const stored = sessionStorage.getItem("lsi_user");
  if (!stored) {
    return null;
  }

  let parsed = null;
  try {
    parsed = JSON.parse(stored);
  } catch {
    // Corrupt entry: treated the same as "no user".
    parsed = null;
  }
  return parsed;
}
|
| 54 |
+
|
| 55 |
+
/**
 * Persist the user object in sessionStorage under "lsi_user" as JSON.
 *
 * @param {*} user - Value to serialize.
 */
function setUser(user) {
  const serialized = JSON.stringify(user);
  sessionStorage.setItem("lsi_user", serialized);
}
|
| 58 |
+
|
| 59 |
+
/** Remove both the stored user and the stored analysis payload from sessionStorage. */
function clearSession() {
  for (const key of ["lsi_user", "lsi_analysis_payload"]) {
    sessionStorage.removeItem(key);
  }
}
|
| 63 |
+
|
| 64 |
+
/**
 * Read the last analysis result from sessionStorage ("lsi_analysis_payload").
 *
 * @returns {*} The parsed value, or null when the key is missing/empty or is not valid JSON.
 */
function getAnalysisPayload() {
  const stored = sessionStorage.getItem("lsi_analysis_payload");
  if (stored) {
    try {
      return JSON.parse(stored);
    } catch {
      // Unparseable entry: fall through to the null result below.
    }
  }
  return null;
}
|
| 73 |
+
|
| 74 |
+
/**
 * Persist an analysis result in sessionStorage under "lsi_analysis_payload" as JSON.
 *
 * @param {*} payload - Value to serialize.
 */
function setAnalysisPayload(payload) {
  const serialized = JSON.stringify(payload);
  sessionStorage.setItem("lsi_analysis_payload", serialized);
}
|
| 77 |
+
|
| 78 |
+
/**
 * Gate a page behind a stored user session.
 *
 * Without a stored user the browser is sent to "index.html#home". Otherwise the
 * optional #userBadge is filled with the user's name and the optional
 * #logoutBtn gets a click handler that clears the session and goes home.
 *
 * @returns {*} The stored user, or null when the redirect was triggered.
 */
function ensureAuth() {
  const goHome = () => {
    window.location.href = "index.html#home";
  };

  const currentUser = getUser();
  if (!currentUser) {
    goHome();
    return null;
  }

  const badge = document.getElementById("userBadge");
  if (badge) {
    const displayName = currentUser.fullName || currentUser.email || "User";
    badge.textContent = `${displayName}`;
  }

  document.getElementById("logoutBtn")?.addEventListener("click", () => {
    clearSession();
    goHome();
  });

  return currentUser;
}
|
| 100 |
+
|
| 101 |
+
/**
 * POST a JSON payload to an auth endpoint, trying each entry of API_BASES in
 * order and stopping at the first one that yields an HTTP response.
 *
 * @param {string} endpoint - Path appended to the base, e.g. "/login".
 * @param {*} payload - Value sent as the JSON request body.
 * @returns {Promise<{response: ?Response, data: *}>} The response and its parsed
 *   JSON body (null when the body is not JSON).
 * @throws {Error} When every base failed at the network level.
 */
async function postAuth(endpoint, payload) {
  let networkFailure = null;

  for (const base of API_BASES) {
    try {
      const response = await fetch(`${base}${endpoint}`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(payload),
      });
      const data = await response.json().catch(() => null);
      // Any HTTP response (success or error status) ends the search.
      return { response, data };
    } catch (error) {
      networkFailure = error;
    }
  }

  if (networkFailure) {
    throw new Error(`Cannot reach backend at ${API_BASES.join(", ")}.`);
  }

  return { response: null, data: null };
}
|
| 127 |
+
|
| 128 |
+
/**
 * Upload a document for analysis, trying each entry of ANALYZE_URLS in order.
 *
 * A 404 means "this route does not exist here", so the next candidate is
 * tried; any other HTTP status ends the search. A network failure on one
 * candidate never discards an HTTP response already received from another,
 * so the caller gets the real HTTP error instead of a misleading
 * "cannot connect" message.
 *
 * @param {FormData} formData - Multipart body sent with each attempt.
 * @returns {Promise<*>} Parsed JSON body of the successful response (null when the body is not JSON).
 * @throws {Error} When no candidate produced an HTTP response, or the final response is not ok.
 */
async function runDocumentAnalysis(formData) {
  let response = null;
  let data = null;

  for (const url of ANALYZE_URLS) {
    try {
      const attempt = await fetch(url, { method: "POST", body: formData });
      // Record the reply only once the fetch itself has succeeded.
      response = attempt;
      data = await attempt.json().catch(() => null);
      if (attempt.status !== 404) break;
    } catch {
      // Network-level failure for this candidate; keep any earlier response and move on.
    }
  }

  // Covers both "every attempt failed at the network level" and an empty URL list.
  if (!response) {
    throw new Error("Cannot connect to backend for analysis.");
  }

  if (!response.ok) {
    throw new Error(data?.error || `Analysis request failed with HTTP ${response.status}.`);
  }

  return data;
}
|
| 156 |
+
|
| 157 |
+
/**
 * Render the line issues of one category as an HTML table.
 *
 * @param {Array<Object>} lineIssues - Issue records; only those whose `category` matches are used.
 * @param {string} category - Category to keep (also shown in the empty-state text).
 * @returns {string} Table markup for up to 80 rows, or a "No ... lines detected." paragraph.
 */
function buildIssueRows(lineIssues, category) {
  const matching = lineIssues.filter((item) => item.category === category).slice(0, 80);

  if (matching.length === 0) {
    return `<p class="result-muted">No ${category} lines detected.</p>`;
  }

  const renderRow = (item) => `
        <tr>
          <td>${escapeHtml(item.location || `Pg ${item.page}, Ln ${item.line}`)}</td>
          <td>${escapeHtml(item.issueType || "-")}</td>
          <td>${escapeHtml(item.confidence ?? "-")}</td>
        </tr>
      `;
  const rows = matching.map(renderRow).join("");

  return `
    <div class="table-wrap">
      <table class="result-table">
        <thead>
          <tr>
            <th>Page/Line</th>
            <th>Issue Type</th>
            <th>Confidence</th>
          </tr>
        </thead>
        <tbody>${rows}</tbody>
      </table>
    </div>
  `;
}
|
| 191 |
+
|
| 192 |
+
/**
 * Wire up the login/signup form on index.html.
 *
 * Handles switching between the two modes, validates and submits the form
 * through postAuth(), stores the user on successful login and moves on to
 * upload.html. A visitor who already has a stored user is redirected at once.
 */
function initIndexPage() {
  const loginTab = document.getElementById("loginTab");
  const signupTab = document.getElementById("signupTab");
  const authForm = document.getElementById("authForm");
  const nameField = document.getElementById("nameField");
  const fullNameInput = document.getElementById("fullName");
  const emailInput = document.getElementById("email");
  const passwordInput = document.getElementById("password");
  const submitBtn = document.getElementById("submitBtn");
  const formSubtitle = document.getElementById("formSubtitle");
  const message = document.getElementById("message");

  let mode = "login";

  // Switch the form between "login" and "signup" presentation; also clears the status line.
  function setMode(nextMode) {
    mode = nextMode;
    const isSignup = mode === "signup";
    signupTab.classList.toggle("active", isSignup);
    loginTab.classList.toggle("active", !isSignup);
    nameField.classList.toggle("hidden", !isSignup);
    submitBtn.textContent = isSignup ? "Create Account" : "Login";
    formSubtitle.textContent = isSignup
      ? "Create your account to start securely."
      : "Enter your credentials to access your account.";
    fullNameInput.required = isSignup;
    setText(message, "", null);
  }

  // Validate the inputs, call the backend and react to the outcome.
  async function handleAuthSubmit(event) {
    event.preventDefault();
    setText(message, "", null);

    const email = emailInput.value.trim();
    const password = passwordInput.value;
    const fullName = fullNameInput.value.trim();

    if (!email || !password || (mode === "signup" && !fullName)) {
      setText(message, "Please fill all required fields.", "error");
      return;
    }

    submitBtn.disabled = true;

    try {
      const endpoint = mode === "signup" ? "/register" : "/login";
      const payload = mode === "signup" ? { fullName, email, password } : { email, password };
      const { response, data } = await postAuth(endpoint, payload);

      if (!response.ok) {
        throw new Error(data?.error || `Request failed with HTTP ${response.status}.`);
      }

      if (mode === "signup") {
        authForm.reset();
        setMode("login");
        // setMode() blanks the status line, so the confirmation has to be
        // written afterwards or the user would never see it.
        setText(message, "Account created. Please login now.", "success");
        return;
      }

      const user = data?.user || { fullName: fullName || email, email };
      setUser(user);
      window.location.href = "upload.html";
    } catch (error) {
      setText(message, error.message || "Something went wrong.", "error");
    } finally {
      submitBtn.disabled = false;
    }
  }

  loginTab.addEventListener("click", () => setMode("login"));
  signupTab.addEventListener("click", () => setMode("signup"));
  authForm.addEventListener("submit", handleAuthSubmit);
  setMode("login");

  // Already logged in: skip the form.
  if (getUser()) {
    window.location.href = "upload.html";
  }
}
|
| 270 |
+
|
| 271 |
+
/**
 * Wire up upload.html: show details of the chosen file, submit it for
 * analysis, store the result and continue to issues.html.
 *
 * Requires a stored user session (see ensureAuth()).
 */
function initUploadPage() {
  if (!ensureAuth()) return;

  const uploadForm = document.getElementById("uploadForm");
  const legalFile = document.getElementById("legalFile");
  const scanMode = document.getElementById("scanMode");
  const uploadMessage = document.getElementById("uploadMessage");
  const loadingState = document.getElementById("loadingState");
  const analysisInputSummary = document.getElementById("analysisInputSummary");

  // Preview name/type/size of the file as soon as one is picked.
  legalFile.addEventListener("change", () => {
    if (!legalFile.files || !legalFile.files[0]) return;
    const selectedFile = legalFile.files[0];
    analysisInputSummary.classList.remove("hidden");
    analysisInputSummary.innerHTML = `
      <p><strong>File:</strong> ${escapeHtml(selectedFile.name)}</p>
      <p><strong>Type:</strong> ${escapeHtml(selectedFile.type || "unknown")}</p>
      <p><strong>Size:</strong> ${escapeHtml((selectedFile.size / 1024).toFixed(2))} KB</p>
      <p><strong>Scan Mode:</strong> ${escapeHtml(scanMode.value)}</p>
    `;
    setText(uploadMessage, `Selected: ${selectedFile.name}`, "success");
  });

  uploadForm.addEventListener("submit", async (event) => {
    event.preventDefault();
    setText(uploadMessage, "", null);

    if (!legalFile.files || legalFile.files.length === 0) {
      setText(uploadMessage, "Please choose a file to continue.", "error");
      return;
    }

    const selectedFile = legalFile.files[0];
    const selectedScanMode = scanMode.value;

    const formData = new FormData();
    formData.append("file", selectedFile);
    formData.append("scanMode", selectedScanMode);

    // Swap the form for the loading indicator while the request is running.
    uploadForm.classList.add("hidden");
    loadingState.classList.remove("hidden");

    try {
      const payload = await runDocumentAnalysis(formData);
      // A successful response whose body is not JSON yields null; report that
      // clearly instead of letting the `_meta` assignment fail with a TypeError.
      if (!payload || typeof payload !== "object") {
        throw new Error("Analysis response was not valid JSON.");
      }
      payload._meta = {
        fileName: selectedFile.name,
        fileType: selectedFile.type || "unknown",
        fileSizeKb: Number((selectedFile.size / 1024).toFixed(2)),
      };
      setAnalysisPayload(payload);
      window.location.href = "issues.html";
    } catch (error) {
      // Restore the form so the user can retry.
      loadingState.classList.add("hidden");
      uploadForm.classList.remove("hidden");
      setText(uploadMessage, error.message || "Analysis failed.", "error");
    }
  });
}
|
| 329 |
+
|
| 330 |
+
/**
 * Render issues.html from the stored analysis payload: three counter cards
 * and one line-issue table per category.
 *
 * Redirects to upload.html when no payload is stored; requires a user session.
 */
function initIssuesPage() {
  if (!ensureAuth()) return;

  const payload = getAnalysisPayload();
  if (!payload) {
    window.location.href = "upload.html";
    return;
  }

  const summary = payload.summary || {};
  const lineIssues = Array.isArray(payload.lineIssues) ? payload.lineIssues : [];

  // [css modifier, heading, count] for each counter card.
  const statCards = [
    ["stat-dup", "Duplication", summary.duplicationCount],
    ["stat-inc", "Inconsistency", summary.inconsistencyCount],
    ["stat-con", "Contradiction", summary.contradictionCount],
  ];
  const issueStats = document.getElementById("issueStats");
  issueStats.innerHTML = statCards
    .map(
      ([modifier, heading, count]) => `
    <article class="stat-card ${modifier}">
      <h3>${heading}</h3>
      <p>${escapeHtml(count ?? 0)}</p>
    </article>`
    )
    .join("");

  // [heading, category key] for each table section.
  const sections = [
    ["Duplication Lines", "duplication"],
    ["Inconsistency Lines", "inconsistency"],
    ["Contradiction Lines", "contradiction"],
  ];
  const lineIssueTables = document.getElementById("lineIssueTables");
  lineIssueTables.innerHTML = sections
    .map(
      ([heading, category]) => `
    <section class="result-card">
      <h4>${heading}</h4>
      ${buildIssueRows(lineIssues, category)}
    </section>`
    )
    .join("");
}
|
| 374 |
+
|
| 375 |
+
/**
 * Render summary.html from the stored analysis payload: headline figures,
 * detailed summary text, per-page summaries, top findings and the
 * line-level error table.
 *
 * Redirects to upload.html when no payload is stored; requires a user
 * session. Each optional board is rendered independently, so an empty
 * findings list no longer prevents the line-error dashboard from rendering.
 */
function initSummaryPage() {
  if (!ensureAuth()) return;

  const payload = getAnalysisPayload();
  if (!payload) {
    window.location.href = "upload.html";
    return;
  }

  const summary = payload.summary || {};
  const findings = Array.isArray(payload.findings) ? payload.findings : [];
  const pageSummaries = Array.isArray(payload.pageSummaries) ? payload.pageSummaries : [];
  const lineIssues = Array.isArray(payload.lineIssues) ? payload.lineIssues : [];
  const detailedSummary = String(payload.detailedSummary || "").trim();
  const meta = payload._meta || {};

  const summaryDetails = document.getElementById("summaryDetails");
  summaryDetails.innerHTML = `
    <article class="summary-item"><span>File</span><strong>${escapeHtml(meta.fileName || "-")}</strong></article>
    <article class="summary-item"><span>Scan Mode</span><strong>${escapeHtml(summary.scanMode || "-")}</strong></article>
    <article class="summary-item"><span>Threshold</span><strong>${escapeHtml(summary.threshold ?? "-")}</strong></article>
    <article class="summary-item"><span>Vendor</span><strong>${escapeHtml(summary.vendor || "Not found")}</strong></article>
    <article class="summary-item"><span>Vendee</span><strong>${escapeHtml(summary.vendee || "Not found")}</strong></article>
    <article class="summary-item"><span>Clauses</span><strong>${escapeHtml(summary.clauses ?? 0)}</strong></article>
    <article class="summary-item"><span>Pairs Compared</span><strong>${escapeHtml(summary.pairsCompared ?? 0)}</strong></article>
    <article class="summary-item"><span>Total Issues</span><strong>${escapeHtml(summary.issuesFound ?? 0)}</strong></article>
  `;

  const findingsBoard = document.getElementById("findingsBoard");
  const pageSummaryBoard = document.getElementById("pageSummaryBoard");
  const detailedSummaryText = document.getElementById("detailedSummaryText");
  const lineErrorDashboard = document.getElementById("lineErrorDashboard");

  if (detailedSummaryText) {
    detailedSummaryText.textContent = detailedSummary || "Detailed summary is not available for this document.";
  }

  if (pageSummaryBoard) {
    if (pageSummaries.length === 0) {
      pageSummaryBoard.innerHTML =
        `<article class="result-card"><p class="result-muted">No page-wise summary available for this document.</p></article>`;
    } else {
      pageSummaryBoard.innerHTML = pageSummaries
        .map((item) => {
          const keyLines = Array.isArray(item.keyLines) ? item.keyLines : [];
          const keyLineHtml = keyLines.length
            ? keyLines.map((k) => `<li>${escapeHtml(k)}</li>`).join("")
            : "<li>No flagged lines on this page.</li>";
          return `
            <article class="result-card">
              <h4>Page ${escapeHtml(item.page)}</h4>
              <p><strong>Clauses:</strong> ${escapeHtml(item.clauseCount ?? 0)}</p>
              <p><strong>Issues:</strong> ${escapeHtml(item.issueCount ?? 0)} (Duplication: ${escapeHtml(item.duplicationCount ?? 0)}, Inconsistency: ${escapeHtml(item.inconsistencyCount ?? 0)}, Contradiction: ${escapeHtml(item.contradictionCount ?? 0)})</p>
              <p><strong>Page Snippet:</strong> ${escapeHtml(item.pageSnippet || "-")}</p>
              <p><strong>Summary:</strong> ${escapeHtml(item.summaryText || "-")}</p>
              <p><strong>Key Lines:</strong></p>
              <ul>${keyLineHtml}</ul>
            </article>
          `;
        })
        .join("");
    }
  }

  // Guarded like the other boards; the empty case uses if/else rather than an
  // early return so that the dashboard below is always reached.
  if (findingsBoard) {
    if (findings.length === 0) {
      findingsBoard.innerHTML = `<article class="result-card"><p class="result-muted">No major findings detected for this document.</p></article>`;
    } else {
      const topFindings = findings.slice(0, 20);
      findingsBoard.innerHTML = topFindings
        .map(
          (item) => `
            <article class="result-card">
              <h4>${escapeHtml(item.category || "issue")} - ${escapeHtml(item.issueType || "-")}</h4>
              <p><strong>Confidence:</strong> ${escapeHtml(item.confidence ?? "-")}</p>
              <p><strong>Location A:</strong> ${escapeHtml(item.location1 || "-")}</p>
              <p><strong>Location B:</strong> ${escapeHtml(item.location2 || "-")}</p>
              <p><strong>Reason:</strong> ${escapeHtml(item.reason || "-")}</p>
            </article>
          `
        )
        .join("");
    }
  }

  if (lineErrorDashboard) {
    if (lineIssues.length === 0) {
      lineErrorDashboard.innerHTML = `<p class="result-muted">No line-level errors detected.</p>`;
      return;
    }

    const rows = lineIssues
      .slice(0, 200)
      .map(
        (item) => `
          <tr>
            <td>${escapeHtml(item.location || `Pg ${item.page}, Ln ${item.line}`)}</td>
            <td>${escapeHtml(item.category || "-")}</td>
            <td>${escapeHtml(item.issueType || "-")}</td>
            <td>${escapeHtml(item.confidence ?? "-")}</td>
            <td>${escapeHtml(item.reason || "-")}</td>
          </tr>
        `
      )
      .join("");

    lineErrorDashboard.innerHTML = `
      <div class="table-wrap">
        <table class="result-table">
          <thead>
            <tr>
              <th>Page/Line</th>
              <th>Category</th>
              <th>Issue Type</th>
              <th>Confidence</th>
              <th>Reason</th>
            </tr>
          </thead>
          <tbody>${rows}</tbody>
        </table>
      </div>
    `;
  }
}
|
| 498 |
+
|
| 499 |
+
// Entry point: run the initializer that belongs to the current page.
// Pages not listed here need no script setup.
switch (page) {
  case "index.html":
  case "":
    initIndexPage();
    break;
  case "upload.html":
    initUploadPage();
    break;
  case "issues.html":
    initIssuesPage();
    break;
  case "summary.html":
    initSummaryPage();
    break;
  case "workflow.html":
    // workflow.html only forwards to the upload page.
    window.location.href = "upload.html";
    break;
  default:
    break;
}
|
frontend/assets/legal-tech-bg.svg
ADDED
|
|
frontend/index.html
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8" />
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 6 |
+
<title>Semantix • Legal Semantic Intelligence</title>
|
| 7 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
| 8 |
+
<link rel="preconnect" href="https://fonts.googleapis.com" />
|
| 9 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
|
| 10 |
+
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=Space+Grotesk:wght@500;600;700&display=swap" rel="stylesheet" />
|
| 11 |
+
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.1/css/all.min.css" />
|
| 12 |
+
<style>
|
| 13 |
+
:root {
|
| 14 |
+
--navy: #0f172a;
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
.tail-container {
|
| 18 |
+
font-family: "Inter", system-ui, sans-serif;
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
.heading-font {
|
| 22 |
+
font-family: "Space Grotesk", sans-serif;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
.hero-bg {
|
| 26 |
+
background: linear-gradient(135deg, #0f172a 0%, #1e293b 100%);
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
.card {
|
| 30 |
+
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
.card:hover {
|
| 34 |
+
transform: translateY(-4px);
|
| 35 |
+
box-shadow: 0 20px 25px -5px rgb(15 23 42 / 0.1), 0 8px 10px -6px rgb(15 23 42 / 0.1);
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
.document-3d {
|
| 39 |
+
perspective: 1200px;
|
| 40 |
+
transition: transform 0.6s cubic-bezier(0.23, 1, 0.32, 1);
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
.document-3d:hover {
|
| 44 |
+
transform: rotateX(12deg) rotateY(12deg) scale(1.03);
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
.document-inner {
|
| 48 |
+
box-shadow: 0 25px 50px -12px rgb(0 0 0 / 0.4), 0 0 80px -20px rgb(129 140 248 / 0.6), inset 0 4px 12px rgba(255, 255, 255, 0.3);
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
.scan-line {
|
| 52 |
+
position: absolute;
|
| 53 |
+
top: 0;
|
| 54 |
+
left: 0;
|
| 55 |
+
width: 100%;
|
| 56 |
+
height: 4px;
|
| 57 |
+
background: linear-gradient(90deg, transparent, #a5b4fc, transparent);
|
| 58 |
+
animation: scan 4s linear infinite;
|
| 59 |
+
opacity: 0.6;
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
.switcher button.active {
|
| 63 |
+
background: #ffffff;
|
| 64 |
+
color: #111827;
|
| 65 |
+
box-shadow: 0 4px 10px rgba(15, 23, 42, 0.12);
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
@keyframes scan {
|
| 69 |
+
0% {
|
| 70 |
+
transform: translateY(-100%);
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
100% {
|
| 74 |
+
transform: translateY(380px);
|
| 75 |
+
}
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
@media (max-width: 768px) {
|
| 79 |
+
.nav-mobile-hide {
|
| 80 |
+
display: none;
|
| 81 |
+
}
|
| 82 |
+
}
|
| 83 |
+
</style>
|
| 84 |
+
</head>
|
| 85 |
+
<body class="tail-container bg-zinc-50 text-slate-900">
|
| 86 |
+
<header class="bg-white border-b border-slate-200 sticky top-0 z-50">
|
| 87 |
+
<div class="max-w-7xl mx-auto px-6 md:px-8 py-5 flex items-center justify-between gap-4">
|
| 88 |
+
<div class="flex items-center gap-x-3">
|
| 89 |
+
<div class="w-9 h-9 bg-gradient-to-br from-indigo-600 to-violet-600 rounded-2xl flex items-center justify-center text-white font-bold text-2xl leading-none pt-0.5">S</div>
|
| 90 |
+
<a href="#home" class="heading-font text-3xl font-semibold tracking-tighter text-slate-900">Semantix</a>
|
| 91 |
+
</div>
|
| 92 |
+
|
| 93 |
+
<nav class="nav-mobile-hide md:flex items-center gap-x-10 text-sm font-medium">
|
| 94 |
+
<a href="#home" class="hover:text-indigo-600 transition-colors">Home</a>
|
| 95 |
+
<a href="#about" class="hover:text-indigo-600 transition-colors">About</a>
|
| 96 |
+
<a href="#service" class="hover:text-indigo-600 transition-colors">Service</a>
|
| 97 |
+
<a href="#contact" class="hover:text-indigo-600 transition-colors">Contact</a>
|
| 98 |
+
</nav>
|
| 99 |
+
|
| 100 |
+
<a href="#authView" class="px-5 py-2.5 text-sm font-semibold bg-indigo-600 hover:bg-indigo-700 text-white rounded-2xl transition-colors">Get Started</a>
|
| 101 |
+
</div>
|
| 102 |
+
</header>
|
| 103 |
+
|
| 104 |
+
<main>
|
| 105 |
+
<section id="home" class="hero-bg min-h-screen flex items-center relative overflow-hidden">
|
| 106 |
+
<div class="absolute inset-0 bg-[radial-gradient(at_50%_30%,rgba(129,140,248,0.15),transparent)]"></div>
|
| 107 |
+
|
| 108 |
+
<div class="max-w-7xl mx-auto px-6 md:px-8 grid md:grid-cols-12 gap-14 items-center relative z-10 py-16">
|
| 109 |
+
<div class="md:col-span-7">
|
| 110 |
+
<div class="inline-flex items-center gap-x-2 bg-white/10 backdrop-blur-md border border-white/20 text-white text-xs font-medium px-4 py-2 rounded-3xl mb-6">
|
| 111 |
+
<span class="relative flex h-3 w-3">
|
| 112 |
+
<span class="animate-ping absolute inline-flex h-full w-full rounded-full bg-emerald-400 opacity-75"></span>
|
| 113 |
+
<span class="relative inline-flex rounded-full h-3 w-3 bg-emerald-500"></span>
|
| 114 |
+
</span>
|
| 115 |
+
AI LEGAL INTELLIGENCE
|
| 116 |
+
</div>
|
| 117 |
+
|
| 118 |
+
<h1 class="heading-font text-5xl md:text-7xl leading-none font-semibold tracking-tighter text-white max-w-2xl">
|
| 119 |
+
Legal Documents,<br />Deeply Understood
|
| 120 |
+
</h1>
|
| 121 |
+
|
| 122 |
+
<p class="mt-8 text-lg md:text-xl text-slate-300 max-w-xl">
|
| 123 |
+
Advanced semantic analysis that uncovers hidden risks and makes every contract crystal clear.
|
| 124 |
+
</p>
|
| 125 |
+
|
| 126 |
+
<div class="mt-12 flex justify-center md:justify-start">
|
| 127 |
+
<div class="document-3d relative inline-block">
|
| 128 |
+
<div class="document-inner w-[300px] md:w-[320px] h-[360px] md:h-[380px] bg-white rounded-3xl overflow-hidden border border-white/40 relative">
|
| 129 |
+
<div class="h-12 bg-gradient-to-r from-indigo-600 to-violet-600 flex items-center px-6 text-white text-sm font-medium">
|
| 130 |
+
CONTRACT • PAGE 1
|
| 131 |
+
</div>
|
| 132 |
+
|
| 133 |
+
<div class="p-6 space-y-3 text-[10px] leading-tight text-slate-700 font-mono">
|
| 134 |
+
<div class="h-2.5 bg-slate-200 rounded w-3/4"></div>
|
| 135 |
+
<div class="h-2.5 bg-slate-200 rounded w-11/12"></div>
|
| 136 |
+
<div class="h-2.5 bg-slate-200 rounded w-5/6"></div>
|
| 137 |
+
<div class="h-2.5 bg-slate-200 rounded w-full"></div>
|
| 138 |
+
<div class="h-2.5 bg-slate-200 rounded w-3/4"></div>
|
| 139 |
+
<div class="h-2.5 bg-slate-200 rounded w-10/12"></div>
|
| 140 |
+
</div>
|
| 141 |
+
|
| 142 |
+
<div class="absolute inset-0 bg-gradient-to-br from-indigo-400/10 to-violet-400/10 flex items-center justify-center">
|
| 143 |
+
<i class="fa-solid fa-wand-magic-sparkles text-white text-[120px] opacity-30" aria-hidden="true"></i>
|
| 144 |
+
</div>
|
| 145 |
+
<div class="scan-line"></div>
|
| 146 |
+
</div>
|
| 147 |
+
</div>
|
| 148 |
+
</div>
|
| 149 |
+
</div>
|
| 150 |
+
|
| 151 |
+
<div class="md:col-span-5">
|
| 152 |
+
<section id="authView" class="bg-white rounded-3xl shadow-2xl p-8 md:p-10 card">
|
| 153 |
+
<div class="form-header mb-8">
|
| 154 |
+
<div class="switcher grid grid-cols-2 bg-slate-100 p-1 rounded-2xl mb-3" role="group" aria-label="Auth mode">
|
| 155 |
+
<button id="loginTab" class="active px-7 py-3 text-sm font-semibold rounded-[14px]" type="button">Login</button>
|
| 156 |
+
<button id="signupTab" class="px-7 py-3 text-sm font-semibold rounded-[14px]" type="button">Sign Up</button>
|
| 157 |
+
</div>
|
| 158 |
+
<p id="formSubtitle" class="text-slate-500 text-sm">Enter your credentials to access your account.</p>
|
| 159 |
+
</div>
|
| 160 |
+
|
| 161 |
+
<form id="authForm" class="space-y-5" novalidate>
|
| 162 |
+
<div id="nameField" class="hidden">
|
| 163 |
+
<label class="text-xs uppercase tracking-widest text-slate-500 block mb-1" for="fullName">Full Name</label>
|
| 164 |
+
<input id="fullName" name="fullName" type="text" placeholder="Jayasree" class="w-full bg-zinc-50 border border-slate-200 focus:border-indigo-500 rounded-2xl px-5 py-4 outline-none" />
|
| 165 |
+
</div>
|
| 166 |
+
<div>
|
| 167 |
+
<label class="text-xs uppercase tracking-widest text-slate-500 block mb-1" for="email">Email</label>
|
| 168 |
+
<input id="email" name="email" type="email" placeholder="you@lawfirm.in" autocomplete="email" required class="w-full bg-zinc-50 border border-slate-200 focus:border-indigo-500 rounded-2xl px-5 py-4 outline-none" />
|
| 169 |
+
</div>
|
| 170 |
+
<div>
|
| 171 |
+
<label class="text-xs uppercase tracking-widest text-slate-500 block mb-1" for="password">Password</label>
|
| 172 |
+
<input id="password" name="password" type="password" placeholder="Minimum 6 characters" autocomplete="current-password" required class="w-full bg-zinc-50 border border-slate-200 focus:border-indigo-500 rounded-2xl px-5 py-4 outline-none" />
|
| 173 |
+
</div>
|
| 174 |
+
<button id="submitBtn" type="submit" class="w-full bg-indigo-600 hover:bg-indigo-700 transition-colors text-white font-semibold py-4 rounded-3xl">Login</button>
|
| 175 |
+
</form>
|
| 176 |
+
<p id="message" class="text-center text-sm mt-6 text-slate-500"></p>
|
| 177 |
+
</section>
|
| 178 |
+
</div>
|
| 179 |
+
</div>
|
| 180 |
+
</section>
|
| 181 |
+
|
| 182 |
+
<section id="about" class="py-20 bg-white">
|
| 183 |
+
<div class="max-w-4xl mx-auto px-8 text-center">
|
| 184 |
+
<h2 class="heading-font text-4xl md:text-5xl font-semibold tracking-tighter mb-6">Reliable. Precise. Intelligent.</h2>
|
| 185 |
+
<p class="text-lg text-slate-600 max-w-2xl mx-auto">
|
| 186 |
+
Semantix delivers clear, accurate semantic analysis of legal documents, helping you catch issues instantly and work with confidence.
|
| 187 |
+
</p>
|
| 188 |
+
</div>
|
| 189 |
+
</section>
|
| 190 |
+
|
| 191 |
+
<section id="service" class="py-24 bg-slate-50">
|
| 192 |
+
<div class="max-w-7xl mx-auto px-8">
|
| 193 |
+
<h2 class="heading-font text-center text-4xl md:text-5xl font-semibold tracking-tighter mb-16">Built for serious legal work</h2>
|
| 194 |
+
<div class="grid md:grid-cols-3 gap-8">
|
| 195 |
+
<div class="bg-white p-10 rounded-3xl card text-center">
|
| 196 |
+
<div class="text-5xl mb-6">🔐</div>
|
| 197 |
+
<h3 class="font-semibold text-xl">Enterprise Security</h3>
|
| 198 |
+
<p class="text-slate-500 mt-3">Your documents stay private and protected.</p>
|
| 199 |
+
</div>
|
| 200 |
+
<div class="bg-white p-10 rounded-3xl card text-center">
|
| 201 |
+
<div class="text-5xl mb-6">🧠</div>
|
| 202 |
+
<h3 class="font-semibold text-xl">Smart Analysis</h3>
|
| 203 |
+
<p class="text-slate-500 mt-3">Understands legal language like a senior counsel.</p>
|
| 204 |
+
</div>
|
| 205 |
+
<div class="bg-white p-10 rounded-3xl card text-center">
|
| 206 |
+
<div class="text-5xl mb-6">📈</div>
|
| 207 |
+
<h3 class="font-semibold text-xl">Instant Insights</h3>
|
| 208 |
+
<p class="text-slate-500 mt-3">Visual dashboard with line-level clarity.</p>
|
| 209 |
+
</div>
|
| 210 |
+
</div>
|
| 211 |
+
</div>
|
| 212 |
+
</section>
|
| 213 |
+
|
| 214 |
+
<section id="contact" class="py-20 bg-slate-900 text-white text-center">
|
| 215 |
+
<div class="max-w-7xl mx-auto px-8">
|
| 216 |
+
<p class="text-sm uppercase tracking-widest text-slate-400">Made for legal professionals</p>
|
| 217 |
+
<h2 class="heading-font text-4xl mt-4">Ready for flawless contracts?</h2>
|
| 218 |
+
<a href="mailto:hello@semantix.ai" class="inline-block mt-10 px-10 py-4 bg-white text-slate-900 font-semibold rounded-3xl hover:bg-indigo-50 transition-colors">Contact Us</a>
|
| 219 |
+
<p class="mt-20 text-xs text-slate-500">© 2026 Semantix • Legal Semantic Intelligence</p>
|
| 220 |
+
</div>
|
| 221 |
+
</section>
|
| 222 |
+
</main>
|
| 223 |
+
|
| 224 |
+
<script src="app.js"></script>
|
| 225 |
+
</body>
|
| 226 |
+
</html>
|
frontend/issues.html
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8" />
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 6 |
+
<title>Issue Analysis | LegalSI</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com" />
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
|
| 9 |
+
<link
|
| 10 |
+
href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700;800&family=Space+Grotesk:wght@500;700&display=swap"
|
| 11 |
+
rel="stylesheet"
|
| 12 |
+
/>
|
| 13 |
+
<link rel="stylesheet" href="styles.css" />
|
| 14 |
+
</head>
|
| 15 |
+
<body>
|
| 16 |
+
<header class="topbar">
|
| 17 |
+
<div class="container topbar-inner">
|
| 18 |
+
<a class="brand" href="index.html#home">LegalSI</a>
|
| 19 |
+
<div class="page-links">
|
| 20 |
+
<a class="page-link" href="upload.html">Upload</a>
|
| 21 |
+
<a class="page-link active" href="issues.html">Issue Analysis</a>
|
| 22 |
+
<a class="page-link" href="summary.html">Final Summary</a>
|
| 23 |
+
<button id="logoutBtn" class="logout-btn" type="button">Logout</button>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</header>
|
| 27 |
+
|
| 28 |
+
<main class="flow-main">
|
| 29 |
+
<section class="container flow-card">
|
| 30 |
+
<div class="upload-header">
|
| 31 |
+
<h1>Line-Level Issue Analysis</h1>
|
| 32 |
+
<span id="userBadge" class="user-badge"></span>
|
| 33 |
+
</div>
|
| 34 |
+
<p class="upload-subtitle">Inconsistencies, contradictions, and duplications with page and line references.</p>
|
| 35 |
+
|
| 36 |
+
<div id="issueStats" class="stats-grid"></div>
|
| 37 |
+
<div id="lineIssueTables"></div>
|
| 38 |
+
|
| 39 |
+
<div class="workflow-actions">
|
| 40 |
+
<a class="secondary-btn as-link" href="upload.html">Back to Upload</a>
|
| 41 |
+
<a class="submit-btn as-link submit-link" href="summary.html">Next: Final Summary</a>
|
| 42 |
+
</div>
|
| 43 |
+
</section>
|
| 44 |
+
</main>
|
| 45 |
+
|
| 46 |
+
<script src="app.js"></script>
|
| 47 |
+
</body>
|
| 48 |
+
</html>
|
frontend/styles.css
ADDED
|
@@ -0,0 +1,957 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
:root {
|
| 2 |
+
--bg: #f3f5f8;
|
| 3 |
+
--surface: #ffffff;
|
| 4 |
+
--surface-soft: #f8fafc;
|
| 5 |
+
--ink: #0e2238;
|
| 6 |
+
--muted: #5b6f85;
|
| 7 |
+
--border: #d3dee9;
|
| 8 |
+
--navy: #12385f;
|
| 9 |
+
--navy-2: #1f4d79;
|
| 10 |
+
--gold: #b78a28;
|
| 11 |
+
--primary: #1f5fa6;
|
| 12 |
+
--primary-2: #2e79c8;
|
| 13 |
+
--teal: #1f8a75;
|
| 14 |
+
--danger: #b93f4f;
|
| 15 |
+
--ok: #166a47;
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
* {
|
| 19 |
+
box-sizing: border-box;
|
| 20 |
+
}
|
| 21 |
+
|
| 22 |
+
html {
|
| 23 |
+
scroll-behavior: smooth;
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
body {
|
| 27 |
+
margin: 0;
|
| 28 |
+
font-family: "Manrope", sans-serif;
|
| 29 |
+
color: var(--ink);
|
| 30 |
+
background:
|
| 31 |
+
radial-gradient(1000px 450px at -10% -8%, #dfe9f5 0%, rgba(223, 233, 245, 0) 60%),
|
| 32 |
+
radial-gradient(900px 420px at 110% -10%, #ece5d5 0%, rgba(236, 229, 213, 0) 58%),
|
| 33 |
+
linear-gradient(180deg, #eff3f7 0%, #f8fafd 42%, #ffffff 100%);
|
| 34 |
+
line-height: 1.45;
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
.container {
|
| 38 |
+
width: min(1180px, 92%);
|
| 39 |
+
margin: 0 auto;
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
.topbar {
|
| 43 |
+
position: sticky;
|
| 44 |
+
top: 0;
|
| 45 |
+
z-index: 20;
|
| 46 |
+
background: rgba(249, 251, 253, 0.94);
|
| 47 |
+
backdrop-filter: blur(6px);
|
| 48 |
+
border-bottom: 1px solid #cfd9e4;
|
| 49 |
+
box-shadow: 0 4px 18px rgba(14, 34, 56, 0.06);
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
.topbar-inner {
|
| 53 |
+
display: flex;
|
| 54 |
+
align-items: center;
|
| 55 |
+
justify-content: space-between;
|
| 56 |
+
min-height: 68px;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
.brand {
|
| 60 |
+
font-family: "Space Grotesk", sans-serif;
|
| 61 |
+
font-size: 24px;
|
| 62 |
+
font-weight: 700;
|
| 63 |
+
color: var(--navy);
|
| 64 |
+
text-decoration: none;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
.nav-links {
|
| 68 |
+
display: flex;
|
| 69 |
+
gap: 20px;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
.nav-links a {
|
| 73 |
+
color: #264868;
|
| 74 |
+
text-decoration: none;
|
| 75 |
+
font-weight: 700;
|
| 76 |
+
font-size: 14px;
|
| 77 |
+
padding: 6px 8px;
|
| 78 |
+
border-radius: 8px;
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
.nav-links a:hover {
|
| 82 |
+
background: #e9f0f8;
|
| 83 |
+
color: var(--navy);
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
.hero {
|
| 87 |
+
position: relative;
|
| 88 |
+
padding: 48px 0 42px;
|
| 89 |
+
overflow: hidden;
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
.hero-bg {
|
| 93 |
+
position: absolute;
|
| 94 |
+
inset: 0;
|
| 95 |
+
background:
|
| 96 |
+
linear-gradient(120deg, rgba(18, 56, 95, 0.1), rgba(183, 138, 40, 0.09)),
|
| 97 |
+
url("assets/legal-tech-bg.svg") right center / cover no-repeat;
|
| 98 |
+
opacity: 0.95;
|
| 99 |
+
pointer-events: none;
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
.hero-grid {
|
| 103 |
+
position: relative;
|
| 104 |
+
display: grid;
|
| 105 |
+
grid-template-columns: 1.1fr 0.95fr;
|
| 106 |
+
gap: 24px;
|
| 107 |
+
align-items: start;
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
.hero-copy {
|
| 111 |
+
background: rgba(255, 255, 255, 0.82);
|
| 112 |
+
border: 1px solid var(--border);
|
| 113 |
+
border-radius: 20px;
|
| 114 |
+
padding: 24px;
|
| 115 |
+
box-shadow: 0 14px 34px rgba(15, 38, 66, 0.11);
|
| 116 |
+
animation: fadeInUp 0.45s ease-out;
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
.eyebrow {
|
| 120 |
+
margin: 0 0 10px;
|
| 121 |
+
font-size: 13px;
|
| 122 |
+
letter-spacing: 0.05em;
|
| 123 |
+
text-transform: uppercase;
|
| 124 |
+
color: var(--navy-2);
|
| 125 |
+
font-weight: 800;
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
.hero-copy h1 {
|
| 129 |
+
margin: 0;
|
| 130 |
+
font-size: clamp(30px, 4.6vw, 50px);
|
| 131 |
+
line-height: 1.08;
|
| 132 |
+
font-family: "Space Grotesk", sans-serif;
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
.hero-text {
|
| 136 |
+
margin: 14px 0 18px;
|
| 137 |
+
color: var(--muted);
|
| 138 |
+
line-height: 1.6;
|
| 139 |
+
max-width: 66ch;
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
.hero-cta-row {
|
| 143 |
+
display: flex;
|
| 144 |
+
gap: 10px;
|
| 145 |
+
flex-wrap: wrap;
|
| 146 |
+
margin: 8px 0 14px;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
.hero-cta-primary,
|
| 150 |
+
.hero-cta-secondary {
|
| 151 |
+
text-decoration: none;
|
| 152 |
+
border-radius: 11px;
|
| 153 |
+
font-size: 14px;
|
| 154 |
+
font-weight: 800;
|
| 155 |
+
padding: 10px 14px;
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
.hero-cta-primary {
|
| 159 |
+
color: #ffffff;
|
| 160 |
+
background: linear-gradient(92deg, var(--navy), var(--primary-2) 58%, var(--teal));
|
| 161 |
+
box-shadow: 0 10px 18px rgba(17, 62, 110, 0.22);
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
.hero-cta-secondary {
|
| 165 |
+
color: #1c446b;
|
| 166 |
+
background: #ecf4ff;
|
| 167 |
+
border: 1px solid #bfd6f2;
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
.trust-strip {
|
| 171 |
+
display: flex;
|
| 172 |
+
flex-wrap: wrap;
|
| 173 |
+
gap: 8px;
|
| 174 |
+
margin: 0 0 14px;
|
| 175 |
+
}
|
| 176 |
+
|
| 177 |
+
.trust-strip span {
|
| 178 |
+
border: 1px solid #d0dded;
|
| 179 |
+
border-radius: 999px;
|
| 180 |
+
padding: 5px 10px;
|
| 181 |
+
font-size: 12px;
|
| 182 |
+
font-weight: 700;
|
| 183 |
+
color: #315579;
|
| 184 |
+
background: #f5f9ff;
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
.hero-metrics {
|
| 188 |
+
display: grid;
|
| 189 |
+
grid-template-columns: repeat(3, 1fr);
|
| 190 |
+
gap: 10px;
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
.hero-metrics > div {
|
| 194 |
+
border: 1px solid #d5e2f0;
|
| 195 |
+
background: #ffffff;
|
| 196 |
+
border-radius: 12px;
|
| 197 |
+
padding: 12px;
|
| 198 |
+
transition: transform 0.18s ease, box-shadow 0.18s ease;
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
.hero-metrics > div:hover {
|
| 202 |
+
transform: translateY(-2px);
|
| 203 |
+
box-shadow: 0 10px 18px rgba(16, 43, 74, 0.09);
|
| 204 |
+
}
|
| 205 |
+
|
| 206 |
+
.hero-metrics h3 {
|
| 207 |
+
margin: 0;
|
| 208 |
+
font-size: 14px;
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
.hero-metrics p {
|
| 212 |
+
margin: 6px 0 0;
|
| 213 |
+
color: var(--muted);
|
| 214 |
+
font-size: 12px;
|
| 215 |
+
}
|
| 216 |
+
|
| 217 |
+
.preview-card {
|
| 218 |
+
margin-top: 12px;
|
| 219 |
+
border: 1px solid #ccdaea;
|
| 220 |
+
border-radius: 14px;
|
| 221 |
+
padding: 12px;
|
| 222 |
+
background: linear-gradient(160deg, #f7fbff 0%, #edf5ff 100%);
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
.preview-card h3 {
|
| 226 |
+
margin: 0 0 10px;
|
| 227 |
+
font-size: 14px;
|
| 228 |
+
color: #163a60;
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
.preview-grid {
|
| 232 |
+
display: grid;
|
| 233 |
+
grid-template-columns: repeat(4, 1fr);
|
| 234 |
+
gap: 8px;
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
.preview-grid div {
|
| 238 |
+
border: 1px solid #c8d9ec;
|
| 239 |
+
border-radius: 10px;
|
| 240 |
+
background: #ffffff;
|
| 241 |
+
padding: 8px;
|
| 242 |
+
display: grid;
|
| 243 |
+
gap: 3px;
|
| 244 |
+
}
|
| 245 |
+
|
| 246 |
+
.preview-grid span {
|
| 247 |
+
font-size: 11px;
|
| 248 |
+
color: #5a7090;
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
.preview-grid strong {
|
| 252 |
+
font-size: 19px;
|
| 253 |
+
color: #15395e;
|
| 254 |
+
}
|
| 255 |
+
|
| 256 |
+
.panel {
|
| 257 |
+
background: var(--surface);
|
| 258 |
+
border: 1px solid var(--border);
|
| 259 |
+
border-radius: 18px;
|
| 260 |
+
box-shadow: 0 14px 30px rgba(12, 31, 53, 0.12);
|
| 261 |
+
animation: fadeInUp 0.5s ease-out;
|
| 262 |
+
}
|
| 263 |
+
|
| 264 |
+
.auth-panel {
|
| 265 |
+
padding: 22px;
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
.form-header {
|
| 269 |
+
margin-bottom: 18px;
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
.switcher {
|
| 273 |
+
display: grid;
|
| 274 |
+
grid-template-columns: 1fr 1fr;
|
| 275 |
+
background: #e9eff7;
|
| 276 |
+
border-radius: 12px;
|
| 277 |
+
padding: 4px;
|
| 278 |
+
margin-bottom: 12px;
|
| 279 |
+
}
|
| 280 |
+
|
| 281 |
+
.switcher button {
|
| 282 |
+
border: 0;
|
| 283 |
+
background: transparent;
|
| 284 |
+
border-radius: 9px;
|
| 285 |
+
padding: 10px;
|
| 286 |
+
font-weight: 800;
|
| 287 |
+
cursor: pointer;
|
| 288 |
+
color: #315579;
|
| 289 |
+
transition: background 0.2s ease, color 0.2s ease, transform 0.12s ease;
|
| 290 |
+
}
|
| 291 |
+
|
| 292 |
+
.switcher button.active {
|
| 293 |
+
color: #112a48;
|
| 294 |
+
background: #ffffff;
|
| 295 |
+
box-shadow: 0 6px 14px rgba(8, 26, 49, 0.08);
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
.switcher button:active {
|
| 299 |
+
transform: scale(0.98);
|
| 300 |
+
}
|
| 301 |
+
|
| 302 |
+
#formSubtitle {
|
| 303 |
+
margin: 0;
|
| 304 |
+
color: var(--muted);
|
| 305 |
+
font-size: 14px;
|
| 306 |
+
}
|
| 307 |
+
|
| 308 |
+
.auth-form {
|
| 309 |
+
display: grid;
|
| 310 |
+
gap: 14px;
|
| 311 |
+
}
|
| 312 |
+
|
| 313 |
+
.field {
|
| 314 |
+
display: grid;
|
| 315 |
+
gap: 7px;
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
.field label {
|
| 319 |
+
font-size: 14px;
|
| 320 |
+
font-weight: 700;
|
| 321 |
+
}
|
| 322 |
+
|
| 323 |
+
.field input,
|
| 324 |
+
.control {
|
| 325 |
+
border: 1px solid var(--border);
|
| 326 |
+
border-radius: 12px;
|
| 327 |
+
padding: 12px 13px;
|
| 328 |
+
font: inherit;
|
| 329 |
+
background: #ffffff;
|
| 330 |
+
outline: none;
|
| 331 |
+
width: 100%;
|
| 332 |
+
}
|
| 333 |
+
|
| 334 |
+
.field input:focus,
|
| 335 |
+
.control:focus {
|
| 336 |
+
border-color: var(--primary);
|
| 337 |
+
box-shadow: 0 0 0 4px rgba(31, 95, 166, 0.16);
|
| 338 |
+
}
|
| 339 |
+
|
| 340 |
+
.hidden {
|
| 341 |
+
display: none;
|
| 342 |
+
}
|
| 343 |
+
|
| 344 |
+
.submit-btn {
|
| 345 |
+
margin-top: 8px;
|
| 346 |
+
border: 0;
|
| 347 |
+
border-radius: 12px;
|
| 348 |
+
padding: 12px;
|
| 349 |
+
background: linear-gradient(92deg, var(--navy), var(--primary-2) 58%, var(--teal));
|
| 350 |
+
color: #ffffff;
|
| 351 |
+
font-weight: 800;
|
| 352 |
+
font-size: 15px;
|
| 353 |
+
cursor: pointer;
|
| 354 |
+
transition: transform 0.16s ease, box-shadow 0.16s ease, filter 0.16s ease;
|
| 355 |
+
}
|
| 356 |
+
|
| 357 |
+
.submit-btn:hover {
|
| 358 |
+
filter: brightness(1.03);
|
| 359 |
+
transform: translateY(-1px);
|
| 360 |
+
box-shadow: 0 10px 18px rgba(17, 62, 110, 0.22);
|
| 361 |
+
}
|
| 362 |
+
|
| 363 |
+
.message {
|
| 364 |
+
min-height: 22px;
|
| 365 |
+
margin: 14px 0 0;
|
| 366 |
+
font-size: 14px;
|
| 367 |
+
font-weight: 700;
|
| 368 |
+
}
|
| 369 |
+
|
| 370 |
+
.message.success {
|
| 371 |
+
color: var(--ok);
|
| 372 |
+
}
|
| 373 |
+
|
| 374 |
+
.message.error {
|
| 375 |
+
color: var(--danger);
|
| 376 |
+
}
|
| 377 |
+
|
| 378 |
+
.upload-header {
|
| 379 |
+
display: flex;
|
| 380 |
+
align-items: center;
|
| 381 |
+
justify-content: space-between;
|
| 382 |
+
}
|
| 383 |
+
|
| 384 |
+
.upload-header h2 {
|
| 385 |
+
margin: 0;
|
| 386 |
+
font-family: "Space Grotesk", sans-serif;
|
| 387 |
+
}
|
| 388 |
+
|
| 389 |
+
.upload-subtitle {
|
| 390 |
+
margin: 10px 0 18px;
|
| 391 |
+
color: var(--muted);
|
| 392 |
+
}
|
| 393 |
+
|
| 394 |
+
.stepper {
|
| 395 |
+
display: grid;
|
| 396 |
+
grid-template-columns: repeat(3, 1fr);
|
| 397 |
+
gap: 8px;
|
| 398 |
+
margin: 10px 0 16px;
|
| 399 |
+
}
|
| 400 |
+
|
| 401 |
+
.step-chip {
|
| 402 |
+
text-align: center;
|
| 403 |
+
border: 1px solid var(--border);
|
| 404 |
+
border-radius: 10px;
|
| 405 |
+
padding: 8px 10px;
|
| 406 |
+
font-size: 13px;
|
| 407 |
+
font-weight: 800;
|
| 408 |
+
color: #5d7190;
|
| 409 |
+
background: #f3f6fb;
|
| 410 |
+
transition: all 0.2s ease;
|
| 411 |
+
}
|
| 412 |
+
|
| 413 |
+
.step-chip.active {
|
| 414 |
+
color: #0f2d4e;
|
| 415 |
+
border-color: #b7cde7;
|
| 416 |
+
background: #e8f1fc;
|
| 417 |
+
box-shadow: inset 0 0 0 1px rgba(38, 97, 166, 0.15);
|
| 418 |
+
}
|
| 419 |
+
|
| 420 |
+
.workflow-step {
|
| 421 |
+
margin-top: 6px;
|
| 422 |
+
}
|
| 423 |
+
|
| 424 |
+
.summary-box {
|
| 425 |
+
border: 1px solid var(--border);
|
| 426 |
+
border-radius: 12px;
|
| 427 |
+
background: var(--surface-soft);
|
| 428 |
+
padding: 12px;
|
| 429 |
+
color: #25496f;
|
| 430 |
+
box-shadow: inset 0 0 0 1px rgba(255, 255, 255, 0.6);
|
| 431 |
+
}
|
| 432 |
+
|
| 433 |
+
.summary-box p {
|
| 434 |
+
margin: 5px 0;
|
| 435 |
+
font-size: 14px;
|
| 436 |
+
}
|
| 437 |
+
|
| 438 |
+
.workflow-actions {
|
| 439 |
+
display: flex;
|
| 440 |
+
gap: 10px;
|
| 441 |
+
margin-top: 12px;
|
| 442 |
+
flex-wrap: wrap;
|
| 443 |
+
}
|
| 444 |
+
|
| 445 |
+
.upload-zone-wrap {
|
| 446 |
+
margin-top: 2px;
|
| 447 |
+
}
|
| 448 |
+
|
| 449 |
+
.upload-zone {
|
| 450 |
+
border: 1.5px dashed #b8cbe0;
|
| 451 |
+
border-radius: 14px;
|
| 452 |
+
background: linear-gradient(180deg, #f8fbff 0%, #f3f8ff 100%);
|
| 453 |
+
min-height: 132px;
|
| 454 |
+
display: grid;
|
| 455 |
+
place-content: center;
|
| 456 |
+
text-align: center;
|
| 457 |
+
gap: 6px;
|
| 458 |
+
cursor: pointer;
|
| 459 |
+
padding: 14px;
|
| 460 |
+
transition: border-color 0.2s ease, background 0.2s ease, transform 0.18s ease;
|
| 461 |
+
}
|
| 462 |
+
|
| 463 |
+
.upload-zone:hover {
|
| 464 |
+
border-color: #7ca4cf;
|
| 465 |
+
background: linear-gradient(180deg, #fafdff 0%, #eef5ff 100%);
|
| 466 |
+
transform: translateY(-1px);
|
| 467 |
+
}
|
| 468 |
+
|
| 469 |
+
.upload-icon {
|
| 470 |
+
width: 34px;
|
| 471 |
+
height: 34px;
|
| 472 |
+
border-radius: 999px;
|
| 473 |
+
margin: 0 auto;
|
| 474 |
+
display: grid;
|
| 475 |
+
place-content: center;
|
| 476 |
+
font-size: 22px;
|
| 477 |
+
font-weight: 700;
|
| 478 |
+
color: #21507f;
|
| 479 |
+
background: #e5eef9;
|
| 480 |
+
}
|
| 481 |
+
|
| 482 |
+
.upload-title {
|
| 483 |
+
font-size: 14px;
|
| 484 |
+
font-weight: 800;
|
| 485 |
+
color: #1f4469;
|
| 486 |
+
}
|
| 487 |
+
|
| 488 |
+
.upload-hint {
|
| 489 |
+
font-size: 12px;
|
| 490 |
+
color: #5f7691;
|
| 491 |
+
}
|
| 492 |
+
|
| 493 |
+
.file-input-hidden {
|
| 494 |
+
position: absolute;
|
| 495 |
+
left: -10000px;
|
| 496 |
+
width: 1px;
|
| 497 |
+
height: 1px;
|
| 498 |
+
opacity: 0;
|
| 499 |
+
}
|
| 500 |
+
|
| 501 |
+
.chat-panel {
|
| 502 |
+
border: 1px solid var(--border);
|
| 503 |
+
border-radius: 12px;
|
| 504 |
+
background: #f7fbff;
|
| 505 |
+
padding: 12px;
|
| 506 |
+
margin-top: 10px;
|
| 507 |
+
display: grid;
|
| 508 |
+
gap: 10px;
|
| 509 |
+
max-height: 220px;
|
| 510 |
+
overflow-y: auto;
|
| 511 |
+
}
|
| 512 |
+
|
| 513 |
+
.chat-bubble {
|
| 514 |
+
padding: 10px 12px;
|
| 515 |
+
border-radius: 12px;
|
| 516 |
+
font-size: 13px;
|
| 517 |
+
line-height: 1.5;
|
| 518 |
+
}
|
| 519 |
+
|
| 520 |
+
.chat-bubble.user {
|
| 521 |
+
justify-self: end;
|
| 522 |
+
max-width: 92%;
|
| 523 |
+
background: #e8f1ff;
|
| 524 |
+
border: 1px solid #bfd6f4;
|
| 525 |
+
color: #1f4268;
|
| 526 |
+
}
|
| 527 |
+
|
| 528 |
+
.chat-bubble.bot {
|
| 529 |
+
justify-self: start;
|
| 530 |
+
max-width: 96%;
|
| 531 |
+
background: #ffffff;
|
| 532 |
+
border: 1px solid #d4e0ee;
|
| 533 |
+
color: #274968;
|
| 534 |
+
}
|
| 535 |
+
|
| 536 |
+
.logout-btn {
|
| 537 |
+
border: 1px solid var(--border);
|
| 538 |
+
background: #ffffff;
|
| 539 |
+
border-radius: 10px;
|
| 540 |
+
padding: 8px 12px;
|
| 541 |
+
font-weight: 700;
|
| 542 |
+
cursor: pointer;
|
| 543 |
+
}
|
| 544 |
+
|
| 545 |
+
.secondary-btn {
|
| 546 |
+
border: 1px solid #b8cbe0;
|
| 547 |
+
background: #f1f6fc;
|
| 548 |
+
color: #1f4469;
|
| 549 |
+
border-radius: 12px;
|
| 550 |
+
padding: 12px 14px;
|
| 551 |
+
font-weight: 800;
|
| 552 |
+
font-size: 14px;
|
| 553 |
+
cursor: pointer;
|
| 554 |
+
transition: background 0.18s ease, transform 0.14s ease;
|
| 555 |
+
}
|
| 556 |
+
|
| 557 |
+
.secondary-btn:hover {
|
| 558 |
+
background: #e7f0fa;
|
| 559 |
+
}
|
| 560 |
+
|
| 561 |
+
.secondary-btn:active {
|
| 562 |
+
transform: scale(0.98);
|
| 563 |
+
}
|
| 564 |
+
|
| 565 |
+
.section {
|
| 566 |
+
padding: 20px 0 26px;
|
| 567 |
+
}
|
| 568 |
+
|
| 569 |
+
.section-card {
|
| 570 |
+
background: var(--surface);
|
| 571 |
+
border: 1px solid var(--border);
|
| 572 |
+
border-radius: 18px;
|
| 573 |
+
padding: 24px;
|
| 574 |
+
box-shadow: 0 10px 24px rgba(12, 34, 58, 0.09);
|
| 575 |
+
transition: box-shadow 0.2s ease, transform 0.2s ease;
|
| 576 |
+
}
|
| 577 |
+
|
| 578 |
+
.section-card:hover {
|
| 579 |
+
box-shadow: 0 14px 26px rgba(12, 34, 58, 0.13);
|
| 580 |
+
transform: translateY(-1px);
|
| 581 |
+
}
|
| 582 |
+
|
| 583 |
+
.section-card h2 {
|
| 584 |
+
margin: 0 0 10px;
|
| 585 |
+
font-family: "Space Grotesk", sans-serif;
|
| 586 |
+
}
|
| 587 |
+
|
| 588 |
+
.section-card p {
|
| 589 |
+
margin: 0;
|
| 590 |
+
color: var(--muted);
|
| 591 |
+
line-height: 1.7;
|
| 592 |
+
}
|
| 593 |
+
|
| 594 |
+
.service-grid {
|
| 595 |
+
margin-top: 14px;
|
| 596 |
+
display: grid;
|
| 597 |
+
grid-template-columns: repeat(3, 1fr);
|
| 598 |
+
gap: 12px;
|
| 599 |
+
}
|
| 600 |
+
|
| 601 |
+
.service-grid article {
|
| 602 |
+
border: 1px solid var(--border);
|
| 603 |
+
border-radius: 12px;
|
| 604 |
+
padding: 14px;
|
| 605 |
+
background: var(--surface-soft);
|
| 606 |
+
}
|
| 607 |
+
|
| 608 |
+
.service-grid h3 {
|
| 609 |
+
margin: 0 0 8px;
|
| 610 |
+
font-size: 16px;
|
| 611 |
+
}
|
| 612 |
+
|
| 613 |
+
.contact-grid {
|
| 614 |
+
margin-top: 14px;
|
| 615 |
+
display: grid;
|
| 616 |
+
gap: 8px;
|
| 617 |
+
color: #193b61;
|
| 618 |
+
}
|
| 619 |
+
|
| 620 |
+
.analysis-result {
|
| 621 |
+
margin-top: 16px;
|
| 622 |
+
border-top: 1px solid var(--border);
|
| 623 |
+
padding-top: 14px;
|
| 624 |
+
}
|
| 625 |
+
|
| 626 |
+
.result-summary h3 {
|
| 627 |
+
margin: 0 0 8px;
|
| 628 |
+
font-family: "Space Grotesk", sans-serif;
|
| 629 |
+
}
|
| 630 |
+
|
| 631 |
+
.result-summary p {
|
| 632 |
+
margin: 4px 0;
|
| 633 |
+
color: #1d3352;
|
| 634 |
+
}
|
| 635 |
+
|
| 636 |
+
.result-visual {
|
| 637 |
+
margin-top: 12px;
|
| 638 |
+
border: 1px solid var(--border);
|
| 639 |
+
border-radius: 12px;
|
| 640 |
+
padding: 12px;
|
| 641 |
+
background: linear-gradient(180deg, #f8fbff 0%, #f4f8fd 100%);
|
| 642 |
+
}
|
| 643 |
+
|
| 644 |
+
.result-visual h3 {
|
| 645 |
+
margin: 0 0 10px;
|
| 646 |
+
}
|
| 647 |
+
|
| 648 |
+
.bar-row {
|
| 649 |
+
display: grid;
|
| 650 |
+
grid-template-columns: 170px 1fr 52px;
|
| 651 |
+
align-items: center;
|
| 652 |
+
gap: 8px;
|
| 653 |
+
margin-bottom: 8px;
|
| 654 |
+
}
|
| 655 |
+
|
| 656 |
+
.bar-label,
|
| 657 |
+
.bar-value {
|
| 658 |
+
font-size: 13px;
|
| 659 |
+
font-weight: 700;
|
| 660 |
+
}
|
| 661 |
+
|
| 662 |
+
.bar-track {
|
| 663 |
+
width: 100%;
|
| 664 |
+
height: 12px;
|
| 665 |
+
border-radius: 999px;
|
| 666 |
+
background: #dde5f1;
|
| 667 |
+
overflow: hidden;
|
| 668 |
+
}
|
| 669 |
+
|
| 670 |
+
.bar-fill {
|
| 671 |
+
height: 100%;
|
| 672 |
+
border-radius: 999px;
|
| 673 |
+
}
|
| 674 |
+
|
| 675 |
+
.bar-fill.dup {
|
| 676 |
+
background: #2d6ec8;
|
| 677 |
+
}
|
| 678 |
+
|
| 679 |
+
.bar-fill.inc {
|
| 680 |
+
background: #d08f28;
|
| 681 |
+
}
|
| 682 |
+
|
| 683 |
+
.bar-fill.con {
|
| 684 |
+
background: #bd4b58;
|
| 685 |
+
}
|
| 686 |
+
|
| 687 |
+
.result-list {
|
| 688 |
+
margin-top: 12px;
|
| 689 |
+
display: grid;
|
| 690 |
+
gap: 10px;
|
| 691 |
+
}
|
| 692 |
+
|
| 693 |
+
.result-card {
|
| 694 |
+
border: 1px solid var(--border);
|
| 695 |
+
border-radius: 12px;
|
| 696 |
+
padding: 10px 12px;
|
| 697 |
+
background: #f9fbfe;
|
| 698 |
+
transition: box-shadow 0.16s ease;
|
| 699 |
+
}
|
| 700 |
+
|
| 701 |
+
.result-card:hover {
|
| 702 |
+
box-shadow: 0 10px 20px rgba(12, 34, 58, 0.09);
|
| 703 |
+
}
|
| 704 |
+
|
| 705 |
+
.result-card h4 {
|
| 706 |
+
margin: 0 0 6px;
|
| 707 |
+
}
|
| 708 |
+
|
| 709 |
+
.result-muted {
|
| 710 |
+
color: var(--muted);
|
| 711 |
+
}
|
| 712 |
+
|
| 713 |
+
.table-wrap {
|
| 714 |
+
width: 100%;
|
| 715 |
+
overflow-x: auto;
|
| 716 |
+
}
|
| 717 |
+
|
| 718 |
+
.result-table {
|
| 719 |
+
width: 100%;
|
| 720 |
+
border-collapse: collapse;
|
| 721 |
+
margin-top: 8px;
|
| 722 |
+
}
|
| 723 |
+
|
| 724 |
+
.result-table th,
|
| 725 |
+
.result-table td {
|
| 726 |
+
border: 1px solid var(--border);
|
| 727 |
+
padding: 8px;
|
| 728 |
+
text-align: left;
|
| 729 |
+
font-size: 13px;
|
| 730 |
+
vertical-align: top;
|
| 731 |
+
}
|
| 732 |
+
|
| 733 |
+
.result-table th {
|
| 734 |
+
background: #eef4ff;
|
| 735 |
+
}
|
| 736 |
+
|
| 737 |
+
@keyframes fadeInUp {
|
| 738 |
+
from {
|
| 739 |
+
opacity: 0;
|
| 740 |
+
transform: translateY(8px);
|
| 741 |
+
}
|
| 742 |
+
to {
|
| 743 |
+
opacity: 1;
|
| 744 |
+
transform: translateY(0);
|
| 745 |
+
}
|
| 746 |
+
}
|
| 747 |
+
|
| 748 |
+
@media (max-width: 980px) {
|
| 749 |
+
.hero-grid {
|
| 750 |
+
grid-template-columns: 1fr;
|
| 751 |
+
}
|
| 752 |
+
|
| 753 |
+
.hero-metrics {
|
| 754 |
+
grid-template-columns: 1fr;
|
| 755 |
+
}
|
| 756 |
+
|
| 757 |
+
.preview-grid {
|
| 758 |
+
grid-template-columns: repeat(2, 1fr);
|
| 759 |
+
}
|
| 760 |
+
|
| 761 |
+
.service-grid {
|
| 762 |
+
grid-template-columns: 1fr;
|
| 763 |
+
}
|
| 764 |
+
|
| 765 |
+
.bar-row {
|
| 766 |
+
grid-template-columns: 1fr;
|
| 767 |
+
gap: 6px;
|
| 768 |
+
}
|
| 769 |
+
|
| 770 |
+
.nav-links {
|
| 771 |
+
gap: 12px;
|
| 772 |
+
flex-wrap: wrap;
|
| 773 |
+
justify-content: flex-end;
|
| 774 |
+
}
|
| 775 |
+
|
| 776 |
+
.topbar-inner {
|
| 777 |
+
padding-block: 8px;
|
| 778 |
+
}
|
| 779 |
+
}
|
| 780 |
+
|
| 781 |
+
.page-links {
|
| 782 |
+
display: flex;
|
| 783 |
+
align-items: center;
|
| 784 |
+
gap: 10px;
|
| 785 |
+
}
|
| 786 |
+
|
| 787 |
+
.page-link {
|
| 788 |
+
border: 1px solid #bfd0e3;
|
| 789 |
+
border-radius: 10px;
|
| 790 |
+
padding: 6px 10px;
|
| 791 |
+
font-size: 13px;
|
| 792 |
+
font-weight: 700;
|
| 793 |
+
color: #23496f;
|
| 794 |
+
text-decoration: none;
|
| 795 |
+
background: #f4f8fd;
|
| 796 |
+
}
|
| 797 |
+
|
| 798 |
+
.page-link.active {
|
| 799 |
+
background: #e7f1ff;
|
| 800 |
+
border-color: #98b9dc;
|
| 801 |
+
color: #14395f;
|
| 802 |
+
}
|
| 803 |
+
|
| 804 |
+
.flow-main {
|
| 805 |
+
padding: 28px 0 36px;
|
| 806 |
+
}
|
| 807 |
+
|
| 808 |
+
.flow-card {
|
| 809 |
+
background: var(--surface);
|
| 810 |
+
border: 1px solid var(--border);
|
| 811 |
+
border-radius: 18px;
|
| 812 |
+
box-shadow: 0 14px 30px rgba(12, 31, 53, 0.12);
|
| 813 |
+
padding: 22px;
|
| 814 |
+
}
|
| 815 |
+
|
| 816 |
+
.flow-card h1 {
|
| 817 |
+
margin: 0;
|
| 818 |
+
font-family: "Space Grotesk", sans-serif;
|
| 819 |
+
font-size: clamp(28px, 4vw, 40px);
|
| 820 |
+
}
|
| 821 |
+
|
| 822 |
+
.user-badge {
|
| 823 |
+
border: 1px solid #c6d9ee;
|
| 824 |
+
border-radius: 999px;
|
| 825 |
+
padding: 8px 12px;
|
| 826 |
+
background: #f2f8ff;
|
| 827 |
+
color: #24486d;
|
| 828 |
+
font-weight: 700;
|
| 829 |
+
font-size: 13px;
|
| 830 |
+
}
|
| 831 |
+
|
| 832 |
+
.loading-panel {
|
| 833 |
+
margin-top: 16px;
|
| 834 |
+
border: 1px solid var(--border);
|
| 835 |
+
border-radius: 12px;
|
| 836 |
+
padding: 18px;
|
| 837 |
+
background: #f5f9ff;
|
| 838 |
+
display: grid;
|
| 839 |
+
justify-items: center;
|
| 840 |
+
gap: 10px;
|
| 841 |
+
}
|
| 842 |
+
|
| 843 |
+
.spinner {
|
| 844 |
+
width: 30px;
|
| 845 |
+
height: 30px;
|
| 846 |
+
border: 3px solid #c8d8eb;
|
| 847 |
+
border-top-color: #1f5fa6;
|
| 848 |
+
border-radius: 50%;
|
| 849 |
+
animation: spin 0.8s linear infinite;
|
| 850 |
+
}
|
| 851 |
+
|
| 852 |
+
.stats-grid {
|
| 853 |
+
display: grid;
|
| 854 |
+
grid-template-columns: repeat(3, minmax(0, 1fr));
|
| 855 |
+
gap: 10px;
|
| 856 |
+
margin: 12px 0 14px;
|
| 857 |
+
}
|
| 858 |
+
|
| 859 |
+
.stat-card {
|
| 860 |
+
border: 1px solid var(--border);
|
| 861 |
+
border-radius: 12px;
|
| 862 |
+
padding: 12px;
|
| 863 |
+
background: #f9fbfe;
|
| 864 |
+
}
|
| 865 |
+
|
| 866 |
+
.stat-card h3 {
|
| 867 |
+
margin: 0;
|
| 868 |
+
font-size: 14px;
|
| 869 |
+
}
|
| 870 |
+
|
| 871 |
+
.stat-card p {
|
| 872 |
+
margin: 6px 0 0;
|
| 873 |
+
font-size: 28px;
|
| 874 |
+
font-weight: 800;
|
| 875 |
+
}
|
| 876 |
+
|
| 877 |
+
.stat-dup p {
|
| 878 |
+
color: #2d6ec8;
|
| 879 |
+
}
|
| 880 |
+
|
| 881 |
+
.stat-inc p {
|
| 882 |
+
color: #d08f28;
|
| 883 |
+
}
|
| 884 |
+
|
| 885 |
+
.stat-con p {
|
| 886 |
+
color: #bd4b58;
|
| 887 |
+
}
|
| 888 |
+
|
| 889 |
+
.summary-grid {
|
| 890 |
+
display: grid;
|
| 891 |
+
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 892 |
+
gap: 10px;
|
| 893 |
+
margin-bottom: 14px;
|
| 894 |
+
}
|
| 895 |
+
|
| 896 |
+
.summary-item {
|
| 897 |
+
border: 1px solid var(--border);
|
| 898 |
+
border-radius: 12px;
|
| 899 |
+
padding: 10px;
|
| 900 |
+
background: #f9fbfe;
|
| 901 |
+
display: grid;
|
| 902 |
+
gap: 4px;
|
| 903 |
+
}
|
| 904 |
+
|
| 905 |
+
.summary-item span {
|
| 906 |
+
color: var(--muted);
|
| 907 |
+
font-size: 13px;
|
| 908 |
+
}
|
| 909 |
+
|
| 910 |
+
.summary-item strong {
|
| 911 |
+
color: #1b3d63;
|
| 912 |
+
font-size: 14px;
|
| 913 |
+
}
|
| 914 |
+
|
| 915 |
+
.section-subtitle {
|
| 916 |
+
margin: 6px 0 10px;
|
| 917 |
+
font-family: "Space Grotesk", sans-serif;
|
| 918 |
+
font-size: 20px;
|
| 919 |
+
color: #183d62;
|
| 920 |
+
}
|
| 921 |
+
|
| 922 |
+
.detailed-summary-text {
|
| 923 |
+
white-space: pre-wrap;
|
| 924 |
+
line-height: 1.65;
|
| 925 |
+
color: #1d3552;
|
| 926 |
+
font-size: 14px;
|
| 927 |
+
}
|
| 928 |
+
|
| 929 |
+
.as-link {
|
| 930 |
+
text-decoration: none;
|
| 931 |
+
display: inline-flex;
|
| 932 |
+
align-items: center;
|
| 933 |
+
justify-content: center;
|
| 934 |
+
}
|
| 935 |
+
|
| 936 |
+
.submit-link {
|
| 937 |
+
min-width: 220px;
|
| 938 |
+
}
|
| 939 |
+
|
| 940 |
+
@keyframes spin {
|
| 941 |
+
to {
|
| 942 |
+
transform: rotate(360deg);
|
| 943 |
+
}
|
| 944 |
+
}
|
| 945 |
+
|
| 946 |
+
@media (max-width: 980px) {
|
| 947 |
+
.page-links {
|
| 948 |
+
gap: 6px;
|
| 949 |
+
flex-wrap: wrap;
|
| 950 |
+
justify-content: flex-end;
|
| 951 |
+
}
|
| 952 |
+
|
| 953 |
+
.stats-grid,
|
| 954 |
+
.summary-grid {
|
| 955 |
+
grid-template-columns: 1fr;
|
| 956 |
+
}
|
| 957 |
+
}
|
frontend/summary.html
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8" />
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 6 |
+
<title>Final Summary | LegalSI</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com" />
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
|
| 9 |
+
<link
|
| 10 |
+
href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700;800&family=Space+Grotesk:wght@500;700&display=swap"
|
| 11 |
+
rel="stylesheet"
|
| 12 |
+
/>
|
| 13 |
+
<link rel="stylesheet" href="styles.css" />
|
| 14 |
+
</head>
|
| 15 |
+
<body>
|
| 16 |
+
<header class="topbar">
|
| 17 |
+
<div class="container topbar-inner">
|
| 18 |
+
<a class="brand" href="index.html#home">LegalSI</a>
|
| 19 |
+
<div class="page-links">
|
| 20 |
+
<a class="page-link" href="upload.html">Upload</a>
|
| 21 |
+
<a class="page-link" href="issues.html">Issue Analysis</a>
|
| 22 |
+
<a class="page-link active" href="summary.html">Final Summary</a>
|
| 23 |
+
<button id="logoutBtn" class="logout-btn" type="button">Logout</button>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</header>
|
| 27 |
+
|
| 28 |
+
<main class="flow-main">
|
| 29 |
+
<section class="container flow-card">
|
| 30 |
+
<div class="upload-header">
|
| 31 |
+
<h1>Final Document Summary</h1>
|
| 32 |
+
<span id="userBadge" class="user-badge"></span>
|
| 33 |
+
</div>
|
| 34 |
+
<p class="upload-subtitle">Overall analysis result for the entire uploaded legal document.</p>
|
| 35 |
+
|
| 36 |
+
<div id="summaryDetails" class="summary-grid"></div>
|
| 37 |
+
<h3 class="section-subtitle">Detailed Document Summary</h3>
|
| 38 |
+
<article class="result-card">
|
| 39 |
+
<div id="detailedSummaryText" class="detailed-summary-text"></div>
|
| 40 |
+
</article>
|
| 41 |
+
<h3 class="section-subtitle">Page-wise Summary</h3>
|
| 42 |
+
<div id="pageSummaryBoard" class="result-list"></div>
|
| 43 |
+
<h3 class="section-subtitle">Top Findings</h3>
|
| 44 |
+
<div id="findingsBoard" class="result-list"></div>
|
| 45 |
+
<h3 class="section-subtitle">Line Error Dashboard</h3>
|
| 46 |
+
<article class="result-card">
|
| 47 |
+
<div id="lineErrorDashboard"></div>
|
| 48 |
+
</article>
|
| 49 |
+
|
| 50 |
+
<div class="workflow-actions">
|
| 51 |
+
<a class="secondary-btn as-link" href="issues.html">Back to Issue Analysis</a>
|
| 52 |
+
<a class="submit-btn as-link submit-link" href="upload.html">Analyze New Document</a>
|
| 53 |
+
</div>
|
| 54 |
+
</section>
|
| 55 |
+
</main>
|
| 56 |
+
|
| 57 |
+
<script src="app.js"></script>
|
| 58 |
+
</body>
|
| 59 |
+
</html>
|
frontend/upload.html
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8" />
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 6 |
+
<title>Upload Document | LegalSI</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com" />
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
|
| 9 |
+
<link
|
| 10 |
+
href="https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700;800&family=Space+Grotesk:wght@500;700&display=swap"
|
| 11 |
+
rel="stylesheet"
|
| 12 |
+
/>
|
| 13 |
+
<link rel="stylesheet" href="styles.css" />
|
| 14 |
+
</head>
|
| 15 |
+
<body>
|
| 16 |
+
<header class="topbar">
|
| 17 |
+
<div class="container topbar-inner">
|
| 18 |
+
<a class="brand" href="index.html#home">LegalSI</a>
|
| 19 |
+
<div class="page-links">
|
| 20 |
+
<a class="page-link active" href="upload.html">Upload</a>
|
| 21 |
+
<a class="page-link" href="issues.html">Issue Analysis</a>
|
| 22 |
+
<a class="page-link" href="summary.html">Final Summary</a>
|
| 23 |
+
<button id="logoutBtn" class="logout-btn" type="button">Logout</button>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</header>
|
| 27 |
+
|
| 28 |
+
<main class="flow-main">
|
| 29 |
+
<section class="container flow-card">
|
| 30 |
+
<div class="upload-header">
|
| 31 |
+
<h1>Upload Document</h1>
|
| 32 |
+
<span id="userBadge" class="user-badge"></span>
|
| 33 |
+
</div>
|
| 34 |
+
<p class="upload-subtitle">Upload legal document, then continue to issue analysis and final summary pages.</p>
|
| 35 |
+
|
| 36 |
+
<form id="uploadForm" class="auth-form" novalidate>
|
| 37 |
+
<div class="field">
|
| 38 |
+
<label for="scanMode">Scan Mode</label>
|
| 39 |
+
<select id="scanMode" class="control">
|
| 40 |
+
<option>Standard Scan (Recommended)</option>
|
| 41 |
+
<option>Deep Search (Fuzzy)</option>
|
| 42 |
+
<option>Strict (Duplicates Only)</option>
|
| 43 |
+
</select>
|
| 44 |
+
</div>
|
| 45 |
+
|
| 46 |
+
<div class="field upload-zone-wrap">
|
| 47 |
+
<label for="legalFile">Upload File (PDF/DOCX/TXT)</label>
|
| 48 |
+
<label class="upload-zone" for="legalFile">
|
| 49 |
+
<span class="upload-icon">+</span>
|
| 50 |
+
<span class="upload-title">Drop your document or click to browse</span>
|
| 51 |
+
<span class="upload-hint">Supported: PDF, DOCX, TXT</span>
|
| 52 |
+
</label>
|
| 53 |
+
<input id="legalFile" class="control file-input-hidden" type="file" accept=".pdf,.docx,.txt" required />
|
| 54 |
+
</div>
|
| 55 |
+
|
| 56 |
+
<div id="analysisInputSummary" class="summary-box hidden"></div>
|
| 57 |
+
|
| 58 |
+
<div class="workflow-actions">
|
| 59 |
+
<a class="secondary-btn as-link" href="index.html#home">Back to Home</a>
|
| 60 |
+
<button id="runUploadBtn" class="submit-btn" type="submit">Upload and Analyze</button>
|
| 61 |
+
</div>
|
| 62 |
+
</form>
|
| 63 |
+
|
| 64 |
+
<div id="loadingState" class="loading-panel hidden" aria-live="polite">
|
| 65 |
+
<div class="spinner"></div>
|
| 66 |
+
<p>Analyzing document. Please wait...</p>
|
| 67 |
+
</div>
|
| 68 |
+
|
| 69 |
+
<p id="uploadMessage" class="message" aria-live="polite"></p>
|
| 70 |
+
</section>
|
| 71 |
+
</main>
|
| 72 |
+
|
| 73 |
+
<script src="app.js"></script>
|
| 74 |
+
</body>
|
| 75 |
+
</html>
|
frontend/workflow.html
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <!-- Legacy entry point: immediately forwards visitors to the upload page. -->
  <meta http-equiv="refresh" content="0;url=upload.html" />
  <title>Redirecting</title>
</head>
<body>
  <!-- Fallback link for clients that ignore or block meta refresh. -->
  <p>Redirecting to upload page... <a href="upload.html">Continue to the upload page</a> if you are not redirected.</p>
</body>
</html>
|
ingestion/docx_reader.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from docx import Document
|
| 2 |
+
|
| 3 |
+
def extract_text_from_docx(path):
    """Return the text of every paragraph in a DOCX file, one paragraph per line.

    Args:
        path: Location of the .docx file; handed straight to ``Document``.

    Returns:
        str: The paragraph texts joined with newline characters.
    """
    paragraph_texts = [paragraph.text for paragraph in Document(path).paragraphs]
    return "\n".join(paragraph_texts)
|
ingestion/pdf_reader.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pdfplumber
|
| 2 |
+
|
| 3 |
+
def extract_text_from_pdf(path):
    """Extract the text of a PDF into a single string, page by page.

    Args:
        path: Location of the PDF; handed straight to ``pdfplumber.open``.

    Returns:
        str: The text of every page that yields any text, each followed by a
        newline. Pages with no extractable text contribute nothing.
    """
    page_texts = []
    with pdfplumber.open(path) as pdf:
        for page in pdf.pages:
            # Extract once per page and reuse the result: the previous version
            # called extract_text() twice, repeating the work for every page.
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text + "\n")
    return "".join(page_texts)
|
main.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ingestion.pdf_reader import extract_text_from_pdf
|
| 2 |
+
from preprocessing.clause_extraction import extract_clauses
|
| 3 |
+
from embeddings.sbert_encoder import generate_embeddings
|
| 4 |
+
from storage.faiss_index import create_faiss_index
|
| 5 |
+
from analysis.similarity_search import get_similar
|
| 6 |
+
from analysis.common_analyzer import analyze_pair
|
| 7 |
+
from output.report_generator import generate_report
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
# Load document
text = extract_text_from_pdf("data/sample_docs/policy.pdf")

# Clause extraction
# extract_clauses() iterates over chunks shaped like {"text": ..., "page": ...}.
# The PDF reader returns one plain string with no page information, so wrap it
# as a single chunk instead of passing the raw string (which would fail on
# chunk.get()).
clauses = extract_clauses([{"text": text, "page": 1}])

# Embeddings
# NOTE(review): generate_embeddings receives the clause dicts unchanged, as
# before; confirm it expects dicts rather than the bare clause texts.
embeddings = generate_embeddings(clauses)
index = create_faiss_index(embeddings)

results = []

for i, emb in enumerate(embeddings):
    idxs, dists = get_similar(index, emb)
    for j, dist in zip(idxs, dists):
        # Skip the clause itself, and skip -1 labels: FAISS uses -1 to pad the
        # result when fewer neighbours exist than were requested, and a
        # negative index would silently select clauses[-1].
        if j < 0 or i == j:
            continue

        # Map the L2 distance into (0, 1]; identical vectors score 1.0.
        similarity = 1 / (1 + dist)

        # Use new Common Analyzer (Centralized Logic)
        # reproduce_issue.py unpacks (label, confidence, reason) from
        # analyze_pair; the star target tolerates that extra trailing value
        # as well as a plain (label, confidence) pair.
        issue_type, score, *_ = analyze_pair(clauses[i]["text"], clauses[j]["text"], similarity)

        if issue_type:
            results.append({
                "type": issue_type,
                "confidence": score,
                "clause_1": clauses[i]["text"],
                "clause_2": clauses[j]["text"]
            })

generate_report(results)
print("✅ Analysis completed. Report generated.")
|
preprocessing/clause_extraction.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
def extract_clauses(text_data):
    """Split text chunks into unique sentence-level clauses with location data.

    Each chunk's text is split after sentence-ending punctuation (``.``, ``!``
    or ``?``) followed by whitespace. Sentences of 30 characters or fewer, and
    sentences whose stripped text was already emitted (from any chunk), are
    dropped.

    Args:
        text_data: Iterable of dicts with a 'text' key (str) and an optional
            'page' key (defaults to 1). A missing or ``None`` text is treated
            as empty.

    Returns:
        List[Dict]: ``[{'id', 'text', 'page', 'line'}]`` where ``id`` is a
        running 0-based counter, ``page`` is copied from the chunk and
        ``line`` is the 1-based line within the chunk's text on which the
        clause starts.
    """
    unique_clauses = []
    seen = set()

    for chunk in text_data:
        raw_text = chunk.get("text") or ""
        page_num = chunk.get("page", 1)

        # The sentences come back in document order, so each one is located
        # by searching forward from the end of the previous one. Searching
        # from offset 0 every time (as before) reports the wrong line whenever
        # a sentence's text also occurs earlier in the chunk, e.g. inside a
        # longer sentence.
        cursor = 0
        # Newlines are counted incrementally between clause starts, so each
        # chunk is scanned once overall.
        line_no = 1
        counted_upto = 0

        for sentence in re.split(r'(?<=[.!?])\s+', raw_text):
            found = raw_text.find(sentence, cursor)
            # Only the first piece can carry leading whitespace; skip past it
            # so the line number refers to the first visible character.
            start = found + (len(sentence) - len(sentence.lstrip()))
            cursor = found + len(sentence)

            s_clean = sentence.strip()
            if len(s_clean) <= 30 or s_clean in seen:
                continue
            seen.add(s_clean)

            line_no += raw_text.count("\n", counted_upto, start)
            counted_upto = start

            unique_clauses.append({
                "id": len(unique_clauses),
                "text": s_clean,
                "page": page_num,
                "line": line_no
            })

    return unique_clauses
|
preprocessing/text_extractor.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import pdfplumber
|
| 3 |
+
import docx
|
| 4 |
+
import io
|
| 5 |
+
|
| 6 |
+
def extract_text_from_file(file_obj, file_type):
    """
    Extracts text from various file formats with page/location tracking.

    Args:
        file_obj: The uploaded file object. For 'txt' its ``read()`` may
            return bytes (decoded as UTF-8, dropping a leading BOM) or str.
        file_type: 'pdf', 'docx', or 'txt'. Matching ignores case and a
            leading dot, so 'PDF' and '.txt' are accepted too.

    Returns:
        List[Dict]: List of {'text': str, 'page': int}. PDF yields one entry
        per page that has extractable text (1-based page numbers); DOCX and
        TXT yield a single entry marked as page 1. An unrecognised file type
        or any extraction error yields an empty list (the error is printed).
    """
    kind = str(file_type or "").lower().lstrip(".")
    extracted_data = []
    try:
        if kind == "pdf":
            with pdfplumber.open(file_obj) as pdf:
                for page_number, page in enumerate(pdf.pages, start=1):
                    page_text = page.extract_text()
                    if page_text:
                        extracted_data.append({
                            "text": page_text,
                            "page": page_number
                        })

        elif kind == "docx":
            doc = docx.Document(file_obj)
            # DOCX has no fixed pagination, so the whole document is returned
            # as one continuous chunk; every paragraph ends with a newline.
            full_text = "".join(para.text + "\n" for para in doc.paragraphs)
            extracted_data.append({
                "text": full_text,
                "page": 1  # DOCX treated as single continuous flow unless paginated
            })

        elif kind == "txt":
            raw = file_obj.read()
            # Text-mode file objects already return str. For bytes,
            # "utf-8-sig" decodes UTF-8 and strips a leading byte-order mark
            # so it does not end up inside the first clause.
            text = raw if isinstance(raw, str) else raw.decode("utf-8-sig")
            extracted_data.append({
                "text": text,
                "page": 1
            })

    except Exception as e:
        # Deliberate best-effort: callers receive an empty list on failure.
        print(f"Error extracting text: {e}")
        return []

    return extracted_data
|
reproduce_issue.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
import numpy as np
|
| 4 |
+
from sentence_transformers import SentenceTransformer, util
|
| 5 |
+
|
| 6 |
+
sys.path.append(os.getcwd())
|
| 7 |
+
try:
|
| 8 |
+
from analysis.common_analyzer import analyze_pair
|
| 9 |
+
from preprocessing.clause_extraction import extract_clauses
|
| 10 |
+
except ImportError:
|
| 11 |
+
# Handle case where run from root
|
| 12 |
+
sys.path.append(os.path.join(os.getcwd(), 'analysis'))
|
| 13 |
+
sys.path.append(os.path.join(os.getcwd(), 'preprocessing'))
|
| 14 |
+
from analysis.common_analyzer import analyze_pair
|
| 15 |
+
from preprocessing.clause_extraction import extract_clauses
|
| 16 |
+
|
| 17 |
+
def test_reproduction():
    """Manually exercise the contradiction pipeline and the clause extractor.

    Section 1 embeds two audit-report retention sentences, prints their
    cosine similarity, feeds them to ``analyze_pair`` and, when the returned
    label is "CANDIDATE", also runs ``NLIVerifier.predict`` on the pair.
    Section 2 passes two mock page chunks to ``extract_clauses`` and checks
    that the first clause carries 'page' and 'line' keys.

    Results are only printed; nothing is asserted and nothing is returned.
    """
    print("--- Section 1: Core Logic Test ---")
    # One sentence requires retention, the other deletion, of the same reports.
    t1 = "Audit reports must be retained for a minimum of three (3) years."
    t2 = "Audit reports shall be deleted after one (1) year to reduce storage overhead."

    print(f"Text 1: {t1}")
    print(f"Text 2: {t2}")

    # 1. Calculate Similarity
    print("Loading embedding model...")
    model = SentenceTransformer('all-MiniLM-L6-v2')
    e1 = model.encode(t1)
    e2 = model.encode(t2)

    # Cosine similarity between the two sentence embeddings, as a Python float.
    sim = util.cos_sim(e1, e2).item()
    print(f"Similarity Score: {sim:.4f}")

    # 2. Test analyze_pair
    print("Running analyze_pair...")
    # Three values are unpacked here: label, confidence and a reason.
    label, conf, reason = analyze_pair(t1, t2, sim)
    print(f"Result: Label={label}, Conf={conf}, Reason={reason}")

    if label == "CANDIDATE":
        print("!!! PASSED Phase 1: ACCEPTED as CANDIDATE")

        # 3. Test NLI
        # Imported here so the verifier module is only loaded when this
        # branch actually runs.
        from analysis.nli_verifier import NLIVerifier
        print("\nRunning NLI Verification (Phase 2)...")
        verifier = NLIVerifier()
        is_contra, nli_conf, nli_label = verifier.predict(t1, t2)
        print(f"NLI Result: IsContra={is_contra}, Conf={nli_conf}, Label={nli_label}")

    elif label:
        print(f"!!! PASSED Phase 1: ACCEPTED as {label} (No NLI needed usually, but logic might vary)")
    else:
        print("!!! PASSED Phase 1: REJECTED (None)")

    print("\n--- Section 2: Pipeline & Metadata Test ---")
    # Chunks shaped like the extractor's input: one dict per page.
    mock_text = [
        {"text": "Section 1. This is a test clause on page 1.", "page": 1},
        {"text": "Section 2. This is another clause on page 2.", "page": 2}
    ]
    print("Testing extract_clauses with structured input...")
    clauses = extract_clauses(mock_text)
    # Only the first clause is inspected for the location metadata keys.
    if len(clauses) > 0 and 'page' in clauses[0] and 'line' in clauses[0]:
        print(f"SUCCESS: Extracted {len(clauses)} clauses with metadata.")
        print(f"Sample: {clauses[0]}")
    else:
        print("FAIL: Metadata extraction failed.")
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
if __name__ == "__main__":
|
| 69 |
+
test_reproduction()
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pdfplumber
|
| 2 |
+
python-docx
|
| 3 |
+
spacy
|
| 4 |
+
sentence-transformers
|
| 5 |
+
faiss-cpu
|
| 6 |
+
numpy
|
| 7 |
+
streamlit
|
| 8 |
+
transformers
|
| 9 |
+
torch
|
| 10 |
+
huggingface_hub
|
| 11 |
+
reportlab
|
storage/faiss_index.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import faiss
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
def create_faiss_index(embeddings):
    """Build an exact L2-distance FAISS index over a set of embeddings.

    Args:
        embeddings: 2-D array-like with one vector per row; a NumPy array or
            a nested list of numbers.

    Returns:
        faiss.IndexFlatL2: Index containing every row of ``embeddings``, in
        order, so result labels match the input row positions.
    """
    # FAISS expects C-contiguous float32 input. Converting once here also
    # lets plain lists and float64 arrays through; the previous version read
    # embeddings.shape directly and passed the original dtype to add().
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index
|
ui/app.py
ADDED
|
@@ -0,0 +1,871 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
import importlib
|
| 6 |
+
import json
|
| 7 |
+
import base64
|
| 8 |
+
import re
|
| 9 |
+
|
| 10 |
+
import pandas as pd
|
| 11 |
+
import plotly.express as px
|
| 12 |
+
import streamlit as st
|
| 13 |
+
|
| 14 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 15 |
+
|
| 16 |
+
from preprocessing.text_extractor import extract_text_from_file
|
| 17 |
+
from preprocessing.clause_extraction import extract_clauses
|
| 18 |
+
from embeddings.sbert_encoder import generate_embeddings
|
| 19 |
+
from storage.faiss_index import create_faiss_index
|
| 20 |
+
from analysis.similarity_search import get_similar
|
| 21 |
+
|
| 22 |
+
import analysis.common_analyzer
|
| 23 |
+
importlib.reload(analysis.common_analyzer)
|
| 24 |
+
from analysis.common_analyzer import analyze_pair
|
| 25 |
+
|
| 26 |
+
from analysis.nli_verifier import NLIVerifier
|
| 27 |
+
from analysis.llama_legal_verifier import LlamaLegalVerifier
|
| 28 |
+
from output.pdf_generator import generate_pdf_report
|
| 29 |
+
from auth.user_store import authenticate_user, create_user
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
APP_TITLE = "Legal Semantic Integrity"
|
| 33 |
+
DEFAULT_MODEL_PATH = "merged_tinyllama_instruction"
|
| 34 |
+
PROJECT_ROOT = Path(__file__).resolve().parents[1]
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def init_state():
    """Seed the session-state keys used by the app with their initial values.

    Each key is written with ``setdefault``, so a key that is already
    present keeps its current value.
    """
    initial_values = {
        "is_authenticated": False,
        "username": "",
        "analysis_done": False,
        "results": [],
        "line_issues": [],
        "uploaded_name": "",
        "uploaded_ext": "",
        "uploaded_bytes": b"",
    }
    for key, value in initial_values.items():
        st.session_state.setdefault(key, value)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _extract_party_name(text: str, role: str) -> str:
|
| 49 |
+
"""
|
| 50 |
+
Try to extract a nearby party name for vendor/vendee from clause text.
|
| 51 |
+
Falls back to role-present markers when exact name is not available.
|
| 52 |
+
"""
|
| 53 |
+
if not text:
|
| 54 |
+
return "Not found"
|
| 55 |
+
|
| 56 |
+
t = " ".join(str(text).split())
|
| 57 |
+
role_l = role.lower()
|
| 58 |
+
|
| 59 |
+
# Pattern examples:
|
| 60 |
+
# "Vendor Mr. Ravi Kumar", "Vendee: Sita Devi", "the vendor, John Doe"
|
| 61 |
+
patterns = [
|
| 62 |
+
rf"\b{role_l}\b\s*[:,-]?\s*(?:mr\.?|mrs\.?|ms\.?)?\s*([A-Z][A-Za-z.\s]{{2,60}}?)(?=,|\.|;|\bson of\b|\bwife of\b|\bresiding\b|\baged\b|$)",
|
| 63 |
+
rf"\bthe\s+{role_l}\b\s*[:,-]?\s*(?:is\s+)?(?:mr\.?|mrs\.?|ms\.?)?\s*([A-Z][A-Za-z.\s]{{2,60}}?)(?=,|\.|;|\bson of\b|\bwife of\b|\bresiding\b|\baged\b|$)",
|
| 64 |
+
]
|
| 65 |
+
|
| 66 |
+
for pat in patterns:
|
| 67 |
+
m = re.search(pat, t, flags=re.IGNORECASE)
|
| 68 |
+
if m:
|
| 69 |
+
name = " ".join(m.group(1).split())
|
| 70 |
+
# Filter generic captures like "hereinafter called"
|
| 71 |
+
if name and not re.search(r"hereinafter|called|referred|party|agreement", name, re.IGNORECASE):
|
| 72 |
+
return name[:80]
|
| 73 |
+
|
| 74 |
+
if re.search(rf"\b{role_l}\b", t, flags=re.IGNORECASE):
|
| 75 |
+
return f"{role.title()} mentioned (name not parsed)"
|
| 76 |
+
return "Not found"
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def _clean_candidate_name(name: str) -> str:
|
| 80 |
+
name = re.sub(r"\s+", " ", str(name)).strip(" ,.;:-")
|
| 81 |
+
if not name:
|
| 82 |
+
return ""
|
| 83 |
+
banned = r"hereinafter|called|referred|party|agreement|vendor|vendee|purchaser|buyer|seller"
|
| 84 |
+
if re.search(banned, name, flags=re.IGNORECASE):
|
| 85 |
+
return ""
|
| 86 |
+
return name[:80]
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def _extract_document_parties(text_data):
    """Scan the whole document for the vendor's and vendee's names.

    Two shapes are tried per role, in this order:

    1. ``"Mr. X son of ... hereinafter called the VENDOR"`` - a name,
       then a descriptor, then the role word within the same sentence.
    2. ``"Vendor: Mr. X,"`` - the role word followed by a name.

    A name is a run of up to six capitalised words.  Keywords are matched
    case-insensitively, but the name itself is case-sensitive so ordinary
    lowercase prose is not mistaken for a name.

    Args:
        text_data: Iterable of dicts whose ``"text"`` entry holds extracted
            text; ``None`` or an empty iterable is accepted.

    Returns:
        ``{"Vendor": ..., "Vendee": ...}``.  Each value is the cleaned name,
        ``"<Role> mentioned (name not parsed)"`` when only the role word
        occurs, or ``"Not found"``.

    NOTE(review): shape 1 only refuses to cross a ``.``; if both parties
    are introduced in one sentence with no full stop between them, the
    first party can still be reported for the second role.
    """
    # ``or ""`` keeps a chunk whose text is None from breaking the join.
    full_text = "\n".join((chunk.get("text") or "") for chunk in (text_data or []))
    compact = " ".join(full_text.split())
    parties = {"Vendor": "Not found", "Vendee": "Not found"}

    title = r"(?:(?i:mrs?|ms)\.?\s*)?"
    # One capitalised word that is not itself an honorific, so "Mr." is
    # consumed by ``title`` and never ends up inside the captured name.
    word = r"(?!(?i:mrs?|ms)\b)[A-Z][A-Za-z.]"
    name = rf"(?P<name>{word}+?(?:\s+{word}*?){{0,5}}?)"
    descriptor = r"(?i:son of|wife of|daughter of|residing at|aged about|hereinafter)"
    stop = r"(?=\s*(?:[,.;]|(?i:\b(?:son of|wife of|residing|aged)\b)|$))"

    for role in ("Vendor", "Vendee"):
        role_rx = rf"(?i:\b{role}\b)"
        patterns = (
            rf"\b{title}{name}\s+{descriptor}\b[^.]{{0,120}}{role_rx}",
            rf"{role_rx}\s*[:,-]?\s*(?i:is\s+)?{title}{name}{stop}",
        )
        for pattern in patterns:
            match = re.search(pattern, compact)
            if not match:
                continue
            cleaned = _clean_candidate_name(match.group("name"))
            if cleaned:
                parties[role] = cleaned
                break

        # Secondary fallback: explicit role in text without a parsable name.
        if parties[role] == "Not found" and re.search(role_rx, compact):
            parties[role] = f"{role} mentioned (name not parsed)"

    return parties
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def _extract_parties(text1: str, text2: str, doc_parties=None):
    """Resolve the vendor and vendee names for a pair of clauses.

    For each role the first clause is searched, then the second clause if
    the first gave ``"Not found"``.  When ``doc_parties`` is supplied, its
    non-empty ``"Vendor"`` / ``"Vendee"`` entry replaces a clause-level
    result that is ``"Not found"`` or only "<Role> mentioned (name not
    parsed)".

    Returns:
        ``(vendor, vendee)``.
    """

    def from_clauses(role):
        found = _extract_party_name(text1, role)
        if found != "Not found":
            return found
        return _extract_party_name(text2, role)

    vendor = from_clauses("vendor")
    vendee = from_clauses("vendee")

    if not doc_parties:
        return vendor, vendee

    vendor_unresolved = vendor in ["Not found", "Vendor mentioned (name not parsed)"]
    if vendor_unresolved and doc_parties.get("Vendor"):
        vendor = doc_parties.get("Vendor")

    vendee_unresolved = vendee in ["Not found", "Vendee mentioned (name not parsed)"]
    if vendee_unresolved and doc_parties.get("Vendee"):
        vendee = doc_parties.get("Vendee")

    return vendor, vendee
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
@st.cache_resource
def load_verifier(backend: str, llama_model_path: str):
    """Build the pair verifier for ``backend``.

    ``"llama"`` yields a ``LlamaLegalVerifier`` loaded from
    ``llama_model_path``; any other value yields an ``NLIVerifier`` using
    the ``cross-encoder/nli-distilroberta-base`` model.
    """
    if backend != "llama":
        return NLIVerifier(model_name="cross-encoder/nli-distilroberta-base")
    return LlamaLegalVerifier(model_path=llama_model_path)
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
def apply_theme():
    """Inject the app-wide stylesheet into the page.

    The CSS is emitted through ``st.markdown`` with
    ``unsafe_allow_html=True``.  It imports the Space Grotesk and IBM Plex
    Mono web fonts, declares the colour variables, and styles the ``hero``,
    ``step``, ``mini-card``, ``mono`` and ``tag`` classes.

    NOTE(review): the listing this was recovered from had lost its leading
    whitespace; the indentation inside the CSS literal is reconstructed.
    """
    stylesheet = """
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;600;700&display=swap');
    @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500&display=swap');

    :root {
        --bg-soft: #f6fbff;
        --ink-900: #0b2f4a;
        --ink-700: #21506f;
        --accent-500: #0a84c6;
        --accent-700: #005b88;
        --mint-500: #2aa198;
        --warn-500: #c57b00;
        --danger-500: #c44736;
        --card-border: #dbeaf4;
    }

    html, body, [class*="css"] {
        font-family: 'Space Grotesk', sans-serif;
    }

    .stApp {
        background:
            radial-gradient(900px 420px at -15% -25%, #d7f0ff 0%, rgba(215,240,255,0) 62%),
            radial-gradient(900px 420px at 115% -20%, #fff2d8 0%, rgba(255,242,216,0) 62%),
            linear-gradient(180deg, #f8fcff 0%, #ffffff 55%);
    }

    .hero {
        border: 1px solid var(--card-border);
        background: linear-gradient(145deg, #f0f8ff 0%, #fffdf8 95%);
        border-radius: 18px;
        padding: 20px 22px;
        margin-bottom: 14px;
        box-shadow: 0 10px 24px rgba(9, 59, 102, 0.07);
        animation: fadeIn .45s ease-out;
    }

    .hero h2 {
        margin: 0;
        color: var(--ink-900);
        letter-spacing: .2px;
        font-weight: 700;
    }

    .hero p {
        margin: 8px 0 0 0;
        color: var(--ink-700);
    }

    .step {
        border-left: 4px solid var(--accent-500);
        background: #ffffff;
        border-radius: 8px;
        padding: 8px 12px;
        margin-bottom: 8px;
        font-weight: 500;
        color: #12344d;
        box-shadow: 0 6px 16px rgba(12, 53, 88, 0.05);
    }

    .mini-card {
        border: 1px solid var(--card-border);
        border-radius: 14px;
        background: #ffffff;
        padding: 14px 14px;
        margin-bottom: 10px;
        box-shadow: 0 6px 16px rgba(12, 53, 88, 0.04);
        animation: fadeIn .55s ease-out;
    }

    .mini-label {
        color: #43627c;
        font-size: 0.78rem;
        letter-spacing: .02em;
        text-transform: uppercase;
        margin-bottom: 6px;
    }

    .mini-value {
        color: #082d48;
        font-size: 1.45rem;
        font-weight: 700;
        line-height: 1.2;
    }

    .mono {
        font-family: 'IBM Plex Mono', monospace;
    }

    .tag {
        display: inline-block;
        border-radius: 999px;
        padding: 5px 10px;
        font-size: 0.75rem;
        font-weight: 600;
        margin-right: 6px;
        margin-top: 5px;
        border: 1px solid;
    }

    .tag-info { color: var(--accent-700); border-color: #b7def4; background: #ecf7ff; }
    .tag-ok { color: #186b64; border-color: #bceae5; background: #ecfffc; }
    .tag-warn { color: #8c5c00; border-color: #f2d9a4; background: #fff7e8; }
    .tag-risk { color: #9f3124; border-color: #efb5ad; background: #fff1ee; }

    [data-testid="stDataFrame"] div[role="table"] {
        border-radius: 12px;
        border: 1px solid #d6e8f4;
        overflow: hidden;
    }

    @keyframes fadeIn {
        from { opacity: 0; transform: translateY(8px); }
        to { opacity: 1; transform: translateY(0); }
    }
    </style>
    """
    st.markdown(stylesheet, unsafe_allow_html=True)
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
def login_page():
    """Render step 1: an intro panel beside the sign-in / sign-up forms.

    A successful login stores ``is_authenticated`` and the stripped,
    lower-cased username in session state and triggers a rerun.  Account
    creation only reports the outcome returned by ``create_user``.

    NOTE(review): the listing this was recovered from had lost its
    indentation; the nesting of the submit handlers (after each form, inside
    its tab) and of the caption (inside the auth column) is inferred.
    """
    intro_html = """
    <div class="hero">
        <h2>Legal Semantic Integrity Portal</h2>
        <p>Interactive contract diagnostics with line-level visibility and legal conflict tracing.</p>
        <div>
            <span class="tag tag-info">Step 1: Secure Login</span>
            <span class="tag tag-ok">Step 2: Upload & Analyze</span>
            <span class="tag tag-warn">Step 3: Error-Line Dashboard</span>
        </div>
    </div>
    <div class="mini-card">
        <div class="mini-label">What You Get</div>
        <div class="mono">Duplicate clauses, legal contradictions, and exact page/line issue map.</div>
    </div>
    """

    intro_column, auth_column = st.columns([1.15, 1], gap="large")

    with intro_column:
        st.markdown(intro_html, unsafe_allow_html=True)

    with auth_column:
        st.markdown('<div class="step">Step 1 of 3: Login</div>', unsafe_allow_html=True)
        sign_in_tab, sign_up_tab = st.tabs(["Sign In", "Create Account"])

        with sign_in_tab:
            with st.form("login_form", clear_on_submit=False):
                username = st.text_input("Username")
                password = st.text_input("Password", type="password")
                login_clicked = st.form_submit_button("Login")

            if login_clicked:
                authenticated, feedback = authenticate_user(username, password)
                if not authenticated:
                    st.error(feedback)
                else:
                    st.session_state.is_authenticated = True
                    st.session_state.username = username.strip().lower()
                    st.success(feedback)
                    st.rerun()

        with sign_up_tab:
            with st.form("signup_form", clear_on_submit=True):
                new_username = st.text_input("New Username")
                new_password = st.text_input("New Password", type="password")
                confirm_password = st.text_input("Confirm Password", type="password")
                signup_clicked = st.form_submit_button("Create Account")

            if signup_clicked:
                if new_password == confirm_password:
                    created, feedback = create_user(new_username, new_password)
                    if created:
                        st.success(feedback)
                    else:
                        st.error(feedback)
                else:
                    st.error("Passwords do not match.")

        st.caption("Local accounts are saved in data/users.db")
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
def run_analysis(uploaded_file, sensitivity: float, backend: str, llama_model_path: str):
    """Run the clause-pair analysis pipeline on an uploaded document.

    Steps, in order: extract text, split it into clauses, embed and index
    the clauses, then for every clause fetch its 5 nearest neighbours, score
    each not-yet-seen pair ``(i, j)`` with ``i < j`` through
    ``analyze_pair``, and reconcile that rule-based label with the
    verifier's verdict.  The verifier's confidence replaces the rule-based
    one on every kept pair.

    Args:
        uploaded_file: Uploaded file object; its ``name`` supplies the
            extension handed to the text extractor.
        sensitivity: Passed to ``analyze_pair`` as ``threshold``.
        backend: ``"llama"`` expects a 4-tuple from ``verifier.predict``;
            any other value expects a 3-tuple.
        llama_model_path: Model location; a relative path is resolved
            against ``PROJECT_ROOT``.

    Returns:
        ``(results, line_issues)``.  ``results`` holds one dict per labelled
        pair.  ``line_issues`` holds two rows (one per clause) for each pair
        whose final label is not ``"NO_CONFLICT"``, sorted by page then
        line.  Both are empty when no text or no clauses were extracted.

    NOTE(review): the listing this was recovered from had lost its
    indentation.  In the "Contradiction" branch the reason is assumed to be
    overwritten unconditionally and only the label upgrade is assumed to be
    guarded - confirm against the original file.
    """
    extension = uploaded_file.name.split(".")[-1].lower()

    with st.spinner("Extracting text..."):
        text_data = extract_text_from_file(uploaded_file, extension)
    if not text_data:
        st.error("Could not extract text from this file.")
        return [], []

    with st.spinner("Extracting clauses..."):
        clauses = extract_clauses(text_data)
        doc_parties = _extract_document_parties(text_data)
    if not clauses:
        st.warning("No valid clauses were detected.")
        return [], []

    with st.spinner("Building semantic index..."):
        embeddings = generate_embeddings(clauses)
        index = create_faiss_index(embeddings)

    model_path = Path(llama_model_path)
    if not model_path.is_absolute():
        model_path = PROJECT_ROOT / model_path
    verifier = load_verifier(backend=backend, llama_model_path=str(model_path))

    results = []
    visited_pairs = set()
    progress_bar = st.progress(0)
    clause_count = len(embeddings)

    for i, vector in enumerate(embeddings):
        neighbours, distances = get_similar(index, vector, k=5)

        for j, dist in zip(neighbours, distances):
            # Only the (lower, higher) orientation of a pair is analysed, once.
            if i >= j or (i, j) in visited_pairs:
                continue
            visited_pairs.add((i, j))

            first, second = clauses[i], clauses[j]
            similarity = 1 / (1 + dist)
            label, confidence, reason = analyze_pair(
                first["text"],
                second["text"],
                similarity,
                threshold=sensitivity,
            )
            if not label:
                continue

            finding = {
                "Label": label,
                "Confidence": float(confidence),
                "Reason": reason,
                "Clause 1": first["text"],
                "Clause 2": second["text"],
                "Page 1": first["page"],
                "Line 1": first["line"],
                "Page 2": second["page"],
                "Line 2": second["line"],
                "Location 1": f"Pg {first['page']}, Ln {first['line']}",
                "Location 2": f"Pg {second['page']}, Ln {second['line']}",
            }
            finding["Vendor"], finding["Vendee"] = _extract_parties(
                finding["Clause 1"], finding["Clause 2"], doc_parties=doc_parties
            )

            verdict = verifier.predict(finding["Clause 1"], finding["Clause 2"])
            if backend == "llama":
                _, llm_conf, llm_label, llm_reason = verdict
            else:
                _, llm_conf, llm_label = verdict
                llm_reason = f"NLI label: {llm_label}"

            if llm_label == "Neutral":
                # Do not erase strong rule-based findings just because LLM is neutral.
                if finding["Label"] in ["NUMERIC_INCONSISTENCY", "LEGAL_CONFLICT"]:
                    finding["Reason"] = f"{finding['Reason']} | LLM neutral review"
                else:
                    finding["Label"] = "NO_CONFLICT"
                    finding["Reason"] = "LLM marked as neutral"
            elif llm_label == "Entailment":
                finding["Label"] = "DUPLICATION"
                finding["Reason"] = "LLM marked as entailment"
            elif llm_label == "Contradiction":
                if finding["Label"] in ["CANDIDATE", "QUALIFICATION"]:
                    finding["Label"] = "LEGAL_CONFLICT"
                finding["Reason"] = llm_reason

            finding["Confidence"] = float(llm_conf)
            results.append(finding)

        progress_bar.progress((i + 1) / clause_count)

    progress_bar.empty()

    line_issues = []
    for finding in results:
        if finding["Label"] == "NO_CONFLICT":
            continue
        # One row per clause of the pair: side "1" first, then side "2".
        for side in ("1", "2"):
            line_issues.append(
                {
                    "Issue Type": finding["Label"],
                    "Confidence": round(finding["Confidence"], 4),
                    "Page": finding[f"Page {side}"],
                    "Line": finding[f"Line {side}"],
                    "Snippet": finding[f"Clause {side}"][:160],
                    "Reason": finding["Reason"],
                    "Vendor": finding.get("Vendor", "Not found"),
                    "Vendee": finding.get("Vendee", "Not found"),
                }
            )

    line_issues.sort(key=lambda item: (item["Page"], item["Line"]))

    return results, line_issues
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
def upload_page():
    """Render step 2: scan settings, the file uploader and the run button.

    The scan mode chosen in the sidebar fixes the sensitivity (0.60 for
    Standard, 0.50 for Deep, 0.85 otherwise).  The verifier backend and
    model path are hard-wired to ``"llama"`` and ``DEFAULT_MODEL_PATH``.
    Once a file is uploaded its name, extension and bytes are stored in
    session state.  Pressing the button runs ``run_analysis``, stores its
    two results, sets ``analysis_done`` and triggers a rerun; any
    ``Exception`` is shown as an error message instead.

    NOTE(review): the listing this was recovered from had lost its
    indentation; placing the captions and the "Active Mode" card inside the
    sidebar is inferred.
    """
    hero_html = """
    <div class="hero">
        <h2>Upload And Scan</h2>
        <p>Drop your legal document, choose model/backend, and run full semantic integrity analysis.</p>
    </div>
    """
    st.markdown(hero_html, unsafe_allow_html=True)
    st.markdown('<div class="step">Step 2 of 3: Upload Document</div>', unsafe_allow_html=True)

    with st.sidebar:
        st.header("Scan Settings")
        scan_mode = st.radio(
            "Select scan mode",
            (
                "Standard Scan (Recommended)",
                "Deep Search (Fuzzy)",
                "Strict (Duplicates Only)",
            ),
            index=0,
        )

        # First keyword found in the chosen mode wins; 0.85 is the fallback.
        sensitivity = next(
            (
                threshold
                for keyword, threshold in (("Standard", 0.60), ("Deep", 0.50))
                if keyword in scan_mode
            ),
            0.85,
        )

        # Locked configuration requested by user:
        # always use local fine-tuned Llama verifier and hide controls.
        model_backend = "llama"
        llama_model_path = DEFAULT_MODEL_PATH
        st.caption("Verifier backend: llama (fixed)")
        st.caption("Local model: merged_tinyllama_instruction (fixed)")

        mode_title = scan_mode.split("(")[0].strip()
        st.markdown(
            f"""
            <div class="mini-card">
                <div class="mini-label">Active Mode</div>
                <div class="mini-value">{mode_title}</div>
                <div class="mono">Sensitivity: {sensitivity} | Backend: {model_backend}</div>
            </div>
            """,
            unsafe_allow_html=True,
        )

    uploader_column, info_column = st.columns([1.35, 1], gap="large")
    with uploader_column:
        uploaded_file = st.file_uploader(
            "Upload a legal document",
            type=["pdf", "docx", "txt"],
            help="Supported files: PDF, DOCX, TXT",
        )
    with info_column:
        st.markdown(
            """
            <div class="mini-card">
                <div class="mini-label">Supported Inputs</div>
                <div class="mono">PDF / DOCX / TXT</div>
            </div>
            <div class="mini-card">
                <div class="mini-label">Output</div>
                <div class="mono">Pair Findings + Error-Line Dashboard + PDF/JSON Export</div>
            </div>
            """,
            unsafe_allow_html=True,
        )

    if uploaded_file is None:
        st.info("Upload a file to continue.")
        return

    state = st.session_state
    state.uploaded_name = uploaded_file.name
    state.uploaded_ext = uploaded_file.name.split(".")[-1].lower()
    state.uploaded_bytes = uploaded_file.getvalue()
    st.success(f"File ready: {uploaded_file.name}")

    if not st.button("Run Full Analysis", type="primary"):
        return

    try:
        results, line_issues = run_analysis(
            uploaded_file=uploaded_file,
            sensitivity=sensitivity,
            backend=model_backend,
            llama_model_path=llama_model_path,
        )
        state.results = results
        state.line_issues = line_issues
        state.analysis_done = True
        st.rerun()
    except Exception as exc:
        st.error(f"Analysis failed: {exc}")
|
| 564 |
+
|
| 565 |
+
|
| 566 |
+
def dashboard_page():
|
| 567 |
+
st.markdown(
|
| 568 |
+
"""
|
| 569 |
+
<div class="hero">
|
| 570 |
+
<h2>Interactive Findings Dashboard</h2>
|
| 571 |
+
<p>Trace conflicts by issue type, confidence, and exact line location.</p>
|
| 572 |
+
</div>
|
| 573 |
+
""",
|
| 574 |
+
unsafe_allow_html=True,
|
| 575 |
+
)
|
| 576 |
+
st.markdown('<div class="step">Step 3 of 3: Dashboard</div>', unsafe_allow_html=True)
|
| 577 |
+
|
| 578 |
+
results = st.session_state.results
|
| 579 |
+
line_issues = st.session_state.line_issues
|
| 580 |
+
|
| 581 |
+
if not results:
|
| 582 |
+
st.warning("No results found.")
|
| 583 |
+
return
|
| 584 |
+
|
| 585 |
+
df = pd.DataFrame(results)
|
| 586 |
+
df["Confidence"] = df["Confidence"].astype(float)
|
| 587 |
+
|
| 588 |
+
issues_df = df[~df["Label"].isin(["NO_CONFLICT"])].copy()
|
| 589 |
+
|
| 590 |
+
col1, col2, col3, col4 = st.columns(4)
|
| 591 |
+
with col1:
|
| 592 |
+
st.markdown(
|
| 593 |
+
f"""
|
| 594 |
+
<div class="mini-card">
|
| 595 |
+
<div class="mini-label">User</div>
|
| 596 |
+
<div class="mini-value">{st.session_state.username or "N/A"}</div>
|
| 597 |
+
</div>
|
| 598 |
+
""",
|
| 599 |
+
unsafe_allow_html=True,
|
| 600 |
+
)
|
| 601 |
+
with col2:
|
| 602 |
+
st.markdown(
|
| 603 |
+
f"""
|
| 604 |
+
<div class="mini-card">
|
| 605 |
+
<div class="mini-label">Pairs Reviewed</div>
|
| 606 |
+
<div class="mini-value">{len(df)}</div>
|
| 607 |
+
</div>
|
| 608 |
+
""",
|
| 609 |
+
unsafe_allow_html=True,
|
| 610 |
+
)
|
| 611 |
+
with col3:
|
| 612 |
+
st.markdown(
|
| 613 |
+
f"""
|
| 614 |
+
<div class="mini-card">
|
| 615 |
+
<div class="mini-label">Detected Issues</div>
|
| 616 |
+
<div class="mini-value">{len(issues_df)}</div>
|
| 617 |
+
</div>
|
| 618 |
+
""",
|
| 619 |
+
unsafe_allow_html=True,
|
| 620 |
+
)
|
| 621 |
+
with col4:
|
| 622 |
+
max_conf = float(df["Confidence"].max()) if not df.empty else 0.0
|
| 623 |
+
st.markdown(
|
| 624 |
+
f"""
|
| 625 |
+
<div class="mini-card">
|
| 626 |
+
<div class="mini-label">Max Confidence</div>
|
| 627 |
+
<div class="mini-value">{max_conf:.2f}</div>
|
| 628 |
+
</div>
|
| 629 |
+
""",
|
| 630 |
+
unsafe_allow_html=True,
|
| 631 |
+
)
|
| 632 |
+
|
| 633 |
+
st.subheader("Issue Analytics Dashboard")
|
| 634 |
+
if line_issues:
|
| 635 |
+
line_df = pd.DataFrame(line_issues).copy()
|
| 636 |
+
line_df["Page"] = line_df["Page"].astype(int)
|
| 637 |
+
line_df["Line"] = line_df["Line"].astype(int)
|
| 638 |
+
line_df["Confidence"] = line_df["Confidence"].astype(float)
|
| 639 |
+
|
| 640 |
+
filter_col1, filter_col2, filter_col3 = st.columns([1.2, 1, 1], gap="large")
|
| 641 |
+
with filter_col1:
|
| 642 |
+
issue_types = sorted(line_df["Issue Type"].dropna().unique().tolist())
|
| 643 |
+
issue_sel = st.multiselect("Issue Types", issue_types, default=issue_types)
|
| 644 |
+
with filter_col2:
|
| 645 |
+
conf_min = st.slider("Min Confidence (analytics)", 0.0, 1.0, 0.0, 0.01)
|
| 646 |
+
page_min, page_max = int(line_df["Page"].min()), int(line_df["Page"].max())
|
| 647 |
+
if page_min == page_max:
|
| 648 |
+
st.caption(f"Single issue page: {page_min}")
|
| 649 |
+
page_sel = (page_min, page_max)
|
| 650 |
+
else:
|
| 651 |
+
page_sel = st.slider("Page Range (analytics)", page_min, page_max, (page_min, page_max))
|
| 652 |
+
with filter_col3:
|
| 653 |
+
vendors = ["All"] + sorted(line_df["Vendor"].dropna().astype(str).unique().tolist())
|
| 654 |
+
vendees = ["All"] + sorted(line_df["Vendee"].dropna().astype(str).unique().tolist())
|
| 655 |
+
vendor_sel = st.selectbox("Vendor", vendors, index=0)
|
| 656 |
+
vendee_sel = st.selectbox("Vendee", vendees, index=0)
|
| 657 |
+
|
| 658 |
+
filtered = line_df.copy()
|
| 659 |
+
if issue_sel:
|
| 660 |
+
filtered = filtered[filtered["Issue Type"].isin(issue_sel)]
|
| 661 |
+
filtered = filtered[filtered["Confidence"] >= conf_min]
|
| 662 |
+
filtered = filtered[(filtered["Page"] >= page_sel[0]) & (filtered["Page"] <= page_sel[1])]
|
| 663 |
+
if vendor_sel != "All":
|
| 664 |
+
filtered = filtered[filtered["Vendor"] == vendor_sel]
|
| 665 |
+
if vendee_sel != "All":
|
| 666 |
+
filtered = filtered[filtered["Vendee"] == vendee_sel]
|
| 667 |
+
|
| 668 |
+
total_issues = len(filtered)
|
| 669 |
+
conflict_rate = (len(issues_df) / len(df) * 100.0) if len(df) else 0.0
|
| 670 |
+
top_issue = filtered["Issue Type"].mode().iloc[0] if not filtered.empty else "N/A"
|
| 671 |
+
highest_risk_page = (
|
| 672 |
+
int(filtered.groupby("Page")["Confidence"].mean().idxmax()) if not filtered.empty else "N/A"
|
| 673 |
+
)
|
| 674 |
+
k1, k2, k3, k4 = st.columns(4)
|
| 675 |
+
k1.metric("Filtered Issues", total_issues)
|
| 676 |
+
k2.metric("Conflict Rate", f"{conflict_rate:.1f}%")
|
| 677 |
+
k3.metric("Top Issue Type", top_issue)
|
| 678 |
+
k4.metric("Highest Risk Page", highest_risk_page)
|
| 679 |
+
|
| 680 |
+
if filtered.empty:
|
| 681 |
+
st.warning("No analytics data for current filter.")
|
| 682 |
+
else:
|
| 683 |
+
pie_df = filtered["Issue Type"].value_counts().reset_index()
|
| 684 |
+
pie_df.columns = ["Issue Type", "Count"]
|
| 685 |
+
pie_fig = px.pie(
|
| 686 |
+
pie_df,
|
| 687 |
+
names="Issue Type",
|
| 688 |
+
values="Count",
|
| 689 |
+
title="Issue Type Split",
|
| 690 |
+
hole=0.35,
|
| 691 |
+
)
|
| 692 |
+
pie_fig.update_layout(margin=dict(l=10, r=10, t=50, b=10))
|
| 693 |
+
st.plotly_chart(pie_fig, use_container_width=True)
|
| 694 |
+
|
| 695 |
+
top_lines = filtered.sort_values(by=["Confidence"], ascending=False).head(10)
|
| 696 |
+
st.markdown("**Top 10 High-Risk Lines**")
|
| 697 |
+
st.dataframe(
|
| 698 |
+
top_lines[["Issue Type", "Confidence", "Page", "Line", "Vendor", "Vendee", "Snippet", "Reason"]],
|
| 699 |
+
use_container_width=True,
|
| 700 |
+
)
|
| 701 |
+
else:
|
| 702 |
+
st.info("No issue analytics data available.")
|
| 703 |
+
|
| 704 |
+
tab_findings, tab_line_map, tab_export = st.tabs(
|
| 705 |
+
["Findings Table", "Error Line Map", "Export"]
|
| 706 |
+
)
|
| 707 |
+
|
| 708 |
+
with tab_findings:
|
| 709 |
+
st.subheader("Detected Issues")
|
| 710 |
+
left, right = st.columns([1, 1.1])
|
| 711 |
+
with left:
|
| 712 |
+
display_mode = st.radio(
|
| 713 |
+
"Display mode",
|
| 714 |
+
["Issues Only", "All Analyzed Pairs"],
|
| 715 |
+
horizontal=True,
|
| 716 |
+
)
|
| 717 |
+
with right:
|
| 718 |
+
conf_threshold = st.slider("Minimum confidence", 0.0, 1.0, 0.0, 0.01)
|
| 719 |
+
|
| 720 |
+
display_df = issues_df if display_mode == "Issues Only" else df
|
| 721 |
+
display_df = display_df[display_df["Confidence"] >= conf_threshold]
|
| 722 |
+
|
| 723 |
+
if display_mode == "Issues Only" and display_df.empty:
|
| 724 |
+
st.warning("No issues match this filter.")
|
| 725 |
+
st.info("Try lower confidence or switch to 'All Analyzed Pairs'.")
|
| 726 |
+
elif display_df.empty:
|
| 727 |
+
st.info("No analyzed pairs match this filter.")
|
| 728 |
+
else:
|
| 729 |
+
display_df = display_df.copy().reset_index(drop=True)
|
| 730 |
+
display_df.insert(0, "S.No", range(1, len(display_df) + 1))
|
| 731 |
+
cols = [
|
| 732 |
+
"S.No",
|
| 733 |
+
"Label",
|
| 734 |
+
"Confidence",
|
| 735 |
+
"Reason",
|
| 736 |
+
"Location 1",
|
| 737 |
+
"Location 2",
|
| 738 |
+
"Clause 1",
|
| 739 |
+
"Clause 2",
|
| 740 |
+
]
|
| 741 |
+
st.dataframe(display_df[cols], use_container_width=True)
|
| 742 |
+
|
| 743 |
+
with tab_line_map:
|
| 744 |
+
st.subheader("Error Line Dashboard")
|
| 745 |
+
if line_issues:
|
| 746 |
+
line_df = pd.DataFrame(line_issues)
|
| 747 |
+
labels = sorted(line_df["Issue Type"].dropna().unique().tolist())
|
| 748 |
+
selected = st.multiselect("Filter issue types", labels, default=labels)
|
| 749 |
+
page_min = int(line_df["Page"].min()) if not line_df.empty else 1
|
| 750 |
+
page_max = int(line_df["Page"].max()) if not line_df.empty else 1
|
| 751 |
+
if page_min == page_max:
|
| 752 |
+
st.caption(f"Only one page with issues: Page {page_min}")
|
| 753 |
+
page_range = (page_min, page_max)
|
| 754 |
+
else:
|
| 755 |
+
page_range = st.slider("Page range", page_min, page_max, (page_min, page_max))
|
| 756 |
+
|
| 757 |
+
if selected:
|
| 758 |
+
line_df = line_df[line_df["Issue Type"].isin(selected)]
|
| 759 |
+
line_df = line_df[(line_df["Page"] >= page_range[0]) & (line_df["Page"] <= page_range[1])]
|
| 760 |
+
|
| 761 |
+
st.dataframe(line_df, use_container_width=True)
|
| 762 |
+
|
| 763 |
+
st.markdown("**Issue Occurrence By Line With Parties**")
|
| 764 |
+
by_line = line_df.copy()
|
| 765 |
+
by_line = by_line.sort_values(by=["Page", "Line", "Confidence"], ascending=[True, True, False])
|
| 766 |
+
st.dataframe(
|
| 767 |
+
by_line[["Issue Type", "Page", "Line", "Vendor", "Vendee", "Confidence", "Reason"]],
|
| 768 |
+
use_container_width=True,
|
| 769 |
+
)
|
| 770 |
+
|
| 771 |
+
st.subheader("Jump To Error Line")
|
| 772 |
+
if not line_df.empty:
|
| 773 |
+
line_df = line_df.reset_index(drop=True)
|
| 774 |
+
line_df.insert(0, "Item", range(1, len(line_df) + 1))
|
| 775 |
+
line_df["Jump"] = line_df.apply(
|
| 776 |
+
lambda r: f"#{r['Item']} | Pg {int(r['Page'])}, Ln {int(r['Line'])} | {r['Issue Type']}",
|
| 777 |
+
axis=1,
|
| 778 |
+
)
|
| 779 |
+
selected_jump = st.selectbox("Select issue line", line_df["Jump"].tolist())
|
| 780 |
+
chosen = line_df[line_df["Jump"] == selected_jump].iloc[0]
|
| 781 |
+
|
| 782 |
+
c1, c2 = st.columns([1.1, 1], gap="large")
|
| 783 |
+
with c1:
|
| 784 |
+
st.markdown(
|
| 785 |
+
f"""
|
| 786 |
+
<div class="mini-card">
|
| 787 |
+
<div class="mini-label">Selected Line</div>
|
| 788 |
+
<div class="mini-value">Pg {int(chosen['Page'])} · Ln {int(chosen['Line'])}</div>
|
| 789 |
+
<div class="mono">{chosen['Issue Type']} | Confidence: {float(chosen['Confidence']):.2f}</div>
|
| 790 |
+
</div>
|
| 791 |
+
""",
|
| 792 |
+
unsafe_allow_html=True,
|
| 793 |
+
)
|
| 794 |
+
st.caption("Snippet")
|
| 795 |
+
st.code(str(chosen["Snippet"]), language="text")
|
| 796 |
+
st.caption("Reason")
|
| 797 |
+
st.write(str(chosen["Reason"]))
|
| 798 |
+
|
| 799 |
+
with c2:
|
| 800 |
+
is_pdf = st.session_state.uploaded_ext == "pdf"
|
| 801 |
+
if is_pdf and st.session_state.uploaded_bytes:
|
| 802 |
+
st.caption("PDF Preview (jumped to selected page)")
|
| 803 |
+
page_number = int(chosen["Page"])
|
| 804 |
+
pdf_b64 = base64.b64encode(st.session_state.uploaded_bytes).decode("utf-8")
|
| 805 |
+
pdf_html = f"""
|
| 806 |
+
<iframe
|
| 807 |
+
src="data:application/pdf;base64,{pdf_b64}#page={page_number}&zoom=110"
|
| 808 |
+
width="100%"
|
| 809 |
+
height="520"
|
| 810 |
+
style="border:1px solid #d6e8f4; border-radius: 10px;"
|
| 811 |
+
></iframe>
|
| 812 |
+
"""
|
| 813 |
+
st.markdown(pdf_html, unsafe_allow_html=True)
|
| 814 |
+
else:
|
| 815 |
+
st.info("Inline PDF preview is available for PDF uploads. Current file is not PDF.")
|
| 816 |
+
else:
|
| 817 |
+
st.info("No line-level issues to display.")
|
| 818 |
+
|
| 819 |
+
with tab_export:
|
| 820 |
+
st.subheader("Download Reports")
|
| 821 |
+
json_payload = json.dumps(results, indent=2)
|
| 822 |
+
st.download_button(
|
| 823 |
+
label="Download JSON Report",
|
| 824 |
+
data=json_payload,
|
| 825 |
+
file_name="semantic_integrity_report.json",
|
| 826 |
+
mime="application/json",
|
| 827 |
+
)
|
| 828 |
+
pdf_bytes = generate_pdf_report([r for r in results if r["Label"] != "NO_CONFLICT"])
|
| 829 |
+
st.download_button(
|
| 830 |
+
label="Download PDF Report",
|
| 831 |
+
data=pdf_bytes,
|
| 832 |
+
file_name="semantic_integrity_report.pdf",
|
| 833 |
+
mime="application/pdf",
|
| 834 |
+
)
|
| 835 |
+
|
| 836 |
+
if st.button("Analyze Another Document"):
|
| 837 |
+
st.session_state.analysis_done = False
|
| 838 |
+
st.session_state.results = []
|
| 839 |
+
st.session_state.line_issues = []
|
| 840 |
+
st.rerun()
|
| 841 |
+
|
| 842 |
+
|
| 843 |
+
def main():
    """Entry point for the Streamlit UI.

    Configures the page, applies the theme, initialises session state, and
    draws the header row (app title plus a Logout button that is only
    rendered for authenticated sessions). It then routes to exactly one
    page: the login page when the session is not authenticated, the
    dashboard when an analysis has completed, and the upload page otherwise.
    """
    st.set_page_config(page_title=APP_TITLE, layout="wide")
    apply_theme()
    init_state()

    state = st.session_state

    title_col, action_col = st.columns([5, 1])
    with title_col:
        st.title(APP_TITLE)
    with action_col:
        # The button is only created for authenticated sessions, so an
        # anonymous visitor never sees a Logout control.
        if state.is_authenticated:
            if st.button("Logout"):
                # Reset the authentication flag and analysis output, then
                # rerun the script so the login page is drawn.
                # NOTE(review): uploaded_bytes / uploaded_ext are read by the
                # dashboard but are not reset here -- confirm whether logout
                # should also drop the uploaded document.
                logout_defaults = (
                    ("is_authenticated", False),
                    ("username", ""),
                    ("analysis_done", False),
                    ("results", []),
                    ("line_issues", []),
                )
                for key, value in logout_defaults:
                    state[key] = value
                st.rerun()

    # Route to a single page per script run.
    if not state.is_authenticated:
        login_page()
    elif state.analysis_done:
        dashboard_page()
    else:
        upload_page()
|
| 868 |
+
|
| 869 |
+
|
| 870 |
+
# Script entry point: start the UI only when this module is run directly, not when it is imported.
if __name__ == "__main__":
|
| 871 |
+
main()
|