Abrar55 committed on
Commit
08663d7
·
verified ·
1 Parent(s): c9aa164

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +2044 -0
app.py ADDED
@@ -0,0 +1,2044 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CHEX - Document Intelligence
3
+ HuggingFace Spaces Gradio Demo — fully self-contained (no relative imports)
4
+
5
+ Tab 1: Analyze Contract — paste a contract, ask a question, get a structured answer
6
+ Tab 2: Benchmark Demo — side-by-side table showing base model hallucinations vs CHEX
7
+ Tab 3: Analyse Bank Statement — paste / upload a bank statement, get a summary + Q&A
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import csv
13
+ import datetime as _dt
14
+ import importlib.util
15
+ import io
16
+ import json
17
+ import os
18
+ import re
19
+ import tempfile
20
+ from enum import Enum
21
+ from pathlib import Path
22
+ from typing import Optional
23
+
24
+ import gradio as gr
25
+ from pydantic import BaseModel
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # Schema (inlined from data/schema.py)
29
+ # ---------------------------------------------------------------------------
30
+
31
class Label(str, Enum):
    """Classification labels a model answer can carry.

    Members mix in ``str``, so each ``.value`` is exactly the text the prompts
    ask the model to emit in the ``label`` field of its JSON reply.
    """

    # The requested information exists verbatim in the document.
    GROUNDED = "GROUNDED"
    # The document does not contain the clause at all.
    ABSENT = "ABSENT"
    # A clause exists but deviates from standard legal terms.
    CONTRADICTS_PRIOR = "CONTRADICTS_PRIOR"
35
+
36
+
37
class ModelOutput(BaseModel):
    """Structured answer parsed from the model's JSON reply in Q&A mode."""

    # The question that was asked (the prompt requests a verbatim copy).
    question: str
    # One of the Label members.
    label: Label
    # Answer text; the prompt asks for null when the label is ABSENT.
    answer: Optional[str] = None
    # Verbatim supporting span from the document; null when ABSENT.
    citation: Optional[str] = None
    # One-sentence explanation of the classification.
    reasoning: str
43
+
44
+
45
class BankStatementSummary(BaseModel):
    """Structured summary parsed from the model's JSON reply in SUMMARY mode."""

    # Total money received, as display text, or None.
    total_credits: Optional[str] = None
    # Total money spent, as display text, or None.
    total_debits: Optional[str] = None
    # Description + amount of the single largest transaction, or None.
    largest_transaction: Optional[str] = None
    # Detected recurring charges.
    recurring_payments: Optional[list[str]] = None
    # Unusual or suspicious items.
    flags: Optional[list[str]] = None
    # One-sentence summary of the analysis (required).
    raw_reasoning: str
52
+
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Prompt templates (inlined from training/prompt_template.py)
56
+ # ---------------------------------------------------------------------------
57
+
58
+ SYSTEM_PROMPT = """\
59
+ You are a contract analysis assistant specializing in detecting hallucinations \
60
+ and calibrated uncertainty. Given a contract text and a question about a specific \
61
+ clause, output a single JSON object with exactly these fields:
62
+
63
+ question : the question asked (copy verbatim)
64
+ label : one of GROUNDED, ABSENT, or CONTRADICTS_PRIOR
65
+ - GROUNDED : the information exists verbatim in the contract
66
+ - ABSENT : the contract does not contain this clause at all
67
+ - CONTRADICTS_PRIOR: the contract contains a clause but it deviates \
68
+ from standard legal terms (e.g., inverted obligations, non-standard timeframes)
69
+ answer : the answer text if GROUNDED or CONTRADICTS_PRIOR, null if ABSENT
70
+ citation : the exact verbatim span from the contract that supports the answer, \
71
+ null if ABSENT
72
+ reasoning : one sentence explaining your classification
73
+
74
+ Output ONLY the JSON object. No preamble, no markdown fences, no text outside the JSON.
75
+
76
+ ### Example 1 — GROUNDED
77
+
78
+ [CONTRACT]
79
+ This Software License Agreement ("Agreement") is entered into as of January 1, 2024, \
80
+ between TechVision Inc. ("Licensor") and GlobalCorp Ltd. ("Licensee"). The Agreement \
81
+ shall remain in effect for a period of two (2) years from the Effective Date, unless \
82
+ earlier terminated pursuant to Section 8. Licensor grants Licensee a non-exclusive, \
83
+ non-transferable license to use the Software solely for Licensee's internal business \
84
+ purposes.
85
+ [/CONTRACT]
86
+
87
+ Question: What is the duration of this agreement?
88
+
89
+ {"question": "What is the duration of this agreement?", "label": "GROUNDED", \
90
+ "answer": "Two years from the Effective Date", \
91
+ "citation": "remain in effect for a period of two (2) years from the Effective Date", \
92
+ "reasoning": "The contract explicitly specifies a two-year term starting from the Effective Date."}
93
+
94
+ ### Example 2 — ABSENT
95
+
96
+ [CONTRACT]
97
+ The Licensee shall pay a monthly fee of five hundred dollars ($500.00). Payment is due \
98
+ on the first business day of each calendar month. Late payments shall accrue interest \
99
+ at a rate of one and one-half percent (1.5%) per month. Licensee shall maintain \
100
+ accurate records of all uses of the Software.
101
+ [/CONTRACT]
102
+
103
+ Question: Does this agreement include a limitation of liability clause?
104
+
105
+ {"question": "Does this agreement include a limitation of liability clause?", \
106
+ "label": "ABSENT", "answer": null, "citation": null, \
107
+ "reasoning": "No limitation of liability clause appears anywhere in the provided contract text."}
108
+
109
+ ### Example 3 — CONTRADICTS_PRIOR
110
+
111
+ [CONTRACT]
112
+ This Non-Disclosure Agreement is made between AlphaTech Solutions ("Discloser") and \
113
+ Beta Dynamics Corp. ("Recipient"). The Recipient shall not disclose Confidential \
114
+ Information to any third party. NON-COMPETE: The Recipient shall engage in any \
115
+ business activity that competes with the Discloser's primary operations during the \
116
+ term and for a period of 24 months thereafter. The Recipient shall not take any \
117
+ steps to protect Discloser's trade secrets.
118
+ [/CONTRACT]
119
+
120
+ Question: Does this agreement restrict the Recipient from competing with the Discloser?
121
+
122
+ {"question": "Does this agreement restrict the Recipient from competing with the Discloser?", \
122
+ "label": "CONTRADICTS_PRIOR", \
123
+ "answer": "The non-compete clause has inverted obligations — it permits competition rather than prohibiting it", \
125
+ "citation": "The Recipient shall engage in any business activity that competes with the Discloser's primary operations", \
126
+ "reasoning": "The clause uses 'shall engage' instead of 'shall not engage', inverting the standard non-compete obligation."}
127
+ """
128
+
129
+ BANK_SYSTEM_PROMPT = """\
130
+ You are a financial analysis assistant specialising in bank statement review. \
131
+ Given a bank statement (plain text, CSV/Excel-derived, OFX/QFX-derived, or PDF-extracted) and either a \
132
+ summary request or a specific question, produce a single JSON object.
133
+
134
+ For SUMMARY mode (question is "SUMMARISE"):
135
+ Output a JSON object with exactly these fields:
136
+ total_credits : total money received (e.g. "£3,420.50") or null
136
+ total_debits : total money spent (e.g. "£2,105.30") or null
137
+ largest_transaction: description + amount of the single largest transaction or null
138
+ recurring_payments : list of detected recurring charges (e.g. ["Netflix £9.99", "Gym £35.00"]) or []
139
+ flags : list of unusual or suspicious items (e.g. ["Large cash withdrawal £800"]) or []
141
+ raw_reasoning : one sentence summarising your analysis
142
+
143
+ For Q&A mode (any other question), output a JSON object with exactly these fields:
144
+ question : the question asked (copy verbatim)
145
+ label : one of GROUNDED, ABSENT, or CONTRADICTS_PRIOR
146
+ answer : the answer text if GROUNDED or CONTRADICTS_PRIOR, null if ABSENT
147
+ citation : the exact verbatim span from the statement, null if ABSENT
148
+ reasoning : one sentence explaining your classification
149
+
150
+ Output ONLY the JSON object. No preamble, no markdown fences, no text outside the JSON.
151
+ """
152
+
153
# Appended to the final chat message on the retry attempt to push the model
# towards emitting nothing but JSON.
STRICT_SUFFIX = "".join(
    [
        "\n\n",
        "IMPORTANT: You must output ONLY a valid JSON object. ",
        "Do not include any text before or after the JSON.",
    ]
)
157
+
158
+
159
def _build_contract_messages(contract_text: str, question: str) -> list[dict]:
    """Build the two-turn chat payload (system + user) for a contract question.

    The contract is wrapped in ``[CONTRACT]``/``[/CONTRACT]`` markers and the
    question follows on its own line, mirroring the examples in SYSTEM_PROMPT.
    """
    user_prompt = f"[CONTRACT]\n{contract_text}\n[/CONTRACT]\n\nQuestion: {question}"
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": user_prompt}
    return [system_turn, user_turn]
164
+
165
+
166
def _build_bank_messages(statement_text: str, question: str) -> list[dict]:
    """Build the two-turn chat payload (system + user) for a bank statement.

    The statement is wrapped in ``[STATEMENT]``/``[/STATEMENT]`` markers; the
    question is either the literal ``SUMMARISE`` or a free-form question.
    """
    user_prompt = f"[STATEMENT]\n{statement_text}\n[/STATEMENT]\n\nQuestion: {question}"
    system_turn = {"role": "system", "content": BANK_SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": user_prompt}
    return [system_turn, user_turn]
171
+
172
+
173
+ # ---------------------------------------------------------------------------
174
+ # JSON parsing helpers
175
+ # ---------------------------------------------------------------------------
176
+
177
def _extract_json_str(raw_text: str) -> str:
    """Return the first JSON object embedded in *raw_text*, as a string.

    Each ``{`` is tried in turn with a real JSON decoder, so nested objects
    and braces inside string values are handled correctly. The previous
    regex-only approach could return an inner object or stop at a ``}`` that
    was part of a string.

    If no position decodes as valid JSON, the original regex heuristics are
    used as a fallback so that "almost JSON" text is still handed back to the
    caller, whose own parser then reports the precise error.

    Raises:
        ValueError: if *raw_text* contains no ``{...}`` span at all.
    """
    decoder = json.JSONDecoder()
    start = raw_text.find("{")
    while start != -1:
        try:
            _, end = decoder.raw_decode(raw_text, start)
        except ValueError:
            # Not valid JSON from this brace; try the next one.
            start = raw_text.find("{", start + 1)
        else:
            return raw_text[start:end]

    # Fallback: brace-matching heuristics (at most one nesting level, then greedy).
    match = re.search(r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)?\}", raw_text, re.DOTALL)
    if not match:
        match = re.search(r"\{.*\}", raw_text, re.DOTALL)
    if not match:
        raise ValueError(f"No JSON object found in model output: {raw_text[:300]!r}")
    return match.group()
184
+
185
+
186
def _parse_model_output(raw_text: str, question: str) -> ModelOutput:
    """Parse the model's raw reply into a validated ``ModelOutput``.

    Args:
        raw_text: Raw text returned by the model.
        question: The question that was asked. Used to fill in the
            ``question`` field when the model omitted it or returned a
            non-string there, so an otherwise valid answer is not discarded.

    Raises:
        ValueError: if no JSON object can be located in *raw_text*, or
            (via pydantic) if the JSON does not satisfy the schema.
    """
    json_str = _extract_json_str(raw_text)
    try:
        payload = json.loads(json_str)
    except ValueError:
        # Leave malformed JSON untouched; pydantic reports the error below.
        payload = None
    if isinstance(payload, dict) and not isinstance(payload.get("question"), str):
        payload["question"] = question
        json_str = json.dumps(payload)
    return ModelOutput.model_validate_json(json_str)
189
+
190
+
191
def _parse_summary(raw_text: str) -> BankStatementSummary:
    """Parse the model's raw reply into a ``BankStatementSummary``.

    Missing keys become ``None`` (scalars) or ``[]`` (lists). A missing *or
    null* ``raw_reasoning`` becomes ``""``: the field is a required string,
    so passing ``None`` through would reject an otherwise usable summary.

    Raises:
        ValueError: if no JSON object is found or it cannot be decoded.
    """
    data = json.loads(_extract_json_str(raw_text))
    return BankStatementSummary(
        total_credits=data.get("total_credits"),
        total_debits=data.get("total_debits"),
        largest_transaction=data.get("largest_transaction"),
        recurring_payments=data.get("recurring_payments") or [],
        flags=data.get("flags") or [],
        raw_reasoning=data.get("raw_reasoning") or "",
    )
201
+
202
+
203
+ # ---------------------------------------------------------------------------
204
+ # Model loading
205
+ # ---------------------------------------------------------------------------
206
+
207
# Base URL of the remote inference server, without a trailing slash; empty
# when the environment variable is not set.
MLX_SERVER_URL = os.getenv("MLX_SERVER_URL", "").rstrip("/")

# Sample inputs live in folders next to this file.
SAMPLE_DIR = Path(__file__).with_name("sample_contracts")
STATEMENT_DIR = Path(__file__).with_name("sample_statements")

# Human-readable reason inference is unavailable, or None when configured.
model_load_error: Optional[str] = (
    None
    if MLX_SERVER_URL
    else "MLX_SERVER_URL not set. Set it in Space secrets to your Mac's ngrok URL."
)

if model_load_error is None:
    print(f"MLX server configured at: {MLX_SERVER_URL}")
else:
    print(f"WARNING: {model_load_error}")
218
+
219
+
220
+ # ---------------------------------------------------------------------------
221
+ # Inference helpers
222
+ # ---------------------------------------------------------------------------
223
+
224
MAX_CHARS = 32000  # rough character limit (~8k tokens) to keep requests fast


def _truncate(text: str) -> str:
    """Clip *text* to at most ``MAX_CHARS`` characters.

    Text within the limit is returned unchanged; longer text is cut at the
    limit and a warning with both lengths is printed.
    """
    original_len = len(text)
    if original_len <= MAX_CHARS:
        return text
    print(f"WARNING: Text truncated from {original_len} to {MAX_CHARS} chars.")
    return text[:MAX_CHARS]
232
+
233
+
234
def _apply_messages(messages: list[dict], strict: bool = False) -> list[dict]:
    """Return the messages to send, optionally hardened for a retry.

    With ``strict=False`` the input list is returned as-is. With
    ``strict=True`` a shallow copy of the list is returned whose last
    message is itself a copy with ``STRICT_SUFFIX`` appended to its content;
    the caller's list and message dicts are left unmodified.
    """
    if not strict:
        return messages

    patched = list(messages)
    final_turn = dict(patched[-1])
    final_turn["content"] = final_turn["content"] + STRICT_SUFFIX
    patched[-1] = final_turn
    return patched
240
+
241
+
242
def _run_inference(messages: list[dict]) -> str:
    """POST *messages* to the chat-completions endpoint and return the reply text.

    Sends a JSON body to ``{MLX_SERVER_URL}/v1/chat/completions`` with
    ``max_tokens=512`` and ``temperature=0.0``, waits up to 120 seconds, and
    returns the content of the first choice's message. Network, HTTP and
    decoding errors propagate to the caller.
    """
    import urllib.request

    body = {
        "messages": messages,
        "max_tokens": 512,
        "temperature": 0.0,
    }
    request = urllib.request.Request(
        url=f"{MLX_SERVER_URL}/v1/chat/completions",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=120) as response:
        reply = json.loads(response.read())
    first_choice = reply["choices"][0]
    return first_choice["message"]["content"]
258
+
259
+
260
+ # ---------------------------------------------------------------------------
261
+ # Sample contract content
262
+ # ---------------------------------------------------------------------------
263
+
264
def _read_sample(filename: str) -> str:
    """Return the UTF-8 text of a sample contract in ``SAMPLE_DIR``.

    When the file does not exist, a bracketed placeholder message naming the
    file is returned instead of raising.
    """
    sample_path = SAMPLE_DIR / filename
    if not sample_path.exists():
        return f"[Sample contract '{filename}' not found. Place it in demo/sample_contracts/]"
    return sample_path.read_text(encoding="utf-8")


# Sample contracts are read once at import time.
SOFTWARE_LICENSE = _read_sample("software_license.txt")
NDA = _read_sample("nda.txt")
SERVICE_AGREEMENT = _read_sample("service_agreement.txt")

# A question paired with each sample contract, keyed by file name.
SAMPLE_QUESTIONS = {
    "software_license.txt": "What is the limitation of liability in this agreement?",
    "nda.txt": "Does this agreement include a non-compete clause?",
    "service_agreement.txt": "Does this contract include a termination for convenience clause?",
}
280
+
281
+
282
def _read_sample_statement(filename: str) -> str:
    """Return the UTF-8 text of a sample statement in ``STATEMENT_DIR``.

    When the file does not exist, a bracketed placeholder message naming the
    file is returned instead of raising.
    """
    statement_path = STATEMENT_DIR / filename
    if not statement_path.exists():
        return f"[Sample statement '{filename}' not found. Place it in demo/sample_statements/]"
    return statement_path.read_text(encoding="utf-8")


# The sample statement is read once at import time.
SAMPLE_STATEMENT = _read_sample_statement("sample_statement.txt")
290
+
291
+
292
+ # ---------------------------------------------------------------------------
293
+ # Label badge HTML
294
+ # ---------------------------------------------------------------------------
295
+
296
# Badge styling per label: (text colour, background, border colour, icon).
# Icons are written as real Unicode symbols (check mark, ballot X, warning
# sign, em dash) rather than mis-decoded byte sequences.
_BADGE_CFG = {
    "GROUNDED": ("#0f9d58", "rgba(34,197,94,0.10)", "rgba(34,197,94,0.28)", "✓"),
    "ABSENT": ("#d23131", "rgba(239,68,68,0.09)", "rgba(239,68,68,0.28)", "✗"),
    "CONTRADICTS_PRIOR": ("#b87800", "rgba(245,158,11,0.10)", "rgba(245,158,11,0.30)", "⚠"),
    "N/A": ("#8a91a3", "rgba(139,145,163,0.10)", "rgba(139,145,163,0.25)", "—"),
    "ERROR": ("#991b1b", "rgba(220,38,38,0.10)", "rgba(220,38,38,0.32)", "!"),
}


def format_label_html(label: str) -> str:
    """Render *label* as an inline HTML badge.

    Known labels use their entry in ``_BADGE_CFG``; any other value is shown
    with the neutral ``N/A`` styling. ``CONTRADICTS_PRIOR`` is displayed as
    ``CONTRADICTS PRIOR``. The displayed text is HTML-escaped so an
    unexpected label value cannot inject markup.
    """
    import html

    fg, bg, border, icon = _BADGE_CFG.get(label, _BADGE_CFG["N/A"])
    display = "CONTRADICTS PRIOR" if label == "CONTRADICTS_PRIOR" else label
    display = html.escape(str(display))
    return (
        f'<div style="display:inline-flex;align-items:center;gap:8px;'
        f'padding:11px 16px;border-radius:10px;border:1px solid {border};'
        f'background:{bg};color:{fg};font-family:\'Inter\',sans-serif;'
        f'font-size:12.5px;font-weight:600;letter-spacing:0.02em;'
        f'backdrop-filter:blur(10px);">'
        f'<span style="width:14px;height:14px;display:grid;place-items:center;'
        f'font-size:13px;">{icon}</span>'
        f'<span>{display}</span></div>'
    )
318
+
319
+
320
+ # ---------------------------------------------------------------------------
321
+ # Analysis handlers
322
+ # ---------------------------------------------------------------------------
323
+
324
def analyze_contract(contract_text: str, question: str) -> tuple[str, str, str, str]:
    """Answer *question* about *contract_text* using the remote model.

    Returns:
        A 4-tuple ``(label_badge_html, answer, citation, reasoning)``.

    Behaviour:
        * Empty (or ``None``) contract or question returns an ``N/A`` badge
          with a prompt to fill the missing field.
        * No configured server returns an ``N/A`` badge with the load error.
        * Otherwise up to two attempts are made; the second appends the
          strict JSON-only suffix. Any exception during inference or parsing
          counts as a failed attempt.
        * If both attempts fail, an ``ABSENT`` badge is returned as the
          fallback, with a reasoning string saying the output was unparseable.
    """
    # User-facing placeholder, with a real em dash rather than mis-decoded bytes.
    no_answer = "(none — clause is absent or not applicable)"

    # Treat None (e.g. a cleared input) like an empty string instead of crashing.
    contract_text = contract_text or ""
    question = question or ""

    if not contract_text.strip():
        return format_label_html("N/A"), "", "", "Please paste a contract above."
    if not question.strip():
        return format_label_html("N/A"), "", "", "Please enter a question."
    if not MLX_SERVER_URL:
        return (
            format_label_html("N/A"),
            "Model not loaded",
            "",
            f"Model failed to load: {model_load_error}.",
        )

    contract_text = _truncate(contract_text)
    messages = _build_contract_messages(contract_text, question)

    for attempt in range(2):
        msgs = _apply_messages(messages, strict=(attempt == 1))
        try:
            raw = _run_inference(msgs)
            result = _parse_model_output(raw, question)
            label_html = format_label_html(result.label.value)
            answer = result.answer or no_answer
            citation = result.citation or "(none)"
            return label_html, answer, citation, result.reasoning
        except Exception as e:
            if attempt == 0:
                print(f" Parse attempt 1 failed ({e}). Retrying with stricter prompt...")
            else:
                print(f" Parse attempt 2 failed ({e}). Returning safe fallback.")

    return (
        format_label_html("ABSENT"),
        no_answer,
        "(none)",
        "Model output could not be parsed as valid JSON after two attempts.",
    )
361
+
362
+
363
def _get_statement_text(
    paste_text: str,
    pdf_file,
    pdf_password: str | None,
    csv_file,
    txt_file,
    xlsx_file,
    ofx_file,
) -> tuple[str, str]:
    """Single-statement wrapper around ``_get_statement_texts``.

    Returns:
        ``(text, "")`` with the first extracted statement when there is one,
        otherwise ``("", message)`` where *message* is the first reported
        error or a generic request to provide a statement.
    """
    # Backwards-compatible shim: treat "single statement" inputs as one item.
    texts, errors = _get_statement_texts(
        paste_text, pdf_file, pdf_password, csv_file, txt_file, xlsx_file, ofx_file
    )
    if texts:
        return texts[0], ""
    if errors:
        return "", errors[0]
    return "", "Please paste a bank statement or upload a PDF / CSV / TXT / XLSX / OFX/QFX file."
390
+
391
+
392
def _ensure_file_list(files) -> list:
    """Normalise an upload value to a list.

    ``None`` becomes ``[]``; a list or tuple becomes a new list with ``None``
    entries dropped; any other single value is wrapped in a one-item list.
    """
    if isinstance(files, (list, tuple)):
        return list(filter(lambda item: item is not None, files))
    return [] if files is None else [files]
398
+
399
+
400
def _split_statements(paste_text: str) -> list[str]:
    """
    Break pasted content into individual statements.

    A delimiter is a line made up solely of three or more dashes (``---``),
    with optional spaces/tabs around it. Empty pieces are discarded. Empty or
    ``None`` input gives ``[]``; if every piece turns out empty, the whole
    stripped text is returned as a single statement.
    """
    text = (paste_text or "").strip()
    if not text:
        return []

    chunks: list[str] = []
    for piece in re.split(r"(?m)^[ \t]*-{3,}[ \t]*$", text):
        piece = piece.strip()
        if piece:
            chunks.append(piece)
    return chunks or [text]
412
+
413
+
414
def _statement_frame_to_text(df) -> str:
    """Render a pandas DataFrame as text: a header line, then one line per row.

    Column names are stringified, stripped and lower-cased. Cells whose
    string form is empty or a NaN marker are dropped from their row.
    """
    header = ", ".join(str(col).strip().lower() for col in df.columns)
    rows: list[str] = []
    for _, row in df.iterrows():
        cells = (str(value).strip() for value in row.values)
        rows.append(", ".join(c for c in cells if c not in ("", "nan", "NaN")))
    return header + "\n" + "\n".join(rows)


def _statement_pdf_to_text(pdfplumber, path: str, password: str) -> str:
    """Extract the text of every page of the PDF at *path*, joined by newlines."""

    def _read_pages(**open_kwargs) -> list[str]:
        with pdfplumber.open(path, **open_kwargs) as pdf:
            extracted = (page.extract_text() for page in pdf.pages)
            return [text for text in extracted if text]

    try:
        parts = _read_pages(password=password)
    except TypeError:
        # Older pdfplumber versions may not accept `password=...`. The retry
        # starts from a fresh list so pages read before the failure are not
        # duplicated.
        parts = _read_pages()
    return "\n".join(parts).strip()


def _statement_ofx_to_text(content: str) -> str:
    """Turn OFX/QFX content into a "Date, Description, Amount" table.

    Uses lightweight tag extraction over ``<STMTTRN>`` blocks. When no
    transaction can be extracted, the stripped raw content (capped at 20000
    characters) is returned instead.
    """

    def _tag(block: str, tag: str) -> str:
        m = re.search(rf"<{tag}>([^<]*)", block, flags=re.IGNORECASE)
        return (m.group(1) if m else "").strip()

    def _format_date(stamp: str) -> str:
        # OFX datetimes start with YYYYMMDD and may continue with a time and
        # timezone (e.g. 20240115120000[0:GMT]); only the date part is kept.
        stamp = (stamp or "").strip()
        if len(stamp) >= 8 and stamp[:8].isdigit():
            return f"{stamp[:4]}-{stamp[4:6]}-{stamp[6:8]}"
        return stamp

    lines: list[str] = []
    for block in re.findall(
        r"<STMTTRN>(.*?)</STMTTRN>", content, flags=re.IGNORECASE | re.DOTALL
    ):
        posted = _tag(block, "DTPOSTED") or _tag(block, "DTTRAN")
        name = _tag(block, "NAME") or _tag(block, "PAYEE")
        memo = _tag(block, "MEMO") or _tag(block, "TRNTYPE")
        amount = _tag(block, "TRNAMT") or _tag(block, "AMOUNT")
        if not any((posted, name, memo, amount)):
            continue
        description = " - ".join(part for part in (name, memo) if part) or "Transaction"
        lines.append(f"{_format_date(posted)}, {description}, {amount}".strip(", "))

    if lines:
        return "Date, Description, Amount\n" + "\n".join(lines)
    # Fall back to returning the raw content (truncated).
    return content.strip()[:20000]


def _get_statement_texts(
    paste_text: str,
    pdf_files,
    pdf_password: str | None,
    csv_files,
    txt_files,
    xlsx_files,
    ofx_files,
) -> tuple[list[str], list[str]]:
    """
    Extract statement text blocks from:
    - pasted text (can contain multiple statements separated by `---`)
    - uploaded PDFs (supports multiple)
    - uploaded CSVs (supports multiple)
    - uploaded TXT files (supports multiple)
    - uploaded Excel (.xlsx) (supports multiple)
    - uploaded OFX/QFX files (supports multiple)

    Each upload argument may be ``None``, a single item, or a list/tuple of
    items; every item is converted to a path with ``str()``.

    Returns:
        ``(statement_texts, errors)``. Texts are ordered PDF, CSV, TXT, XLSX,
        OFX, then pasted text. ``errors`` collects per-file problems without
        aborting the other inputs; when nothing at all could be extracted, a
        final message asking for input is appended.
    """
    statement_texts: list[str] = []
    errors: list[str] = []

    pdf_list = _ensure_file_list(pdf_files)
    csv_list = _ensure_file_list(csv_files)
    txt_list = _ensure_file_list(txt_files)
    xlsx_list = _ensure_file_list(xlsx_files)
    ofx_list = _ensure_file_list(ofx_files)

    def _load_pandas(kind: str):
        """Import pandas, or record why it is unavailable and return None."""
        try:
            import pandas as pd
        except Exception:
            if importlib.util.find_spec("pandas") is None:
                errors.append("pandas not installed.")
            else:
                errors.append(f"{kind} parsing error: pandas import failed.")
            return None
        return pd

    # PDFs
    if pdf_list:
        pdfplumber = None
        try:
            if importlib.util.find_spec("pdfplumber") is None:
                errors.append("pdfplumber not installed.")
            else:
                import pdfplumber
        except Exception as e:
            errors.append(f"PDF extraction error: {e}")

        if pdfplumber is not None:
            password = (pdf_password or "").strip()
            for idx, pdf_file in enumerate(pdf_list, start=1):
                try:
                    text = _statement_pdf_to_text(pdfplumber, str(pdf_file), password)
                except Exception as e:
                    # Include the exception class name: some password failures
                    # carry no message, leaving the class name as the only
                    # hint -- NOTE(review): confirm against the installed
                    # pdfplumber/pdfminer version.
                    detail = f"{type(e).__name__} {e}".lower()
                    if any(k in detail for k in ("password", "encrypted", "decrypt")):
                        errors.append(
                            f"PDF #{idx} is password-protected. Please enter the correct password."
                        )
                    else:
                        errors.append(f"PDF #{idx} extraction error: {e}")
                else:
                    if text:
                        statement_texts.append(text)
                    else:
                        errors.append(f"PDF #{idx} uploaded but no text could be extracted.")

    # CSVs
    if csv_list:
        pd = _load_pandas("CSV")
        if pd is not None:
            for idx, csv_file in enumerate(csv_list, start=1):
                try:
                    df = pd.read_csv(str(csv_file))
                    statement_texts.append(_statement_frame_to_text(df))
                except Exception as e:
                    errors.append(f"CSV #{idx} parsing error: {e}")

    # TXT
    if txt_list:
        for idx, txt_file in enumerate(txt_list, start=1):
            try:
                # Read best-effort encoding; then reuse the same delimiter
                # splitting strategy as pasted input.
                content = Path(str(txt_file)).read_text(encoding="utf-8", errors="replace")
                parts = _split_statements(content)
                if parts:
                    statement_texts.extend(parts)
                else:
                    errors.append(f"TXT #{idx} uploaded but no text could be read.")
            except Exception as e:
                errors.append(f"TXT #{idx} parsing error: {e}")

    # XLSX (Excel)
    if xlsx_list:
        pd = _load_pandas("Excel")
        if pd is not None:
            for idx, xlsx_file in enumerate(xlsx_list, start=1):
                try:
                    df = pd.read_excel(str(xlsx_file), sheet_name=0)
                    if df is None or df.empty:
                        errors.append(f"XLSX #{idx} uploaded but no rows were found.")
                        continue
                    statement_texts.append(_statement_frame_to_text(df))
                except Exception as e:
                    errors.append(f"XLSX #{idx} parsing error: {e}")

    # OFX/QFX (lightweight tag extraction)
    if ofx_list:
        for idx, ofx_file in enumerate(ofx_list, start=1):
            try:
                content = Path(str(ofx_file)).read_bytes().decode("utf-8", errors="replace")
                text = _statement_ofx_to_text(content)
                if text:
                    statement_texts.append(text)
                else:
                    # An empty file must not become an empty "statement".
                    errors.append(f"OFX/QFX #{idx} uploaded but no text could be read.")
            except Exception as e:
                errors.append(f"OFX/QFX #{idx} parsing error: {e}")

    # Paste text (may contain multiple statements)
    statement_texts.extend(_split_statements(paste_text))

    if not statement_texts:
        errors.append(
            "Please paste a bank statement or upload a PDF / CSV / TXT / XLSX / OFX/QFX file(s)."
        )

    return statement_texts, errors
620
+
621
+
622
def analyse_bank_statement(
    paste_text: str,
    pdf_file,
    pdf_password: str | None,
    csv_file,
    txt_file,
    xlsx_file,
    ofx_file,
) -> tuple[str, str, str]:
    """Summarise every provided bank statement with the remote model.

    Returns:
        ``(summary_markdown, combined_text, summary_json)`` where
        *combined_text* is all statements joined under numbered banners and
        *summary_json* is a JSON list with one summary object per statement.
        When nothing could be extracted, only an error message is returned;
        when no server is configured, the message and the combined text are.

    At most six statements are processed. Each gets up to two inference
    attempts (the second with the strict suffix); a statement whose output
    never parses yields a placeholder summary rather than an error.
    """
    statement_texts, errors = _get_statement_texts(
        paste_text, pdf_file, pdf_password, csv_file, txt_file, xlsx_file, ofx_file
    )
    if not statement_texts:
        reason = errors[0] if errors else "No bank statement provided."
        return f"**Error:** {reason}", "", ""

    MAX_STATEMENTS = 6
    if len(statement_texts) > MAX_STATEMENTS:
        errors.append(f"Too many statements provided; only the first {MAX_STATEMENTS} were used.")
        statement_texts = statement_texts[:MAX_STATEMENTS]

    # Banner-separated concatenation of all (non-blank) statements.
    total = len(statement_texts)
    sections: list[str] = []
    for number, body in enumerate(statement_texts, start=1):
        body = body.strip()
        if body:
            sections.append(f"===== Statement {number}/{total} =====\n\n{body}")
    combined_text = "\n\n".join(sections).strip()

    if not MLX_SERVER_URL:
        return (
            f"**Inference client not initialised.** Error: {model_load_error}",
            combined_text,
            "",
        )

    def _summarise(number: int, text: str) -> BankStatementSummary:
        """Run up to two attempts for one statement; fall back to a placeholder."""
        messages = _build_bank_messages(_truncate(text), "SUMMARISE")
        for attempt in range(2):
            msgs = _apply_messages(messages, strict=(attempt == 1))
            try:
                return _parse_summary(_run_inference(msgs))
            except Exception as e:
                if attempt == 0:
                    print(f" Summary parse attempt 1 failed (statement {number}, {e}). Retrying...")
                else:
                    print(f" Summary parse attempt 2 failed (statement {number}, {e}). Returning error.")
        return BankStatementSummary(
            raw_reasoning=f"Could not parse model output for statement {number}."
        )

    summaries: list[BankStatementSummary] = [
        _summarise(number, text) for number, text in enumerate(statement_texts, start=1)
    ]

    # Render markdown
    lines: list[str] = ["## Statements Summary", ""]
    if errors:
        lines.append("**Notes:**")
        lines.extend(f"- {note}" for note in errors)
        lines.append("")

    for number, summary in enumerate(summaries, start=1):
        lines.extend(
            [
                f"### Statement {number}",
                f"**Total Credits:** {summary.total_credits or 'N/A'}",
                f"**Total Debits:** {summary.total_debits or 'N/A'}",
                f"**Largest Transaction:** {summary.largest_transaction or 'N/A'}",
            ]
        )
        if summary.recurring_payments:
            lines.append("\n**Recurring Payments:**")
            lines.extend(f"- {payment}" for payment in summary.recurring_payments)
        if summary.flags:
            lines.append("\n**Flags / Unusual Activity:**")
            lines.extend(f"- {flag}" for flag in summary.flags)
        lines.append(f"\n*{summary.raw_reasoning}*")
        lines.append("")

    # Overall union (useful across multiple statements); dict.fromkeys
    # de-duplicates while keeping first-seen order.
    overall_recurring = list(
        dict.fromkeys(item for s in summaries for item in (s.recurring_payments or []))
    )
    overall_flags = list(
        dict.fromkeys(item for s in summaries for item in (s.flags or []))
    )

    lines.append("## Overall (union across statements)")
    if overall_recurring:
        lines.append("\n**Recurring Payments (union):**")
        lines.extend(f"- {payment}" for payment in overall_recurring)
    else:
        lines.append("\n**Recurring Payments (union):** N/A")

    if overall_flags:
        lines.append("\n**Flags / Unusual Activity (union):**")
        lines.extend(f"- {flag}" for flag in overall_flags)
    else:
        lines.append("\n**Flags / Unusual Activity (union):** N/A")

    summary_json = json.dumps([s.model_dump() for s in summaries], ensure_ascii=False)
    return "\n".join(lines).strip(), combined_text, summary_json
741
+
742
+
743
def _safe_json_loads(s: str) -> object:
    """Decode *s* as JSON without ever raising.

    Returns the decoded value when it is a dict or a list. Any other decoded
    type, empty/``None`` input, or a decoding failure yields ``{}``.
    """
    try:
        parsed = json.loads(s or "")
    except Exception:
        return {}
    return parsed if isinstance(parsed, (dict, list)) else {}
751
+
752
+
753
+ def _escape_pdf_text(s: str) -> str:
754
+ # PDF literal strings escape backslash and parentheses.
755
+ return (s or "").replace("\\", "\\\\").replace("(", "\\(").replace(")", "\\)")
756
+
757
+
758
+ def _simple_pdf_bytes(title: str, lines: list[str]) -> bytes:
759
+ """
760
+ Tiny, dependency-free, single-page PDF generator for short text reports.
761
+ """
762
+ font = "Helvetica"
763
+ font_size = 11
764
+ left = 54
765
+ top = 790
766
+ leading = 14
767
+
768
+ safe_title = _escape_pdf_text(title)
769
+ safe_lines = [_escape_pdf_text(ln) for ln in lines]
770
+
771
+ content_lines: list[str] = []
772
+ content_lines.append("BT")
773
+ content_lines.append(f"/F1 {font_size} Tf")
774
+ content_lines.append(f"{left} {top} Td")
775
+ content_lines.append(f"({_escape_pdf_text(safe_title)}) Tj")
776
+ content_lines.append(f"0 -{leading*2} Td")
777
+ for ln in safe_lines:
778
+ content_lines.append(f"({ln}) Tj")
779
+ content_lines.append(f"0 -{leading} Td")
780
+ content_lines.append("ET")
781
+ stream = "\n".join(content_lines).encode("latin-1", errors="replace")
782
+
783
+ objects: list[bytes] = []
784
+ objects.append(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n")
785
+ objects.append(b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n")
786
+ objects.append(
787
+ b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] "
788
+ b"/Resources << /Font << /F1 4 0 R >> >> /Contents 5 0 R >>\nendobj\n"
789
+ )
790
+ objects.append(f"4 0 obj\n<< /Type /Font /Subtype /Type1 /BaseFont /{font} >>\nendobj\n".encode())
791
+ objects.append(
792
+ b"5 0 obj\n<< /Length " + str(len(stream)).encode() + b" >>\nstream\n" + stream + b"\nendstream\nendobj\n"
793
+ )
794
+
795
+ out = io.BytesIO()
796
+ out.write(b"%PDF-1.4\n%\xe2\xe3\xcf\xd3\n")
797
+ xref: list[int] = [0]
798
+ for obj in objects:
799
+ xref.append(out.tell())
800
+ out.write(obj)
801
+ xref_start = out.tell()
802
+ out.write(f"xref\n0 {len(xref)}\n".encode())
803
+ out.write(b"0000000000 65535 f \n")
804
+ for off in xref[1:]:
805
+ out.write(f"{off:010d} 00000 n \n".encode())
806
+ out.write(
807
+ b"trailer\n<< /Size "
808
+ + str(len(xref)).encode()
809
+ + b" /Root 1 0 R >>\nstartxref\n"
810
+ + str(xref_start).encode()
811
+ + b"\n%%EOF\n"
812
+ )
813
+ return out.getvalue()
814
+
815
+
816
def export_bank_summary_csv(summary_json: str) -> tuple[str | None, str]:
    """Write the analysed statement summaries to a temporary CSV file.

    One row is written per statement, followed by an ``overall`` row holding
    the de-duplicated union of recurring payments and flags across all
    statements. List-valued fields are joined with ``" | "``.

    Args:
        summary_json: JSON text containing one summary object or a list of
            summary objects. Entries that are not JSON objects are skipped.

    Returns:
        ``(path, message)``. ``path`` is the location of the CSV file, or
        ``None`` when there is nothing to export; ``message`` is a Markdown
        status line for the UI. The file is created with ``delete=False``, so
        it stays on disk after this function returns.
    """
    data = _safe_json_loads(summary_json)
    if not data:
        return None, "**Export error:** Run 'Analyse statement' first."

    statements = data if isinstance(data, list) else [data]
    # Keep each statement's original 1-based position so that skipping a
    # malformed entry does not renumber the ones after it.
    indexed = [(i, s) for i, s in enumerate(statements, start=1) if isinstance(s, dict)]

    def _texts(value: object) -> list[str]:
        # The payload is plain JSON, so items are not guaranteed to be
        # strings; coerce them so joining cannot raise TypeError, and keep a
        # bare scalar as a single item instead of splitting it into characters.
        if not value:
            return []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value]
        return [str(value)]

    # dict.fromkeys de-duplicates while preserving first-seen order.
    overall_recurring = list(dict.fromkeys(
        p for _, s in indexed for p in _texts(s.get("recurring_payments"))
    ))
    overall_flags = list(dict.fromkeys(
        f for _, s in indexed for f in _texts(s.get("flags"))
    ))

    filename = f"bank-statement-summaries_{_dt.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".csv", prefix="chex_", mode="w", newline="", encoding="utf-8"
    ) as tmp:
        writer = csv.writer(tmp)
        writer.writerow([
            "statement_index",
            "total_credits",
            "total_debits",
            "largest_transaction",
            "recurring_payments",
            "flags",
            "raw_reasoning",
        ])

        for i, s in indexed:
            writer.writerow([
                i,
                s.get("total_credits") or "",
                s.get("total_debits") or "",
                s.get("largest_transaction") or "",
                " | ".join(_texts(s.get("recurring_payments"))),
                " | ".join(_texts(s.get("flags"))),
                s.get("raw_reasoning") or "",
            ])

        # Overall union row
        writer.writerow([
            "overall",
            "",
            "",
            "",
            " | ".join(overall_recurring),
            " | ".join(overall_flags),
            "",
        ])

    # Gradio uses the path; `filename` is only a friendly label for the
    # status message and is not the name of the file on disk.
    return tmp.name, f"**CSV ready:** `{filename}`"
877
+
878
+
879
def export_bank_summary_pdf(summary_json: str) -> tuple[str | None, str]:
    """Render the analysed statement summaries into a temporary one-page PDF.

    The report lists the generation time, the number of statements, the
    de-duplicated union of recurring payments and flags, and then the totals
    and model reasoning for each statement.

    Args:
        summary_json: JSON text containing one summary object or a list of
            summary objects. Entries that are not JSON objects are skipped.

    Returns:
        ``(path, message)``. ``path`` is the location of the PDF file, or
        ``None`` when there is nothing to export; ``message`` is a Markdown
        status line for the UI. The file is created with ``delete=False``, so
        it stays on disk after this function returns.
    """
    data = _safe_json_loads(summary_json)
    if not data:
        return None, "**Export error:** Run 'Analyse statement' first."

    statements = data if isinstance(data, list) else [data]
    # Keep each statement's original 1-based position so that skipping a
    # malformed entry does not renumber the ones after it.
    indexed = [(i, s) for i, s in enumerate(statements, start=1) if isinstance(s, dict)]

    def _texts(value: object) -> list[str]:
        # The payload is plain JSON, so items are not guaranteed to be
        # strings; coerce them, and keep a bare scalar as a single item.
        if not value:
            return []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value]
        return [str(value)]

    # dict.fromkeys de-duplicates while preserving first-seen order.
    overall_recurring = list(dict.fromkeys(
        p for _, s in indexed for p in _texts(s.get("recurring_payments"))
    ))
    overall_flags = list(dict.fromkeys(
        f for _, s in indexed for f in _texts(s.get("flags"))
    ))

    # A plain hyphen is used on purpose: _simple_pdf_bytes encodes text as
    # Latin-1 and replaces anything outside it (such as an em dash) with "?".
    title = "CHEX - Bank Statement Summary (Multiple)"
    lines: list[str] = [
        f"Generated: {_dt.datetime.now().isoformat(timespec='seconds')}",
        "",
        f"Statements analysed: {len(statements)}",
        "",
        "Overall Recurring Payments:",
        *([f"- {x}" for x in overall_recurring] or ["- (none)"]),
        "",
        "Overall Flags / Unusual Activity:",
        *([f"- {x}" for x in overall_flags] or ["- (none)"]),
        "",
    ]

    for i, s in indexed:
        lines += [
            f"Statement {i}:",
            f"- Total Credits: {s.get('total_credits') or 'N/A'}",
            f"- Total Debits: {s.get('total_debits') or 'N/A'}",
            f"- Largest Transaction: {s.get('largest_transaction') or 'N/A'}",
        ]
        # Collapse all whitespace runs: each entry becomes a single PDF text
        # row, so embedded newlines would otherwise land inside the string.
        rr = " ".join(str(s.get("raw_reasoning") or "").split())
        if rr:
            lines.append("- Model reasoning: " + rr)
        lines.append("")

    pdf_bytes = _simple_pdf_bytes(title, lines)
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".pdf", prefix="chex_", mode="wb"
    ) as tmp:
        tmp.write(pdf_bytes)

    # `filename` is only a friendly label for the status message; the file on
    # disk keeps the generated temporary name returned as the path.
    filename = f"bank-statement-summaries_{_dt.datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
    return tmp.name, f"**PDF ready:** `{filename}`"
938
+
939
+
940
def bank_qa(statement_text: str, question: str) -> tuple[str, str, str, str]:
    """Answer a question about a previously loaded bank statement.

    The statement is truncated, combined with the question into chat
    messages, and sent for inference. If the first attempt raises, a second
    attempt is made with the stricter message variant; if that also fails, a
    fixed fallback is returned.

    Args:
        statement_text: Text of the statement; empty or ``None`` means no
            statement has been loaded yet.
        question: The user's question; empty or ``None`` is rejected.

    Returns:
        ``(label_html, answer, citation, reasoning)`` for display in the UI.
        Validation and configuration problems are reported through these
        fields rather than raised.
    """
    # UI components can hand over None instead of "" when nothing was entered,
    # so guard before calling str methods.
    if not (statement_text or "").strip():
        return (
            format_label_html("N/A"), "", "",
            "Please run 'Analyse Statement' first to load the statement.",
        )
    if not (question or "").strip():
        return format_label_html("N/A"), "", "", "Please enter a question."
    if not MLX_SERVER_URL:
        return (
            format_label_html("N/A"), "Inference client not initialised", "",
            f"Error: {model_load_error}.",
        )

    statement_text = _truncate(statement_text)
    messages = _build_bank_messages(statement_text, question)

    for attempt in range(2):
        # The second attempt switches to the strict message variant.
        msgs = _apply_messages(messages, strict=(attempt == 1))
        try:
            raw = _run_inference(msgs)
            result = _parse_model_output(raw, question)
            label_html = format_label_html(result.label.value)
            answer = result.answer or "(none — information not found in statement)"
            citation = result.citation or "(none)"
            return label_html, answer, citation, result.reasoning
        except Exception as e:
            # Best-effort boundary: any inference or parsing failure triggers
            # one retry, then the fallback below.
            if attempt == 0:
                print(f"  Q&A parse attempt 1 failed ({e}). Retrying...")
            else:
                print(f"  Q&A parse attempt 2 failed ({e}). Returning fallback.")

    return (
        format_label_html("ABSENT"),
        "(none — information not found in statement)",
        "(none)",
        "Model output could not be parsed after two attempts.",
    )
978
+
979
+
980
+ # ---------------------------------------------------------------------------
981
+ # Benchmark table
982
+ # ---------------------------------------------------------------------------
983
+
984
+ import pandas as pd
985
+
986
# Hand-written comparison rows shown in the benchmark tab: for each question,
# the expected answer, the untuned base model's answer, the fine-tuned model's
# answer, and whether the base model's answer was fabricated.
BENCHMARK_ROWS = [
    {
        "Question": "What is the limitation of liability?",
        "Ground Truth": "GROUNDED — $50,000 cap",
        "Base Model (untuned)": "GROUNDED — $100,000 cap (wrong amount)",
        "CHEX Fine-tuned": "GROUNDED — $50,000 cap ✓",
        "Hallucinated?": "No (wrong value)",
    },
    {
        "Question": "Does this contract include a non-compete clause?",
        "Ground Truth": "ABSENT",
        "Base Model (untuned)": "🚨 GROUNDED — 'Licensee shall not engage in competing activities...' (fabricated)",
        "CHEX Fine-tuned": "ABSENT — null ✓",
        "Hallucinated?": "YES",
    },
    {
        "Question": "What is the term of the NDA?",
        "Ground Truth": "GROUNDED — 3 years",
        "Base Model (untuned)": "GROUNDED — 2 years (wrong duration)",
        "CHEX Fine-tuned": "GROUNDED — three (3) years ✓",
        "Hallucinated?": "No (wrong value)",
    },
    {
        "Question": "Is there a termination for convenience clause?",
        "Ground Truth": "ABSENT",
        "Base Model (untuned)": "🚨 GROUNDED — 'Either party may terminate at any time...' (fabricated)",
        "CHEX Fine-tuned": "ABSENT — null ✓",
        "Hallucinated?": "YES",
    },
    {
        "Question": "What are the monthly payment terms?",
        "Ground Truth": "GROUNDED — $5,000/month",
        "Base Model (untuned)": "GROUNDED — $5,000/month ✓",
        "CHEX Fine-tuned": "GROUNDED — $5,000/month ✓",
        "Hallucinated?": "No",
    },
]

# Tabular form of the rows above; column order follows the dict key order.
BENCHMARK_DF = pd.DataFrame(BENCHMARK_ROWS)
1025
+
1026
+ # ---------------------------------------------------------------------------
1027
+ # Warning banner
1028
+ # ---------------------------------------------------------------------------
1029
+
1030
import html as _html

# Banner markup shown when the model could not be loaded; stays empty when
# there is no load error.
WARNING_HTML = ""
if model_load_error:
    # The error text is inserted into markup, so escape it: characters such
    # as "<" or "&" in the message would otherwise be read as HTML.
    WARNING_HTML = (
        '<div class="chex-banner">'
        '<span class="chex-banner-icon">⚠</span>'
        '<div class="chex-banner-body"><strong>Model not loaded</strong> · '
        f'{_html.escape(str(model_load_error))} — set <code>HF_MODEL_REPO</code> in Space secrets.</div>'
        '</div>'
    )
1039
+
1040
+ # ---------------------------------------------------------------------------
1041
+ # CSS
1042
+ # ---------------------------------------------------------------------------
1043
+
1044
+ CHEX_CSS = """
1045
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500;600&display=swap');
1046
+
1047
+ *, *::before, *::after { box-sizing: border-box; }
1048
+
1049
+ :root {
1050
+ --bg-base: #0B0E14;
1051
+ --bg-grad: linear-gradient(180deg, #0B0E14 0%, #06080C 100%);
1052
+ --bg-elev: #131720;
1053
+ --bg-elev-strong: #191E2B;
1054
+ --bg-sunken: #0E121A;
1055
+ --bg-input: rgba(0,0,0,0.2);
1056
+ --border: rgba(255,255,255,0.06);
1057
+ --border-strong: rgba(255,255,255,0.12);
1058
+ --hairline: rgba(255,255,255,0.03);
1059
+ --fg: #E2E8F0;
1060
+ --fg-muted: #94A3B8;
1061
+ --fg-subtle: #475569;
1062
+ --green: #10B981;
1063
+ --green-bg: rgba(16,185,129,0.10);
1064
+ --green-border: rgba(16,185,129,0.25);
1065
+ --red: #F43F5E;
1066
+ --red-bg: rgba(244,63,94,0.10);
1067
+ --red-border: rgba(244,63,94,0.25);
1068
+ --amber: #F59E0B;
1069
+ --amber-bg: rgba(245,158,11,0.10);
1070
+ --amber-border: rgba(245,158,11,0.25);
1071
+ --blur: 24px;
1072
+ --blur-strong: 32px;
1073
+ --shadow-md: 0 1px 0 rgba(255,255,255,0.03) inset,
1074
+ 0 8px 24px rgba(0,0,0,0.4),
1075
+ 0 1px 2px rgba(0,0,0,0.2);
1076
+ --radius: 10px;
1077
+ --radius-lg: 14px;
1078
+ }
1079
+
1080
+ body {
1081
+ background: var(--bg-grad) !important;
1082
+ background-attachment: fixed !important;
1083
+ background-color: var(--bg-base) !important;
1084
+ min-height: 100vh;
1085
+ }
1086
+
1087
+ .gradio-container {
1088
+ font-family: 'Inter', system-ui, -apple-system, sans-serif !important;
1089
+ font-size: 14px !important;
1090
+ line-height: 1.55 !important;
1091
+ color: var(--fg) !important;
1092
+ background: transparent !important;
1093
+ -webkit-font-smoothing: antialiased !important;
1094
+ -moz-osx-font-smoothing: grayscale !important;
1095
+ letter-spacing: -0.006em !important;
1096
+ max-width: 1480px !important;
1097
+ margin: 0 auto !important;
1098
+ padding: 0 !important;
1099
+ }
1100
+
1101
+ footer, .footer, .built-with, #footer,
1102
+ footer.svelte-1ax1toq, .svelte-1ax1toq.footer,
1103
+ .gradio-container > .footer,
1104
+ .share-button, .copy-all-button,
1105
+ .gradio-container > .top-panel { display: none !important; }
1106
+
1107
+ #root, .app, main {
1108
+ background: transparent !important;
1109
+ padding: 0 !important;
1110
+ margin: 0 !important;
1111
+ }
1112
+
1113
+ .contain, .container {
1114
+ padding: 0 !important;
1115
+ gap: 0 !important;
1116
+ max-width: 100% !important;
1117
+ background: transparent !important;
1118
+ }
1119
+
1120
+ .block, .gr-block, .gr-box, .gr-group, .gradio-container .block {
1121
+ background: transparent !important;
1122
+ border: none !important;
1123
+ box-shadow: none !important;
1124
+ padding: 0 !important;
1125
+ border-radius: 0 !important;
1126
+ }
1127
+
1128
+ .gap, .gr-row { gap: 20px !important; }
1129
+
1130
+ .panel, .gr-panel, .gr-padded {
1131
+ background: transparent !important;
1132
+ border: none !important;
1133
+ padding: 0 !important;
1134
+ box-shadow: none !important;
1135
+ }
1136
+
1137
+ .tabs, .gr-tabs { background: transparent !important; border: none !important; }
1138
+
1139
+ .tabitem, .gr-tabitem {
1140
+ background: transparent !important;
1141
+ border: none !important;
1142
+ padding: 24px !important;
1143
+ }
1144
+
1145
+ [data-testid="textbox"], .gr-textbox {
1146
+ background: transparent !important;
1147
+ border: none !important;
1148
+ box-shadow: none !important;
1149
+ padding: 0 !important;
1150
+ }
1151
+
1152
+ label.block, .label-wrap {
1153
+ background: transparent !important;
1154
+ border: none !important;
1155
+ padding: 0 !important;
1156
+ gap: 6px !important;
1157
+ display: flex !important;
1158
+ flex-direction: column !important;
1159
+ }
1160
+
1161
+ .row, .gr-row { background: transparent !important; border: none !important; padding: 0 !important; }
1162
+
1163
+ .form, .gr-form {
1164
+ background: transparent !important;
1165
+ border: none !important;
1166
+ box-shadow: none !important;
1167
+ padding: 0 !important;
1168
+ gap: 14px !important;
1169
+ }
1170
+
1171
+ /* ── Topbar ─────────────────────────────────────────────────────────────── */
1172
+ .chex-topbar {
1173
+ display: flex;
1174
+ align-items: center;
1175
+ gap: 16px;
1176
+ padding: 0 28px;
1177
+ height: 52px;
1178
+ position: sticky;
1179
+ top: 0;
1180
+ z-index: 100;
1181
+ background: rgba(11, 14, 20, 0.85);
1182
+ backdrop-filter: blur(var(--blur-strong)) saturate(160%);
1183
+ -webkit-backdrop-filter: blur(var(--blur-strong)) saturate(160%);
1184
+ border-bottom: 1px solid var(--hairline);
1185
+ }
1186
+
1187
+ .chex-logo {
1188
+ width: 22px; height: 22px; border-radius: 5px;
1189
+ background: #E2E8F0;
1190
+ color: #0B0E14; display: grid; place-items: center;
1191
+ font-family: 'JetBrains Mono', monospace; font-weight: 700; font-size: 10px;
1192
+ letter-spacing: -0.05em;
1193
+ box-shadow: 0 2px 10px rgba(0,0,0,0.5);
1194
+ flex-shrink: 0;
1195
+ }
1196
+
1197
+ .chex-name { font-size: 14px; font-weight: 600; letter-spacing: -0.01em; color: var(--fg); font-family: 'Inter', sans-serif; }
1198
+ .chex-tag { font-size: 12px; color: var(--fg-muted); font-weight: 400; padding-left: 12px; border-left: 1px solid rgba(255,255,255,0.08); font-family: 'Inter', sans-serif; }
1199
+
1200
+ .chex-pill {
1201
+ display: inline-flex; align-items: center; gap: 7px;
1202
+ padding: 4px 11px 4px 9px; border: 1px solid var(--border); border-radius: 999px;
1203
+ font-size: 11.5px; color: var(--fg-muted); background: var(--bg-elev);
1204
+ backdrop-filter: blur(12px); -webkit-backdrop-filter: blur(12px);
1205
+ font-family: 'JetBrains Mono', monospace; white-space: nowrap;
1206
+ }
1207
+
1208
+ .chex-dot {
1209
+ width: 6px; height: 6px; border-radius: 50%; background: var(--green);
1210
+ box-shadow: 0 0 0 3px rgba(16,185,129,0.22); display: inline-block; flex-shrink: 0;
1211
+ }
1212
+
1213
+ /* ── Warning banner ─────────────────────────────────────────────────────── */
1214
+ .chex-banner {
1215
+ display: flex; align-items: center; gap: 12px; padding: 11px 20px;
1216
+ border-bottom: 1px solid var(--amber-border); background: var(--amber-bg);
1217
+ backdrop-filter: blur(var(--blur)) saturate(160%); -webkit-backdrop-filter: blur(var(--blur)) saturate(160%);
1218
+ color: var(--amber); font-size: 13px; font-family: 'Inter', sans-serif; font-weight: 500;
1219
+ }
1220
+ .chex-banner-icon { font-size: 14px; flex-shrink: 0; }
1221
+ .chex-banner-body { color: var(--fg); font-weight: 400; line-height: 1.5; }
1222
+ .chex-banner-body strong { color: var(--fg); font-weight: 600; }
1223
+ .chex-banner code { font-family: 'JetBrains Mono', monospace; font-size: 12px; background: rgba(0,0,0,0.06); padding: 1px 5px; border-radius: 4px; }
1224
+
1225
+ /* ── Tab nav ────────────────────────────────────────────────────────────── */
1226
+ .tab-nav {
1227
+ background: rgba(11,14,20,0.85) !important;
1228
+ backdrop-filter: blur(var(--blur)) saturate(160%) !important;
1229
+ -webkit-backdrop-filter: blur(var(--blur)) saturate(160%) !important;
1230
+ border-bottom: 1px solid var(--hairline) !important;
1231
+ border-top: none !important; padding: 0 20px !important; gap: 0 !important;
1232
+ position: sticky !important; top: 52px !important; z-index: 99 !important; overflow: visible !important;
1233
+ }
1234
+
1235
+ .tab-nav button {
1236
+ background: transparent !important; border: none !important; border-radius: 0 !important;
1237
+ padding: 13px 16px !important; color: var(--fg-muted) !important;
1238
+ font-size: 13px !important; font-weight: 500 !important; font-family: 'Inter', sans-serif !important;
1239
+ letter-spacing: -0.003em !important; position: relative !important; white-space: nowrap !important;
1240
+ transition: color 0.15s ease !important; cursor: pointer !important; box-shadow: none !important; outline: none !important;
1241
+ }
1242
+
1243
+ .tab-nav button:hover { color: var(--fg) !important; background: transparent !important; }
1244
+
1245
+ .tab-nav button.selected, .tab-nav button[aria-selected="true"] {
1246
+ color: var(--fg) !important; background: transparent !important; font-weight: 500 !important; box-shadow: none !important;
1247
+ }
1248
+
1249
+ .tab-nav button.selected::after, .tab-nav button[aria-selected="true"]::after {
1250
+ content: ""; position: absolute; left: 12px; right: 12px; bottom: -1px;
1251
+ height: 1.5px; background: var(--fg); border-radius: 2px 2px 0 0;
1252
+ }
1253
+
1254
+ .tabitem { border: none !important; background: transparent !important; padding: 20px 20px !important; }
1255
+
1256
+ /* ── Cards (gr-group) ───────────────────────────────────────────────────── */
1257
+ .gradio-container .gr-group {
1258
+ background: var(--bg-elev) !important;
1259
+ backdrop-filter: blur(var(--blur)) saturate(180%) !important;
1260
+ -webkit-backdrop-filter: blur(var(--blur)) saturate(180%) !important;
1261
+ border: 1px solid var(--border) !important;
1262
+ border-radius: var(--radius-lg) !important;
1263
+ box-shadow: var(--shadow-md) !important;
1264
+ overflow: hidden !important; padding: 0 !important;
1265
+ }
1266
+
1267
+ .gradio-container .gr-group > *:not(.chex-card-header):not(.chex-load-bar) {
1268
+ padding-left: 20px !important; padding-right: 20px !important;
1269
+ }
1270
+ .gradio-container .gr-group > *:last-child { padding-bottom: 18px !important; }
1271
+
1272
+ /* ── Card header (new design) ───────────────────────────────────────────── */
1273
+ .chex-card-header {
1274
+ padding: 14px 20px; display: flex; align-items: center;
1275
+ gap: 10px; border-bottom: 1px solid var(--hairline);
1276
+ }
1277
+
1278
+ .chex-card-icon {
1279
+ opacity: 0.45; flex-shrink: 0; display: flex; align-items: center;
1280
+ }
1281
+
1282
+ .chex-card-title {
1283
+ font-size: 13px; font-weight: 600; letter-spacing: -0.01em;
1284
+ color: var(--fg); white-space: nowrap; font-family: 'Inter', sans-serif;
1285
+ display: inline-flex; align-items: center; gap: 0;
1286
+ }
1287
+
1288
+ .chex-card-sep {
1289
+ font-size: 13px; color: var(--fg-subtle); font-weight: 400;
1290
+ padding: 0 6px; font-family: 'Inter', sans-serif;
1291
+ }
1292
+
1293
+ .chex-card-sub {
1294
+ font-size: 12px; color: var(--fg-subtle); font-weight: 400;
1295
+ font-family: 'Inter', sans-serif; letter-spacing: 0;
1296
+ }
1297
+
1298
+ /* ── Load bar (LOAD chip + sample buttons row) ─────────────────────────── */
1299
+ .chex-load-bar {
1300
+ display: flex; align-items: center; gap: 10px; padding: 10px 16px;
1301
+ border-bottom: 1px solid var(--hairline); background: var(--bg-sunken); flex-wrap: wrap;
1302
+ }
1303
+
1304
+ .chex-load-chip {
1305
+ font-family: 'JetBrains Mono', monospace; font-size: 10px;
1306
+ text-transform: uppercase; letter-spacing: 0.1em;
1307
+ color: var(--fg-subtle); background: var(--bg-elev-strong);
1308
+ border: 1px solid var(--border); border-radius: 4px;
1309
+ padding: 3px 7px; white-space: nowrap; flex-shrink: 0;
1310
+ }
1311
+
1312
+ /* Question label chip */
1313
+ .chex-question-label {
1314
+ font-family: 'JetBrains Mono', monospace; font-size: 10px;
1315
+ text-transform: uppercase; letter-spacing: 0.1em;
1316
+ color: var(--fg-subtle); padding: 14px 20px 6px; display: block;
1317
+ }
1318
+
1319
+ /* ── Suggested hint ─────────────────────────────────────────────────────── */
1320
+ .chex-suggested {
1321
+ display: flex; align-items: center; gap: 10px; padding: 10px 14px;
1322
+ background: rgba(13,18,32,0.04); border: 1px solid var(--border); border-radius: var(--radius);
1323
+ font-size: 12.5px; color: var(--fg-muted); font-family: 'Inter', sans-serif; line-height: 1.4; margin-top: 2px;
1324
+ }
1325
+ .chex-suggested-icon { font-size: 13px; flex-shrink: 0; opacity: 0.7; }
1326
+
1327
+ /* ── Labels ─────────────────────────────────────────────────────────────── */
1328
+ label > span:first-child, .label-wrap span,
1329
+ .gradio-container label span.text-gray-500, span.svelte-1b6s6s {
1330
+ font-family: 'JetBrains Mono', monospace !important; font-size: 10px !important;
1331
+ font-weight: 500 !important; text-transform: uppercase !important; letter-spacing: 0.08em !important;
1332
+ color: var(--fg-subtle) !important; margin-bottom: 6px !important; display: block !important;
1333
+ }
1334
+
1335
+ /* ── Text inputs ────────────────────────────────────────────────────────── */
1336
+ textarea, input[type="text"], input[type="search"],
1337
+ .gradio-container .gr-input, .gradio-container .gr-textarea,
1338
+ .gradio-container [data-testid="textbox"] textarea,
1339
+ .gradio-container [data-testid="textbox"] input {
1340
+ background: var(--bg-input) !important; backdrop-filter: blur(10px) !important;
1341
+ -webkit-backdrop-filter: blur(10px) !important; border: 1px solid var(--border) !important;
1342
+ border-radius: var(--radius) !important; color: var(--fg) !important;
1343
+ font-family: 'Inter', sans-serif !important; font-size: 13px !important;
1344
+ line-height: 1.6 !important; padding: 11px 14px !important;
1345
+ transition: border-color 0.18s ease, box-shadow 0.18s ease, background 0.18s ease !important;
1346
+ resize: vertical !important;
1347
+ }
1348
+
1349
+ textarea:focus, input[type="text"]:focus,
1350
+ .gradio-container [data-testid="textbox"] textarea:focus,
1351
+ .gradio-container [data-testid="textbox"] input:focus {
1352
+ border-color: var(--border-strong) !important; background: var(--bg-elev) !important;
1353
+ box-shadow: 0 0 0 2px rgba(255,255,255,0.05) !important; outline: none !important;
1354
+ }
1355
+
1356
+ textarea::placeholder, input::placeholder { color: var(--fg-subtle) !important; }
1357
+
1358
+ textarea[readonly],
1359
+ .gradio-container [data-testid="textbox"][data-interactive="false"] textarea {
1360
+ background: var(--bg-sunken) !important; border: 1px solid var(--hairline) !important;
1361
+ color: var(--fg) !important; cursor: default !important;
1362
+ }
1363
+
1364
+ /* ── Buttons ────────────────────────────────────────────────────────────── */
1365
+ .gradio-container button {
1366
+ font-family: 'Inter', sans-serif !important; font-size: 13px !important;
1367
+ font-weight: 500 !important; border-radius: var(--radius) !important;
1368
+ padding: 9px 16px !important;
1369
+ transition: opacity 0.15s ease, background 0.15s ease, box-shadow 0.15s ease !important;
1370
+ cursor: pointer !important; letter-spacing: -0.003em !important;
1371
+ }
1372
+
1373
+ .gradio-container button.primary, button.primary {
1374
+ background: var(--fg) !important; color: var(--bg-base) !important; border: 1px solid var(--fg) !important;
1375
+ box-shadow: 0 4px 14px rgba(0,0,0,0.35), 0 1px 0 rgba(255,255,255,0.08) inset !important;
1376
+ }
1377
+ .gradio-container button.primary:hover, button.primary:hover { opacity: 0.9 !important; box-shadow: 0 4px 12px rgba(0,0,0,0.3) !important; }
1378
+
1379
+ .gradio-container button.secondary, button.secondary {
1380
+ background: transparent !important; color: var(--fg-muted) !important;
1381
+ border: 1px solid var(--border-strong) !important; box-shadow: none !important;
1382
+ }
1383
+ .gradio-container button.secondary:hover, button.secondary:hover { background: var(--bg-elev) !important; color: var(--fg) !important; border-color: var(--border-strong) !important; }
1384
+
1385
+ button.sm, .gradio-container button[size="sm"], button.small { font-size: 12px !important; padding: 6px 12px !important; }
1386
+
1387
+ /* ── File upload ────────────────────────────────────────────────────────── */
1388
+ .gradio-container .upload-container, .gradio-container [data-testid="file"] {
1389
+ background: var(--bg-input) !important; border: 1px dashed var(--border-strong) !important; border-radius: var(--radius) !important;
1390
+ }
1391
+
1392
+ /* ── Dataframe / table ──────────────────────────────────────────────────── */
1393
+ .gradio-container .wrap.svelte-a4gbbr, .gradio-container .table-wrap,
1394
+ .gradio-container [data-testid="dataframe"] {
1395
+ background: var(--bg-elev) !important;
1396
+ backdrop-filter: blur(var(--blur)) saturate(180%) !important;
1397
+ -webkit-backdrop-filter: blur(var(--blur)) saturate(180%) !important;
1398
+ border: 1px solid var(--border) !important; border-radius: var(--radius-lg) !important;
1399
+ box-shadow: var(--shadow-md) !important; overflow: hidden !important;
1400
+ }
1401
+
1402
+ .gradio-container table {
1403
+ background: transparent !important; font-size: 13px !important;
1404
+ font-family: 'Inter', sans-serif !important; border-collapse: separate !important;
1405
+ border-spacing: 0 !important; width: 100% !important; border: none !important;
1406
+ box-shadow: none !important; border-radius: 0 !important;
1407
+ }
1408
+
1409
+ .gradio-container th {
1410
+ background: var(--bg-sunken) !important; border-bottom: 1px solid var(--hairline) !important;
1411
+ border-top: none !important; padding: 14px 18px !important;
1412
+ font-family: 'JetBrains Mono', monospace !important; font-size: 10px !important;
1413
+ text-transform: uppercase !important; letter-spacing: 0.08em !important;
1414
+ color: var(--fg-muted) !important; font-weight: 500 !important; text-align: left !important;
1415
+ }
1416
+
1417
+ .gradio-container td {
1418
+ padding: 16px 18px !important; border-top: 1px solid var(--hairline) !important;
1419
+ border-bottom: none !important; vertical-align: top !important; line-height: 1.6 !important;
1420
+ color: var(--fg) !important; background: transparent !important;
1421
+ }
1422
+
1423
+ .gradio-container tr:first-child td { border-top: none !important; }
1424
+
1425
+ /* ── Markdown prose ─────────────────────────────────────────────────────── */
1426
+ .gradio-container .prose, .gradio-container .md, .gradio-container [data-testid="markdown"] {
1427
+ color: var(--fg) !important; font-family: 'Inter', sans-serif !important;
1428
+ font-size: 13px !important; line-height: 1.65 !important;
1429
+ }
1430
+
1431
+ .gradio-container .prose h2, .gradio-container .md h2 {
1432
+ font-size: 18px !important; font-weight: 600 !important; letter-spacing: -0.02em !important;
1433
+ color: var(--fg) !important; margin-bottom: 10px !important; margin-top: 0 !important;
1434
+ }
1435
+
1436
+ .gradio-container .prose p, .gradio-container .md p {
1437
+ color: var(--fg-muted) !important; font-size: 13px !important; line-height: 1.65 !important; margin-bottom: 8px !important;
1438
+ }
1439
+
1440
+ .gradio-container .prose strong, .gradio-container .md strong { color: var(--fg) !important; font-weight: 600 !important; }
1441
+
1442
+ .gradio-container .prose code, .gradio-container .md code {
1443
+ font-family: 'JetBrains Mono', monospace !important; font-size: 12px !important;
1444
+ background: rgba(13,18,32,0.06) !important; padding: 1px 5px !important;
1445
+ border-radius: 4px !important; color: var(--fg) !important;
1446
+ }
1447
+
1448
+ /* ── Benchmark intro card ───────────────────────────────────────────────── */
1449
+ .chex-bench-wrap {
1450
+ background: var(--bg-elev);
1451
+ backdrop-filter: blur(var(--blur)) saturate(180%);
1452
+ -webkit-backdrop-filter: blur(var(--blur)) saturate(180%);
1453
+ border: 1px solid var(--border); border-radius: var(--radius-lg);
1454
+ box-shadow: var(--shadow-md); margin-bottom: 20px; overflow: hidden;
1455
+ }
1456
+
1457
+ .chex-bench-top {
1458
+ padding: 24px 28px 20px; display: flex; align-items: flex-start; gap: 28px;
1459
+ }
1460
+
1461
+ .chex-bench-copy { flex: 1; min-width: 0; }
1462
+ .chex-bench-copy h2 { margin: 0 0 8px; font-size: 18px; font-weight: 600; letter-spacing: -0.02em; color: var(--fg); font-family: 'Inter', sans-serif; }
1463
+ .chex-bench-copy p { margin: 0; color: var(--fg-muted); font-size: 13px; line-height: 1.65; font-family: 'Inter', sans-serif; }
1464
+
1465
+ .chex-bench-stats {
1466
+ display: flex; gap: 0; flex-shrink: 0;
1467
+ border: 1px solid var(--border); border-radius: var(--radius); overflow: hidden;
1468
+ }
1469
+ .chex-bench-stat {
1470
+ padding: 14px 20px; display: flex; flex-direction: column; gap: 3px;
1471
+ border-right: 1px solid var(--border);
1472
+ }
1473
+ .chex-bench-stat:last-child { border-right: none; }
1474
+ .chex-bench-stat .v { font-family: 'Inter', sans-serif; font-size: 22px; font-weight: 600; letter-spacing: -0.03em; line-height: 1.1; }
1475
+ .chex-bench-stat .v.red { color: var(--red); }
1476
+ .chex-bench-stat .v.green { color: var(--green); }
1477
+ .chex-bench-stat .v.white { color: var(--fg); }
1478
+ .chex-bench-stat .k { font-size: 10px; text-transform: uppercase; letter-spacing: 0.08em; color: var(--fg-subtle); font-family: 'JetBrains Mono', monospace; }
1479
+
1480
+ /* ── Benchmark HTML table ───────────────────────────────────────────────── */
1481
+ .chex-bench-table-wrap {
1482
+ background: var(--bg-elev);
1483
+ backdrop-filter: blur(var(--blur)) saturate(180%);
1484
+ -webkit-backdrop-filter: blur(var(--blur)) saturate(180%);
1485
+ border: 1px solid var(--border); border-radius: var(--radius-lg);
1486
+ box-shadow: var(--shadow-md); overflow: hidden;
1487
+ }
1488
+
1489
+ .chex-bench-table {
1490
+ width: 100%; border-collapse: separate; border-spacing: 0;
1491
+ font-family: 'Inter', sans-serif; font-size: 13px;
1492
+ }
1493
+
1494
+ .chex-bench-table th {
1495
+ background: var(--bg-sunken) !important; border-bottom: 1px solid var(--hairline) !important;
1496
+ padding: 13px 18px !important; font-family: 'JetBrains Mono', monospace !important;
1497
+ font-size: 10px !important; text-transform: uppercase !important; letter-spacing: 0.08em !important;
1498
+ color: var(--fg-muted) !important; font-weight: 500 !important; text-align: left !important;
1499
+ white-space: nowrap;
1500
+ }
1501
+
1502
+ .chex-bench-table td {
1503
+ padding: 16px 18px; border-top: 1px solid var(--hairline);
1504
+ vertical-align: top; line-height: 1.55; color: var(--fg);
1505
+ }
1506
+
1507
+ .chex-bench-table tr:first-child td { border-top: none; }
1508
+
1509
+ .chex-bench-table .col-question { color: var(--fg); font-weight: 400; }
1510
+ .chex-bench-table .col-truth { color: var(--fg-muted); font-style: italic; }
1511
+ .chex-bench-table .col-base { color: #F87171; }
1512
+ .chex-bench-table .col-halluc { text-align: center; }
1513
+
1514
+ /* Inline label badges for the benchmark table */
1515
+ .cbadge {
1516
+ display: inline-flex; align-items: center; gap: 6px;
1517
+ padding: 5px 10px; border-radius: 8px; font-size: 11px;
1518
+ font-weight: 600; letter-spacing: 0.02em; font-family: 'Inter', sans-serif;
1519
+ white-space: nowrap;
1520
+ }
1521
+ .cbadge-green { background: rgba(16,185,129,0.12); color: #10B981; border: 1px solid rgba(16,185,129,0.28); }
1522
+ .cbadge-red { background: rgba(244,63,94,0.10); color: #F43F5E; border: 1px solid rgba(244,63,94,0.28); }
1523
+ .cbadge-amber { background: rgba(245,158,11,0.12); color: #F59E0B; border: 1px solid rgba(245,158,11,0.30); }
1524
+ .cbadge-icon { font-size: 12px; }
1525
+
1526
+ .chex-halluc-yes { color: var(--red); font-size: 15px; }
1527
+ .chex-halluc-no { color: var(--green); font-size: 15px; }
1528
+
1529
+ /* ── Footer ─────────────────────────────────────────────────────────────── */
1530
+ .chex-footer {
1531
+ border-top: 1px solid var(--hairline); padding: 13px 28px;
1532
+ display: flex; align-items: center; gap: 18px; color: var(--fg-subtle);
1533
+ font-size: 11px; font-family: 'JetBrains Mono', monospace;
1534
+ background: var(--bg-elev); backdrop-filter: blur(var(--blur));
1535
+ -webkit-backdrop-filter: blur(var(--blur)); margin-top: 24px;
1536
+ justify-content: space-between;
1537
+ }
1538
+ .chex-footer-left { display: flex; align-items: center; gap: 18px; }
1539
+ .chex-footer .sep { opacity: 0.35; }
1540
+ .chex-footer-right { font-size: 11px; color: var(--fg-subtle); opacity: 0.6; }
1541
+
1542
+ /* ── Misc helpers ───────────────────────────────────────────────────────── */
1543
+ .chex-label-wrap { padding: 4px 0 8px; }
1544
+ .chex-divider { height: 1px; background: var(--hairline); margin: 18px 0; }
1545
+ .chex-section-kicker { font-family: 'JetBrains Mono', monospace; font-size: 10px; text-transform: uppercase; letter-spacing: 0.08em; color: var(--fg-subtle); margin-bottom: 10px; display: block; }
1546
+ .chex-card-body { padding: 18px 20px; display: flex; flex-direction: column; gap: 14px; }
1547
+
1548
+ /* ── Empty state ────────────────────────────────────────────────────────── */
1549
+ .chex-empty {
1550
+ display: flex; flex-direction: column; align-items: center; justify-content: center;
1551
+ gap: 10px; padding: 48px 24px; color: var(--fg-subtle); text-align: center;
1552
+ }
1553
+ .chex-empty-icon { font-size: 20px; opacity: 0.4; }
1554
+ .chex-empty-title { font-size: 13.5px; font-weight: 500; color: var(--fg-muted); font-family: 'Inter', sans-serif; }
1555
+ .chex-empty-body { font-size: 12.5px; color: var(--fg-subtle); line-height: 1.6; font-family: 'Inter', sans-serif; max-width: 280px; }
1556
+
1557
+ *::-webkit-scrollbar { width: 8px; height: 8px; }
1558
+ *::-webkit-scrollbar-thumb { background: var(--border-strong); border-radius: 999px; border: 2px solid transparent; background-clip: padding-box; }
1559
+ *::-webkit-scrollbar-track { background: transparent; }
1560
+
1561
+ .gradio-container .gap-4 { gap: 14px !important; }
1562
+ .gradio-container .gap-2 { gap: 8px !important; }
1563
+
1564
+ .tabitem .tab-nav { position: static !important; top: auto !important; }
1565
+
1566
+ @media (max-width: 900px) {
1567
+ .chex-topbar { padding: 0 16px; }
1568
+ .chex-tag { display: none; }
1569
+ .tabitem { padding: 16px !important; }
1570
+ .chex-bench-top { flex-direction: column; gap: 16px; }
1571
+ .chex-bench-stats { flex-direction: row; }
1572
+ .chex-footer { padding: 12px 16px; gap: 12px; flex-wrap: wrap; }
1573
+ }
1574
+ """
1575
+
1576
+ # ---------------------------------------------------------------------------
1577
+ # Static HTML
1578
+ # ---------------------------------------------------------------------------
1579
+
1580
# Static markup for the top bar: logo, product name, tagline, a flexible spacer
# and a status pill. The pill text is a fixed string, not a live status.
# Styled by the .chex-topbar / .chex-tag rules in the stylesheet.
TOPBAR_HTML = """
<div class="chex-topbar">
  <div class="chex-logo">CX</div>
  <span class="chex-name">CHEX</span>
  <span class="chex-tag">grounded answers from documents</span>
  <div style="flex:1"></div>
  <div class="chex-pill"><span class="chex-dot"></span>MI300X &middot; ready</div>
</div>
"""
1589
+
1590
# Static markup for the page footer: version / endpoint / throughput on the
# left, keyboard hints on the right.
# NOTE(review): "tokens/s 142.7" and the endpoint name are hard-coded text, not
# measured values — confirm this is intended before presenting it as telemetry.
FOOTER_HTML = """
<div class="chex-footer">
  <div class="chex-footer-left">
    <span>chex/v0.4.1</span>
    <span class="sep">&middot;</span>
    <span>endpoint: mi300x-east-2</span>
    <span class="sep">&middot;</span>
    <span>tokens/s 142.7</span>
  </div>
  <div class="chex-footer-right">&#8617; to analyse &nbsp;&nbsp; &#8984;K shortcuts</div>
</div>
"""
1602
+
1603
# Static intro card for the Benchmark tab: explanatory copy on the left and
# three headline figures on the right. The figures are fixed text and must be
# kept in step with the rows of BENCH_TABLE_HTML by hand.
BENCH_INTRO_HTML = """
<div class="chex-bench-wrap">
  <div class="chex-bench-top">
    <div class="chex-bench-copy">
      <h2>Why grounding matters</h2>
      <p>We ran the same five questions through a base instruction-tuned model and through CHEX. The base model invented or extrapolated answers in 4 of 5 cases &mdash; confident, plausible, wrong. CHEX returned a verifiable label, a verbatim citation, and refused to answer when the source was silent.</p>
    </div>
    <div class="chex-bench-stats">
      <div class="chex-bench-stat"><div class="v red">4/5</div><div class="k">Base hallucinations</div></div>
      <div class="chex-bench-stat"><div class="v green">5/5</div><div class="k">CHEX correct</div></div>
      <div class="chex-bench-stat"><div class="v white">100%</div><div class="k">Cited verbatim</div></div>
    </div>
  </div>
</div>
"""
1618
+
1619
# Static comparison table for the Benchmark tab: five questions, the ground
# truth, the base model's answer, the CHEX label + answer, and a marker for
# whether the base model hallucinated ("!" = yes, check mark = no).
# Header cells carry scope="col" so assistive technology can associate each
# data cell with its column.
# NOTE(review): row 4 asks "Within how many days ..." while every answer is
# given in hours — confirm the question wording against the benchmark run.
BENCH_TABLE_HTML = """
<div class="chex-bench-table-wrap">
  <table class="chex-bench-table">
    <thead>
      <tr>
        <th scope="col">Question</th>
        <th scope="col">Ground truth</th>
        <th scope="col">Base model</th>
        <th scope="col">CHEX</th>
        <th scope="col">Halluc.</th>
      </tr>
    </thead>
    <tbody>
      <tr>
        <td class="col-question">What is the annual license fee in the Helix MSA?</td>
        <td class="col-truth">$144,000</td>
        <td class="col-base">$120,000 per year (industry-standard rate).</td>
        <td>
          <span class="cbadge cbadge-green"><span class="cbadge-icon">&#10003;</span> GROUNDED</span>
          <div style="margin-top:8px;font-size:12.5px;color:var(--fg-muted)">$144,000 per year.</div>
        </td>
        <td class="col-halluc"><span class="chex-halluc-yes">!</span></td>
      </tr>
      <tr>
        <td class="col-question">What is the SLA uptime in the Northwind MSA?</td>
        <td class="col-truth">99.9% per calendar month</td>
        <td class="col-base">99.95% with four nines on premium tier.</td>
        <td>
          <span class="cbadge cbadge-green"><span class="cbadge-icon">&#10003;</span> GROUNDED</span>
          <div style="margin-top:8px;font-size:12.5px;color:var(--fg-muted)">99.9% per calendar month, excluding maintenance.</div>
        </td>
        <td class="col-halluc"><span class="chex-halluc-yes">!</span></td>
      </tr>
      <tr>
        <td class="col-question">Does the Aperture NDA include an indemnification clause?</td>
        <td class="col-truth" style="font-style:italic">Not present in the document.</td>
        <td class="col-base">Yes &mdash; Section 7 of the NDA contains standard mutual indemnification.</td>
        <td>
          <span class="cbadge cbadge-red"><span class="cbadge-icon">&#10007;</span> ABSENT</span>
          <div style="margin-top:8px;font-size:12.5px;color:var(--fg-muted)">No indemnification clause is present.</div>
        </td>
        <td class="col-halluc"><span class="chex-halluc-yes">!</span></td>
      </tr>
      <tr>
        <td class="col-question">Within how many days must security incidents be reported?</td>
        <td class="col-truth">72 hours</td>
        <td class="col-base">72 hours.</td>
        <td>
          <span class="cbadge cbadge-green"><span class="cbadge-icon">&#10003;</span> GROUNDED</span>
          <div style="margin-top:8px;font-size:12.5px;color:var(--fg-muted)">Within 72 hours of becoming aware.</div>
        </td>
        <td class="col-halluc"><span class="chex-halluc-no">&#10003;</span></td>
      </tr>
      <tr>
        <td class="col-question">Is the NDA perpetual?</td>
        <td class="col-truth" style="font-style:italic">No &mdash; 2 year term.</td>
        <td class="col-base">The NDA appears to be perpetual based on standard NDA practice.</td>
        <td>
          <span class="cbadge cbadge-amber"><span class="cbadge-icon">&#9888;</span> CONTRADICTS PRIOR</span>
          <div style="margin-top:8px;font-size:12.5px;color:var(--fg-muted)">No &mdash; 2 year term, contradicts the premise.</div>
        </td>
        <td class="col-halluc"><span class="chex-halluc-yes">!</span></td>
      </tr>
    </tbody>
  </table>
</div>
"""
1686
+
1687
# Card header shown above the contract text box: document icon, title and a
# short hint about how to provide the contract.
CONTRACT_SOURCE_HEADER_HTML = """
<div class="chex-card-header">
  <div class="chex-card-icon">
    <svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/><polyline points="14 2 14 8 20 8"/></svg>
  </div>
  <span class="chex-card-title">Contract source</span>
  <span class="chex-card-sep">/</span>
  <span class="chex-card-sub">paste or load sample</span>
</div>
"""
1697
+
1698
# Card header shown above the contract question / result panel: bolt icon,
# title and a short prompt.
CONTRACT_RESULTS_HEADER_HTML = """
<div class="chex-card-header">
  <div class="chex-card-icon">
    <svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polygon points="13 2 3 14 12 14 11 22 21 10 12 10 13 2"/></svg>
  </div>
  <span class="chex-card-title">Classification</span>
  <span class="chex-card-sep">/</span>
  <span class="chex-card-sub">ask a question of the source</span>
</div>
"""
1708
+
1709
# Small "LOAD" chip rendered between the contract text box and the row of
# sample-contract buttons.
LOAD_BAR_HTML = """
<div class="chex-load-bar">
  <span class="chex-load-chip">LOAD</span>
</div>
"""
1714
+
1715
# Card header shown above the bank-statement input tabs: card icon, title and
# the list of accepted input routes. The subtitle names every input tab the UI
# offers (paste, PDF, CSV, TXT, Excel, OFX/QFX) so it does not under-report
# the supported formats.
STATEMENT_SOURCE_HEADER_HTML = """
<div class="chex-card-header">
  <div class="chex-card-icon">
    <svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="2" y="5" width="20" height="14" rx="2"/><line x1="2" y1="10" x2="22" y2="10"/></svg>
  </div>
  <span class="chex-card-title">Statement source</span>
  <span class="chex-card-sep">/</span>
  <span class="chex-card-sub">paste &middot; upload pdf &middot; csv &middot; txt &middot; xlsx &middot; ofx/qfx</span>
</div>
"""
1725
+
1726
# Card header shown above the bank-statement results panel: bolt icon, title
# and a "summary + Q&A" subtitle (ampersand kept as an HTML entity).
STATEMENT_RESULTS_HEADER_HTML = """
<div class="chex-card-header">
  <div class="chex-card-icon">
    <svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polygon points="13 2 3 14 12 14 11 22 21 10 12 10 13 2"/></svg>
  </div>
  <span class="chex-card-title">Statement analysis</span>
  <span class="chex-card-sep">/</span>
  <span class="chex-card-sub">summary + Q&amp;A</span>
</div>
"""
1736
+
1737
# Placeholder shown in the contract result area before any analysis has run.
CONTRACT_EMPTY_HTML = """
<div class="chex-empty">
  <div class="chex-empty-icon">&#9889;</div>
  <div class="chex-empty-title">No analysis yet</div>
  <div class="chex-empty-body">Load or paste a contract, type a question, then press Analyze. CHEX will return a label, an exact citation, and its reasoning.</div>
</div>
"""
1744
+
1745
# Placeholder shown in the statement summary area before a statement has been
# analysed.
STATEMENT_EMPTY_HTML = """
<div class="chex-empty">
  <div class="chex-empty-icon">&#9141;</div>
  <div class="chex-empty-title">Awaiting statement</div>
  <div class="chex-empty-body">Paste, upload, or load the sample. CHEX will summarise totals, surface recurring payments, and flag anomalies you can drill into.</div>
</div>
"""
1752
+
1753
+ # ---------------------------------------------------------------------------
1754
+ # Gradio UI
1755
+ # ---------------------------------------------------------------------------
1756
+
1757
# Builds the whole Gradio app: a top bar, three tabs (contract analysis, bank
# statements, benchmark), a footer, and all event wiring. Event listeners are
# registered inside the Blocks context so Gradio can attach them to `demo`.
with gr.Blocks(title="CHEX — Document Intelligence") as demo:

    gr.HTML(TOPBAR_HTML)

    # Optional banner; rendered only when WARNING_HTML is non-empty.
    if WARNING_HTML:
        gr.HTML(WARNING_HTML)

    with gr.Tabs():

        # ── Tab 01: Contract analysis ──────────────────────────────────── #
        with gr.Tab("01 Contract analysis"):
            with gr.Row(equal_height=False):

                # Left column: contract text plus sample loaders.
                with gr.Column(scale=9):
                    with gr.Group():
                        gr.HTML(CONTRACT_SOURCE_HEADER_HTML)
                        contract_input = gr.Textbox(
                            label="Contract text",
                            lines=20,
                            # The sample buttons are laid out below this box.
                            placeholder="Paste contract text here, or load a sample below…",
                            show_label=False,
                        )
                        gr.HTML(LOAD_BAR_HTML)
                        with gr.Row():
                            btn_software = gr.Button("Software License", variant="secondary", size="sm")
                            btn_nda = gr.Button("NDA", variant="secondary", size="sm")
                            btn_service = gr.Button("Service Agreement", variant="secondary", size="sm")
                        # Hidden until a sample is loaded; then shows a suggested question.
                        suggested_q = gr.HTML(value="", visible=False)

                # Right column: question box and classification result.
                with gr.Column(scale=11):
                    with gr.Group():
                        gr.HTML(CONTRACT_RESULTS_HEADER_HTML)
                        gr.HTML('<span class="chex-question-label">Question</span>')
                        with gr.Row():
                            question_input = gr.Textbox(
                                label="Question",
                                placeholder="e.g., What is the annual license fee?",
                                lines=1,
                                show_label=False,
                                scale=8,
                            )
                            analyze_btn = gr.Button("Analyze ↵", variant="primary", scale=2)
                        label_display = gr.HTML(value=CONTRACT_EMPTY_HTML)
                        # Result fields stay hidden until the first analysis returns.
                        answer_output = gr.Textbox(label="Answer", interactive=False, lines=3, visible=False)
                        citation_output = gr.Textbox(label="Citation", interactive=False, lines=2, visible=False)
                        reasoning_output = gr.Textbox(label="Reasoning", interactive=False, lines=3, visible=False)

        # ── Tab 02: Bank statements ────────────────────────────────────── #
        with gr.Tab("02 Bank statements"):
            with gr.Row(equal_height=False):

                # Left column: one sub-tab per input route.
                with gr.Column(scale=9):
                    with gr.Group():
                        gr.HTML(STATEMENT_SOURCE_HEADER_HTML)
                        with gr.Tabs():
                            with gr.Tab("Paste text"):
                                bank_paste_input = gr.Textbox(
                                    label="Bank statement text (supports multiple)",
                                    lines=20,
                                    placeholder=(
                                        "Paste your statement here, e.g. lines like: 2025-03-15 Direct deposit +5,420.00…"
                                    ),
                                    show_label=False,
                                )
                                btn_load_statement = gr.Button("Load sample statement", variant="secondary", size="sm")
                            with gr.Tab("Upload PDF"):
                                bank_pdf_input = gr.File(
                                    label="PDF bank statement (multiple allowed)",
                                    file_types=[".pdf"],
                                    file_count="multiple",
                                )
                                bank_pdf_password_input = gr.Textbox(
                                    label="PDF password (optional)",
                                    type="password",
                                    placeholder="Leave blank if PDF is not encrypted",
                                    show_label=False,
                                )
                            with gr.Tab("Upload CSV"):
                                bank_csv_input = gr.File(
                                    label="CSV bank statement (multiple allowed)",
                                    file_types=[".csv"],
                                    file_count="multiple",
                                )
                            with gr.Tab("Upload TXT"):
                                bank_txt_input = gr.File(
                                    label="TXT bank statement (multiple allowed)",
                                    file_types=[".txt", ".text"],
                                    file_count="multiple",
                                )
                            with gr.Tab("Upload Excel"):
                                bank_xlsx_input = gr.File(
                                    label="Excel bank statement (.xlsx, multiple allowed)",
                                    file_types=[".xlsx"],
                                    file_count="multiple",
                                )
                            with gr.Tab("Upload OFX / QFX"):
                                bank_ofx_input = gr.File(
                                    label="OFX / QFX bank statement (multiple allowed)",
                                    file_types=[".ofx", ".qfx"],
                                    file_count="multiple",
                                )

                # Right column: summary, exports and follow-up Q&A.
                with gr.Column(scale=11):
                    with gr.Group():
                        gr.HTML(STATEMENT_RESULTS_HEADER_HTML)
                        analyse_stmt_btn = gr.Button("Analyse statement", variant="primary")
                        summary_output = gr.HTML(value=STATEMENT_EMPTY_HTML)
                        summary_md_output = gr.Markdown(value="", visible=False)
                        with gr.Row():
                            export_csv_btn = gr.Button("Export CSV", variant="secondary", size="sm")
                            export_pdf_btn = gr.Button("Export PDF", variant="secondary", size="sm")
                        export_status = gr.Markdown(value="")
                        export_file = gr.File(label="Download", interactive=False)
                        gr.HTML('<div class="chex-divider"></div>')
                        gr.HTML('<span class="chex-section-kicker">Ask a question</span>')
                        with gr.Row():
                            bank_question_input = gr.Textbox(
                                label="Question",
                                placeholder="e.g., What was the largest debit this month?",
                                lines=1,
                                show_label=False,
                                scale=8,
                            )
                            bank_ask_btn = gr.Button("Ask ↵", variant="secondary", scale=2)
                        bank_label_display = gr.HTML(value=format_label_html("N/A"))
                        bank_answer_output = gr.Textbox(label="Answer", interactive=False, lines=3)
                        bank_citation_output = gr.Textbox(label="Citation", interactive=False, lines=2)
                        bank_reasoning_output = gr.Textbox(label="Reasoning", interactive=False, lines=3)

            # Per-session state: combined statement text and the summary payload
            # produced by the last analysis (consumed by Q&A and the exporters).
            bank_statement_state = gr.State("")
            bank_summary_state = gr.State("")
            # Hidden components that exist only to expose a `gradio_client` API endpoint.
            bank_api_output = gr.JSON(visible=False)
            bank_api_question = gr.Textbox(visible=False)
            bank_api_btn = gr.Button(visible=False)

        # ── Tab 03: Benchmark ──────────────────────────────────────────── #
        with gr.Tab("03 Benchmark"):
            gr.HTML(BENCH_INTRO_HTML)
            gr.HTML(BENCH_TABLE_HTML)

    gr.HTML(FOOTER_HTML)

    # ── Event handlers ─────────────────────────────────────────────────── #

    def _suggested_hint(question_html: str):
        """Return a visible update for `suggested_q` showing *question_html* as the hint.

        *question_html* is inserted as-is, so it may contain inline markup.
        """
        markup = (
            '<div class="chex-suggested"><span class="chex-suggested-icon">💡</span>'
            f'<span><strong>Suggested:</strong> {question_html}</span></div>'
        )
        return gr.update(value=markup, visible=True)

    def load_software():
        """Load the software-license sample, its sample question, and a hint."""
        return (
            SOFTWARE_LICENSE,
            SAMPLE_QUESTIONS["software_license.txt"],
            _suggested_hint("What is the limitation of liability in this agreement?"),
        )

    def load_nda():
        """Load the NDA sample, its sample question, and a hint."""
        return (
            NDA,
            SAMPLE_QUESTIONS["nda.txt"],
            _suggested_hint("Does this agreement include a non-compete clause?"),
        )

    def load_service():
        """Load the service-agreement sample, its sample question, and a hint."""
        return (
            SERVICE_AGREEMENT,
            SAMPLE_QUESTIONS["service_agreement.txt"],
            _suggested_hint(
                "Does this contract include a termination for convenience clause? "
                "<em>(expected: ABSENT)</em>"
            ),
        )

    btn_software.click(fn=load_software, inputs=[], outputs=[contract_input, question_input, suggested_q])
    btn_nda.click(fn=load_nda, inputs=[], outputs=[contract_input, question_input, suggested_q])
    btn_service.click(fn=load_service, inputs=[], outputs=[contract_input, question_input, suggested_q])

    def analyze_contract_ui(contract_text: str, question: str):
        """Run `analyze_contract` and reveal the three result text boxes.

        Returns the label HTML followed by visible updates for the answer,
        citation and reasoning fields, in the order of the wired outputs.
        """
        label_html, answer, citation, reasoning = analyze_contract(contract_text, question)
        return (
            label_html,
            gr.update(value=answer, visible=True),
            gr.update(value=citation, visible=True),
            gr.update(value=reasoning, visible=True),
        )

    # One listener for both triggers: registering click and submit separately
    # with the same api_name makes Gradio rename the second endpoint.
    gr.on(
        triggers=[analyze_btn.click, question_input.submit],
        fn=analyze_contract_ui,
        inputs=[contract_input, question_input],
        outputs=[label_display, answer_output, citation_output, reasoning_output],
        api_name="contract_analyze",
    )

    btn_load_statement.click(fn=lambda: SAMPLE_STATEMENT, inputs=[], outputs=[bank_paste_input])

    def analyse_bank_ui(paste_text, pdf_file, pdf_password, csv_file, txt_file, xlsx_file, ofx_file):
        """Analyse the supplied statement inputs and update the summary panel.

        Hides the HTML placeholder, shows the Markdown summary, and stores the
        combined text and summary payload in session state.
        """
        summary_md, combined_text, summary_json = analyse_bank_statement(
            paste_text, pdf_file, pdf_password, csv_file, txt_file, xlsx_file, ofx_file
        )
        return (
            gr.update(value="", visible=False),
            gr.update(value=summary_md, visible=True),
            combined_text,
            summary_json,
        )

    analyse_stmt_btn.click(
        fn=analyse_bank_ui,
        inputs=[
            bank_paste_input,
            bank_pdf_input,
            bank_pdf_password_input,
            bank_csv_input,
            bank_txt_input,
            bank_xlsx_input,
            bank_ofx_input,
        ],
        outputs=[summary_output, summary_md_output, bank_statement_state, bank_summary_state],
    )

    export_csv_btn.click(
        fn=export_bank_summary_csv,
        inputs=[bank_summary_state],
        outputs=[export_file, export_status],
    )
    export_pdf_btn.click(
        fn=export_bank_summary_pdf,
        inputs=[bank_summary_state],
        outputs=[export_file, export_status],
    )

    bank_ask_btn.click(
        fn=bank_qa,
        inputs=[bank_statement_state, bank_question_input],
        outputs=[bank_label_display, bank_answer_output, bank_citation_output, bank_reasoning_output],
    )
    bank_question_input.submit(
        fn=bank_qa,
        inputs=[bank_statement_state, bank_question_input],
        outputs=[bank_label_display, bank_answer_output, bank_citation_output, bank_reasoning_output],
    )

    def bank_analyze_api(
        paste_text: str,
        pdf_files,
        pdf_password: str | None,
        csv_files,
        txt_files,
        xlsx_files,
        ofx_files,
        question: str | None,
    ) -> dict:
        """Analyse statement inputs and optionally answer one question, as JSON.

        Returns a dict with keys ``summary_markdown``, ``combined_text``,
        ``summary_json`` and ``qa``. ``qa`` is ``None`` when *question* is
        missing or blank; otherwise it holds ``label_html``, ``answer``,
        ``citation`` and ``reasoning`` from `bank_qa`.
        """
        summary_md, combined_text, summary_json = analyse_bank_statement(
            paste_text,
            pdf_files,
            pdf_password,
            csv_files,
            txt_files,
            xlsx_files,
            ofx_files,
        )

        qa: dict | None = None
        cleaned_question = (question or "").strip()
        if cleaned_question:
            label_html, answer, citation, reasoning = bank_qa(combined_text, cleaned_question)
            qa = {
                "label_html": label_html,
                "answer": answer,
                "citation": citation,
                "reasoning": reasoning,
            }

        return {
            "summary_markdown": summary_md,
            "combined_text": combined_text,
            "summary_json": summary_json,
            "qa": qa,
        }

    bank_api_btn.click(
        fn=bank_analyze_api,
        inputs=[
            bank_paste_input,
            bank_pdf_input,
            bank_pdf_password_input,
            bank_csv_input,
            bank_txt_input,
            bank_xlsx_input,
            bank_ofx_input,
            bank_api_question,
        ],
        outputs=[bank_api_output],
        api_name="bank_analyze",
    )
2041
+
2042
+
2043
if __name__ == "__main__":
    # Start the app only when run as a script. Errors are surfaced in the UI
    # (show_error=True) and server-side rendering is turned off.
    # NOTE(review): theme and css are passed to launch() rather than to
    # gr.Blocks(); that needs a Gradio release whose launch() accepts them —
    # confirm against the pinned Gradio version.
    demo.launch(show_error=True, theme=gr.themes.Base(), css=CHEX_CSS, ssr_mode=False)