Eklavya73 committed on
Commit
4c5208f
·
verified ·
1 Parent(s): 45993cd

Upload 6 files

Browse files
Files changed (6) hide show
  1. .gitattributes +2 -34
  2. .gitignore +7 -0
  3. __init__.py +1 -0
  4. app.py +853 -0
  5. requirements.txt +14 -0
  6. runtime.txt +1 -0
.gitattributes CHANGED
@@ -1,36 +1,4 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
  *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
  *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- Data/Domain-A_Dataset_Clean.csv filter=lfs diff=lfs merge=lfs -text
 
1
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
2
  *.npy filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
3
  *.pkl filter=lfs diff=lfs merge=lfs -text
4
+ *.csv filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ .DS_Store
4
+ Thumbs.db
5
+ .gradio/
6
+ tmp/
7
+ temp/
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Shared deployment utilities for the intelligent ticket auto-routing system."""
app.py ADDED
@@ -0,0 +1,853 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Intelligent Ticket Auto-Routing System - Hugging Face Spaces App
3
+ ================================================================
4
+ Converts support tickets into structured routing decisions:
5
+ - Multi-label tag classification
6
+ - Department routing (hybrid: tag-voting + semantic similarity)
7
+ - Priority prediction
8
+ - Duplicate detection via FAISS
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import csv
14
+ import os
15
+ import tempfile
16
+ import time
17
+ import uuid
18
+ from datetime import datetime
19
+ from pathlib import Path
20
+
21
+ import gradio as gr
22
+ import joblib
23
+ import numpy as np
24
+ from sentence_transformers import SentenceTransformer
25
+
26
+ try:
27
+ from .calibration_utils import (
28
+ calibrate_probabilities,
29
+ load_temperature_scaler,
30
+ )
31
+ from .duplicate_detection_utils import CachedDuplicateDetectionEngine
32
+ from .hybrid_routing_utils import (
33
+ DEFAULT_TAG_TO_DEPARTMENT,
34
+ assert_valid_routing_label_policy,
35
+ compute_department_hybrid_scores,
36
+ load_routing_label_policy,
37
+ )
38
+ from .review_policy_utils import (
39
+ apply_controlled_review,
40
+ load_review_policy,
41
+ )
42
+ from .runtime_utils import (
43
+ load_model_config,
44
+ load_routing_config,
45
+ resolve_dataset_file,
46
+ resolve_model_dir,
47
+ resolve_model_reference,
48
+ )
49
+ except ImportError: # pragma: no cover
50
+ from calibration_utils import (
51
+ calibrate_probabilities,
52
+ load_temperature_scaler,
53
+ )
54
+ from duplicate_detection_utils import CachedDuplicateDetectionEngine
55
+ from hybrid_routing_utils import (
56
+ DEFAULT_TAG_TO_DEPARTMENT,
57
+ assert_valid_routing_label_policy,
58
+ compute_department_hybrid_scores,
59
+ load_routing_label_policy,
60
+ )
61
+ from review_policy_utils import (
62
+ apply_controlled_review,
63
+ load_review_policy,
64
+ )
65
+ from runtime_utils import (
66
+ load_model_config,
67
+ load_routing_config,
68
+ resolve_dataset_file,
69
+ resolve_model_dir,
70
+ resolve_model_reference,
71
+ )
72
+
73
+
74
# Resolve app-relative paths and routing configuration at import time.
APP_DIR = Path(__file__).resolve().parent
MODEL_DIR = resolve_model_dir(APP_DIR)

# Routing config drives the fallback department and priority-based overrides.
ROUTING_CONFIG, ROUTING_CONFIG_PATH = load_routing_config(APP_DIR)
DEFAULT_DEPARTMENT = str(
    ROUTING_CONFIG.get("default_department", "Human_Review")
)
# Map of lowercased priority label -> department that overrides normal routing
# (e.g. "critical" tickets escalate regardless of tag votes).
PRIORITY_ESCALATION = {
    str(priority).lower(): department
    for priority, department in (ROUTING_CONFIG.get("priority_escalation") or {}).items()
}
# Evaluation log lives in the OS temp dir — Spaces containers have a writable /tmp.
LOG_PATH = os.path.join(tempfile.gettempdir(), "routing_evaluation_log.csv")
86
+
87
+
88
print("Loading SBERT model...")
model_config = load_model_config(APP_DIR)
# Two SBERT encoders may be configured: one fine-tuned for routing, one for
# duplicate detection; they are shared when they resolve to the same reference.
routing_sbert_model_name = resolve_model_reference(
    model_config.get("sbert_model", "Eklavya73/sbert_finetuned"),
    base_dir=APP_DIR,
    model_dir=MODEL_DIR,
)
duplicate_sbert_model_name = resolve_model_reference(
    model_config.get("duplicate_sbert_model", "Eklavya73/duplicate_sbert"),
    base_dir=APP_DIR,
    model_dir=MODEL_DIR,
    default="all-mpnet-base-v2",
)
routing_sbert = SentenceTransformer(routing_sbert_model_name)
duplicate_sbert = (
    routing_sbert
    if duplicate_sbert_model_name == routing_sbert_model_name
    else SentenceTransformer(duplicate_sbert_model_name)
)

print("Loading classifiers...")
# Tag classifier + per-tag calibrators + temperature scaler for probability
# calibration, and the controlled-review policy thresholds.
tag_model = joblib.load(MODEL_DIR / "sbert_classifier.pkl")
tag_calibrators = joblib.load(MODEL_DIR / "tag_calibrators.pkl")
temperature_scaler = load_temperature_scaler(MODEL_DIR / "tag_temperature_scaler.pkl")
review_policy = load_review_policy(MODEL_DIR / "routing_review_policy.pkl")

# Priority model may be stored bare or wrapped in a {"model": ...} bundle.
priority_bundle = joblib.load(MODEL_DIR / "tuned_priority_model.pkl")
priority_model = (
    priority_bundle["model"]
    if isinstance(priority_bundle, dict) and "model" in priority_bundle
    else priority_bundle
)
priority_encoder = joblib.load(MODEL_DIR / "priority_encoder.pkl")
hf_scaler = joblib.load(MODEL_DIR / "hf_scaler.pkl")

tag_binarizer = joblib.load(MODEL_DIR / "mlb_tag_binarizer.pkl")
tag_list = list(tag_binarizer.classes_)

# Department prototype embeddings + the tag->department label policy used by
# the hybrid scorer; the policy is validated against the loaded artifacts.
dept_prototypes = joblib.load(MODEL_DIR / "department_prototypes.pkl")
routing_label_policy = load_routing_label_policy(
    MODEL_DIR / "routing_label_policy.pkl",
    fallback_tag_to_department=ROUTING_CONFIG.get(
        "departments",
        DEFAULT_TAG_TO_DEPARTMENT,
    ),
    valid_tags=tag_list,
    valid_departments=dept_prototypes.keys(),
    default_department=DEFAULT_DEPARTMENT,
)
tag_to_department = routing_label_policy["tag_to_department"]
assert_valid_routing_label_policy(
    routing_label_policy,
    valid_tags=tag_list,
    valid_departments=dept_prototypes.keys(),
)

print("Loading duplicate detection index...")
duplicate_engine = CachedDuplicateDetectionEngine(APP_DIR)

# Startup summary for Spaces logs.
print(f"[OK] Tags: {len(tag_list)}, Departments: {len(dept_prototypes)}")
print(f"[OK] Routing label policy: {len(tag_to_department)} active mappings")
print(
    "[OK] Routing config: "
    f"{ROUTING_CONFIG_PATH if ROUTING_CONFIG_PATH is not None else 'defaults'}"
)
print(f"[OK] Default human-review department: {DEFAULT_DEPARTMENT}")
print(f"[OK] Routing SBERT model: {routing_sbert_model_name}")
print(f"[OK] Duplicate SBERT model: {duplicate_sbert_model_name}")
print(f"[OK] Duplicate index: {duplicate_engine.index_size} vectors")
print(f"[OK] Duplicate threshold: {duplicate_engine.duplicate_threshold:.4f}")
print(f"[OK] Temperature scaler: T={temperature_scaler.get('temperature', 1.0):.3f}")
print(
    "[OK] Review policy: "
    f"target={review_policy.get('target_review_fraction', 0.15):.0%}, "
    f"percentile_threshold={review_policy.get('percentile_threshold', 0.55):.3f}, "
    f"fallback_threshold={review_policy.get('fallback_threshold', 0.55):.3f}"
)
165
+
166
+
167
def encode_ticket_embedding(text, encoder):
    """Encode *text* with *encoder* and L2-normalize the resulting vector.

    The encoder output is flattened to 1-D. A zero vector is returned
    unchanged to avoid dividing by zero.
    """
    vector = np.asarray(encoder.encode(text), dtype=float).reshape(-1)
    magnitude = np.linalg.norm(vector)
    return vector if magnitude == 0.0 else vector / magnitude
173
+
174
+
175
def predict_tags(text, emb):
    """Score every tag for an embedded ticket.

    Returns (top5_indices, top5_probs, calibrated_probs, raw_probs),
    with indices ordered by descending calibrated probability. The raw
    classifier output is calibrated per-tag and temperature-scaled.
    """
    raw = np.asarray(tag_model.predict_proba([emb])[0], dtype=float)
    calibrated = calibrate_probabilities(
        raw,
        tag_calibrators=tag_calibrators,
        temperature_scaler=temperature_scaler,
    )
    best = np.argsort(calibrated)[-5:][::-1]
    return best, calibrated[best], calibrated, raw
184
+
185
+
186
def extract_features(text):
    """Hand-crafted lexical features consumed by the priority model.

    Returns a fixed-order six-element list: character count, word count,
    smoothed lexical diversity, mean word length, urgency-keyword hits,
    and negation-keyword hits. Keyword hits are substring tests against
    the lowercased full text (each keyword counts at most once) — this
    matches how the priority model was trained, so do not change it.
    """
    tokens = text.split()
    lowered = text.lower()
    urgency_terms = ["urgent", "critical", "down"]
    negation_terms = ["not", "cannot", "no"]
    return [
        len(text),
        len(tokens),
        len(set(tokens)) / (len(tokens) + 1),
        np.mean([len(token) for token in tokens]) if tokens else 0,
        sum(term in lowered for term in urgency_terms),
        sum(term in lowered for term in negation_terms),
    ]
196
+
197
+
198
def predict_priority(text, emb, return_confidence=False):
    """Predict the ticket priority from the embedding + lexical features.

    Returns the label, or (label, confidence) when *return_confidence*
    is True. Confidence is NaN when the model exposes no predict_proba
    or probability lookup fails for any reason.
    """
    scaled = hf_scaler.transform([extract_features(text)])
    x = np.hstack([emb.reshape(1, -1), scaled])
    idx = int(priority_model.predict(x)[0])
    label = str(priority_encoder.classes_[idx])
    confidence = float("nan")

    if hasattr(priority_model, "predict_proba"):
        # Best-effort: some bundled models raise on predict_proba.
        try:
            probabilities = np.asarray(
                priority_model.predict_proba(x)[0],
                dtype=float,
            ).reshape(-1)
            if probabilities.size:
                confidence = float(probabilities[idx])
        except Exception:
            confidence = float("nan")

    return (label, confidence) if return_confidence else label
220
+
221
+
222
# Hybrid-score blend weights (classifier vote vs. prototype similarity);
# they sum to 1.0.
HYBRID_CLASSIFIER_WEIGHT = 0.7
HYBRID_SIMILARITY_WEIGHT = 0.3
# Stage-1 hard floor: below this hybrid confidence the ticket goes to review.
HYBRID_FLOOR = 0.45
# Floor for "auto-route but flag for QA" when margin/entropy checks fail.
FLAGGED_HYBRID_FLOOR = 0.30
# Stage-2 gates on the calibrated tag distribution.
MARGIN_THRESHOLD = 0.15
ENTROPY_THRESHOLD = 1.8
228
+
229
+
230
def compute_confidence_metrics(calibrated_probs):
    """Return (margin, entropy) confidence metrics for a probability vector.

    margin  -- top-1 minus top-2 value of the (flattened) input.
    entropy -- Shannon entropy (natural log) of the input renormalized
               to sum to 1.

    An empty input yields (0.0, inf) so callers treat it as zero confidence.
    """
    probs = np.asarray(calibrated_probs, dtype=float).reshape(-1)
    if probs.size == 0:
        return 0.0, float("inf")

    sorted_probs = np.sort(probs)[::-1]
    top1 = float(sorted_probs[0])
    top2 = float(sorted_probs[1]) if len(sorted_probs) > 1 else 0.0
    margin = top1 - top2

    # Clip away zeros so log() stays finite. After clipping, every entry is
    # >= 1e-12 and the vector is non-empty, so the sum is strictly positive —
    # the previous `total == 0.0` uniform-fallback branch was unreachable
    # and has been removed.
    p = np.clip(probs, 1e-12, None)
    p = p / float(p.sum())
    entropy = float(-np.sum(p * np.log(p)))
    return margin, entropy
248
+
249
+
250
def decide_routing_mode(hybrid_confidence, calibrated_probs):
    """Map confidence metrics onto a routing mode.

    Returns (mode, needs_review, margin, entropy) where mode is one of
    "AUTO_ROUTE", "AUTO_ROUTE_FLAGGED", or "HUMAN_REVIEW".
    """
    margin, entropy = compute_confidence_metrics(calibrated_probs)

    # Stage 1: reject outright below the hard hybrid floor.
    if hybrid_confidence < HYBRID_FLOOR:
        mode, review = "HUMAN_REVIEW", True
    # Stage 2: a confident tag distribution routes cleanly.
    elif margin >= MARGIN_THRESHOLD or entropy <= ENTROPY_THRESHOLD:
        mode, review = "AUTO_ROUTE", False
    # Ambiguous distribution but acceptable hybrid score: route with a QA flag.
    elif hybrid_confidence >= FLAGGED_HYBRID_FLOOR:
        mode, review = "AUTO_ROUTE_FLAGGED", True
    else:
        mode, review = "HUMAN_REVIEW", True

    return mode, review, margin, entropy
263
+
264
+
265
def route_ticket(emb, text):
    """Run the full routing decision for an already-embedded ticket.

    Combines calibrated tag probabilities with department-prototype
    similarity (hybrid scoring), predicts priority, applies the two-stage
    confidence gate and the controlled-review policy, and returns a dict
    describing the final decision (mode, department, confidences, tag
    votes, review decision, and a human-readable audit note).
    """
    # Hybrid department scoring from calibrated tag probabilities + embedding.
    _, _, calibrated_probs, _ = predict_tags(text, emb)
    best_dept, hybrid_confidence, department_details, top_tag_votes = (
        compute_department_hybrid_scores(
            calibrated_probs,
            emb,
            dept_prototypes,
            tag_to_department=tag_to_department,
            tag_names=tag_list,
            classifier_weight=HYBRID_CLASSIFIER_WEIGHT,
            similarity_weight=HYBRID_SIMILARITY_WEIGHT,
            top_k=5,
        )
    )
    priority, priority_confidence = predict_priority(
        text,
        emb,
        return_confidence=True,
    )
    base_mode, _, margin, entropy = decide_routing_mode(
        hybrid_confidence,
        calibrated_probs,
    )

    recommended_department = best_dept
    routed_department = recommended_department
    escalation_note = ""

    # Degenerate case: no tag produced a vote / no department resolved —
    # short-circuit straight to human review with the default department.
    if not top_tag_votes or best_dept is None:
        review_decision = {
            "base_mode": "HUMAN_REVIEW",
            "final_mode": "HUMAN_REVIEW",
            "forced_human_review": False,
            "percentile_threshold": float(
                review_policy.get("percentile_threshold", 0.55)
            ),
            "fallback_threshold": float(
                review_policy.get("fallback_threshold", 0.55)
            ),
            "reason": "No valid tag votes or department resolved. Requires human review.",
        }
        return {
            "mode": "HUMAN_REVIEW",
            "department": DEFAULT_DEPARTMENT,
            "recommended_department": None,
            "priority": priority,
            "priority_confidence": priority_confidence,
            "hybrid_confidence": hybrid_confidence,
            "review": True,
            "margin": margin,
            "entropy": entropy,
            "best_details": {},
            "top_tag_votes": [],
            "review_decision": review_decision,
            "note": review_decision["reason"],
        }

    # Priority escalation only overrides departments that passed the gate.
    escalation_department = PRIORITY_ESCALATION.get(str(priority).lower())
    if base_mode != "HUMAN_REVIEW" and escalation_department:
        routed_department = str(escalation_department)
        escalation_note = (
            f" Priority escalation override applied after gate: "
            f"{priority} -> {routed_department}."
        )

    # Controlled-review policy may force additional tickets to human review
    # (to hit a target review fraction) on top of the base gating decision.
    mode, review, review_decision = apply_controlled_review(
        base_mode,
        hybrid_confidence,
        review_policy=review_policy,
    )

    # Build the final department + audit note per decision branch.
    if review_decision.get("forced_human_review", False):
        final_department = DEFAULT_DEPARTMENT
        note = (
            f"{review_decision.get('reason', '')} "
            f"Recommended department before override: {routed_department}."
            f"{escalation_note}"
        ).strip()
    elif mode == "AUTO_ROUTE":
        final_department = routed_department
        note = (
            f"Stage 2 pass: hybrid_confidence={hybrid_confidence:.4f}, "
            f"margin={margin:.4f}, entropy={entropy:.4f}."
            f"{escalation_note}"
        )
    elif mode == "AUTO_ROUTE_FLAGGED":
        final_department = routed_department
        note = (
            f"Stage 2 flagged: hybrid_confidence={hybrid_confidence:.4f}, "
            f"margin={margin:.4f}, entropy={entropy:.4f}."
            f"{escalation_note}"
        )
    elif hybrid_confidence < HYBRID_FLOOR:
        final_department = DEFAULT_DEPARTMENT
        note = (
            f"Stage 1 reject: hybrid_confidence {hybrid_confidence:.4f} "
            f"< HYBRID_FLOOR {HYBRID_FLOOR}."
        )
    else:
        final_department = DEFAULT_DEPARTMENT
        note = (
            f"Stage 2 reject: hybrid_confidence={hybrid_confidence:.4f}, "
            f"margin={margin:.4f}, entropy={entropy:.4f}."
        )

    best_details = department_details.get(recommended_department, {})
    return {
        "mode": mode,
        "department": final_department,
        "recommended_department": recommended_department,
        "priority": priority,
        "priority_confidence": priority_confidence,
        "hybrid_confidence": hybrid_confidence,
        "review": review,
        "margin": margin,
        "entropy": entropy,
        "best_details": best_details,
        "top_tag_votes": top_tag_votes,
        "review_decision": review_decision,
        "note": note.strip(),
    }
386
+
387
+
388
# Column order of the CSV evaluation log. _append_log writes rows in exactly
# this order, and _ensure_log_header rewrites the file whenever the on-disk
# header diverges from this list.
LOG_COLUMNS = [
    "ticket_id",
    "timestamp",
    "ticket_text",
    "duplicate_flag",
    "duplicate_score",
    "routing_mode",
    "department",
    "base_routing_mode",
    "requires_review",
    "controlled_review_applied",
    "department_confidence",
    "classifier_confidence",
    "semantic_similarity",
    "raw_semantic_similarity",
    "priority",
    "priority_confidence",
    "selected_tags",
    "routing_score",
    "hybrid_confidence",
    "margin",
    "entropy",
    "review_percentile_threshold",
    "review_fallback_threshold",
    "prediction_latency_ms",
    "explanation",
]
415
+
416
+
417
def _ensure_log_header():
    """Create the CSV log, or reset it when its header row is stale.

    Resetting (rather than appending) keeps the file consistent with the
    current LOG_COLUMNS after a schema change.
    """
    if not os.path.exists(LOG_PATH):
        with open(LOG_PATH, "w", newline="", encoding="utf-8") as handle:
            csv.writer(handle).writerow(LOG_COLUMNS)
        return

    with open(LOG_PATH, "r", newline="", encoding="utf-8") as handle:
        current_header = next(csv.reader(handle), [])

    if current_header == LOG_COLUMNS:
        return

    with open(LOG_PATH, "w", newline="", encoding="utf-8") as handle:
        csv.writer(handle).writerow(LOG_COLUMNS)
429
+
430
+
431
def _append_log(row_dict):
    """Append one evaluation row (missing columns become empty strings)."""
    _ensure_log_header()
    row = [row_dict.get(column, "") for column in LOG_COLUMNS]
    with open(LOG_PATH, "a", newline="", encoding="utf-8") as handle:
        csv.writer(handle).writerow(row)
435
+
436
+
437
def process_ticket(text):
    """End-to-end pipeline for one raw ticket text.

    Embeds the text (routing + duplicate encoders), checks the FAISS index
    for near-duplicates, routes the ticket, appends a full audit row to the
    CSV log, registers the new ticket in the duplicate index, and returns a
    summary dict for the UI.
    """
    t0 = time.time()
    ticket_id = str(uuid.uuid4())[:8]
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Routing and duplicate detection may use different SBERT encoders.
    routing_emb = encode_ticket_embedding(text, routing_sbert)
    duplicate_emb = encode_ticket_embedding(text, duplicate_sbert)

    # Duplicate check against the cached FAISS index.
    best_match = duplicate_engine.find_best_match(duplicate_emb, k=20)
    dup_score = (
        float(best_match["similarity"])
        if best_match is not None
        else 0.0
    )
    dup_text = best_match.get("matched_text") if best_match is not None else None
    is_dup = bool(
        best_match is not None
        and dup_score >= float(duplicate_engine.duplicate_threshold)
    )

    # Duplicates are still routed so the log captures a full decision.
    routing = route_ticket(routing_emb, text)
    latency_ms = round((time.time() - t0) * 1000, 2)

    mode = routing["mode"]
    dept = routing["department"]
    priority = routing["priority"]
    priority_confidence = routing["priority_confidence"]
    hybrid_confidence = routing["hybrid_confidence"]
    review = routing["review"]
    margin = routing["margin"]
    entropy = routing["entropy"]
    best_details = routing["best_details"]
    top_tag_votes = routing["top_tag_votes"]
    review_decision = routing["review_decision"]
    note = routing["note"]

    # Flatten decision details for logging; missing keys default to 0 / note.
    classifier_confidence = float(best_details.get("classifier_confidence", 0.0))
    semantic_similarity = float(best_details.get("semantic_similarity", 0.0))
    raw_semantic_similarity = float(best_details.get("raw_semantic_similarity", 0.0))
    base_mode = str(review_decision.get("base_mode", mode))
    review_reason = str(review_decision.get("reason", note))
    percentile_threshold = float(
        review_decision.get(
            "percentile_threshold",
            review_policy.get("percentile_threshold", 0.55),
        )
    )
    fallback_threshold = float(
        review_decision.get(
            "fallback_threshold",
            review_policy.get("fallback_threshold", 0.55),
        )
    )
    controlled_review_applied = bool(
        review_decision.get("forced_human_review", False)
    )
    recommended_department = routing.get("recommended_department")
    # Top-3 tag votes as "tag (score)" for the UI and log.
    tag_summary = ", ".join(
        f"{vote['tag']} ({vote['score']:.2f})"
        for vote in top_tag_votes[:3]
    )

    recommended_text = (
        f" Recommended department before final policy: {recommended_department}."
        if recommended_department and recommended_department != dept
        else ""
    )

    if is_dup:
        explanation = (
            f"Duplicate detected (score={dup_score:.4f}). "
            f"Original: {str(dup_text)[:100]}. "
            f"Routing mode: {mode} (base_mode={base_mode}), "
            f"final_department={dept}, hybrid_confidence={hybrid_confidence:.3f}, "
            f"classifier_confidence={classifier_confidence:.3f}, "
            f"semantic_similarity={semantic_similarity:.3f} "
            f"(raw={raw_semantic_similarity:.3f}), margin={margin:.3f}, "
            f"entropy={entropy:.3f}, controlled_review_applied={controlled_review_applied}, "
            f"review_thresholds=(percentile={percentile_threshold:.3f}, "
            f"fallback={fallback_threshold:.3f}).{recommended_text} {note}"
        )
        result = {
            "ticket_id": ticket_id,
            "status": "DUPLICATE",
            "route": mode,
            "department": dept,
            "priority": priority,
            "confidence": round(float(hybrid_confidence), 3),
            "review": review,
            "tags": tag_summary,
            "message": (
                f"Duplicate of: {str(dup_text)[:200]} (similarity={dup_score:.3f}). "
                f"{note}"
            ).strip(),
            "latency": latency_ms,
        }
    else:
        explanation = (
            f"Ticket processed with final department {dept}. "
            f"Predicted tags [{tag_summary}] produced routing mode {mode} "
            f"(base_mode={base_mode}), hybrid_confidence={hybrid_confidence:.3f}, "
            f"classifier_confidence={classifier_confidence:.3f}, "
            f"semantic_similarity={semantic_similarity:.3f} "
            f"(raw={raw_semantic_similarity:.3f}), margin={margin:.3f}, "
            f"entropy={entropy:.3f}, controlled_review_applied={controlled_review_applied}, "
            f"review_thresholds=(percentile={percentile_threshold:.3f}, "
            f"fallback={fallback_threshold:.3f}).{recommended_text} {review_reason}"
        )
        result = {
            "ticket_id": ticket_id,
            "status": "NOT DUPLICATE",
            "route": mode,
            "department": dept,
            "priority": priority,
            "confidence": round(float(hybrid_confidence), 3),
            "review": review,
            "tags": tag_summary,
            "message": note if note else "Ticket processed successfully",
            "latency": latency_ms,
        }

    # Index the new ticket so future submissions can match against it.
    duplicate_engine.add_ticket(ticket_id, text, embedding=duplicate_emb)
    _append_log(
        {
            "ticket_id": ticket_id,
            "timestamp": timestamp,
            "ticket_text": text,
            "duplicate_flag": is_dup,
            "duplicate_score": round(float(dup_score), 4),
            "routing_mode": mode,
            "department": dept,
            "department_confidence": round(float(hybrid_confidence), 4),
            "base_routing_mode": base_mode,
            "requires_review": bool(review),
            "controlled_review_applied": controlled_review_applied,
            "classifier_confidence": round(float(classifier_confidence), 4),
            "semantic_similarity": round(float(semantic_similarity), 4),
            "raw_semantic_similarity": round(float(raw_semantic_similarity), 4),
            "priority": priority,
            "priority_confidence": (
                round(float(priority_confidence), 4)
                if np.isfinite(priority_confidence)
                else ""
            ),
            "selected_tags": tag_summary,
            "routing_score": round(float(hybrid_confidence), 4),
            "hybrid_confidence": round(float(hybrid_confidence), 4),
            "margin": round(float(margin), 4),
            "entropy": round(float(entropy), 4),
            "review_percentile_threshold": round(float(percentile_threshold), 4),
            "review_fallback_threshold": round(float(fallback_threshold), 4),
            "prediction_latency_ms": latency_ms,
            "explanation": explanation,
        }
    )

    return result
594
+
595
+
596
def ui_process(text):
    """Gradio callback: run the pipeline and format the nine UI outputs."""
    if not text or not text.strip():
        return ("Please enter ticket text", "", "", "", "", "", "", "", "")

    result = process_ticket(text.strip())

    badge_by_route = {
        "HUMAN_REVIEW": "Human review required",
        "AUTO_ROUTE_FLAGGED": "QA review required",
    }
    review_badge = badge_by_route.get(result["route"], "No")

    priority_labels = {
        "critical": "Critical",
        "high": "High",
        "medium": "Medium",
        "low": "Low",
    }
    priority_display = priority_labels.get(
        result["priority"].lower(),
        result["priority"],
    )

    route_labels = {
        "AUTO_ROUTE": "Auto-Routed",
        "AUTO_ROUTE_FLAGGED": "Auto-Routed + Flagged",
        "HUMAN_REVIEW": "Human Review Required",
    }
    route_display = route_labels.get(result["route"], result["route"])

    return (
        result["status"],
        result["ticket_id"],
        route_display,
        result["department"].replace("_", " "),
        priority_display,
        f"{int(result['confidence'] * 100)}%",
        result["tags"],
        review_badge,
        result["message"],
    )
640
+
641
+
642
# Custom stylesheet injected into the Gradio app: Inter webfont, gradient
# header banner, card-styled result groups, and a hidden default footer.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');

* { font-family: 'Inter', sans-serif !important; }

.gradio-container {
max-width: 960px !important;
margin: 0 auto !important;
}

.app-header {
text-align: center;
padding: 1.5rem 1rem;
background: linear-gradient(135deg, #4f46e5 0%, #7c3aed 50%, #a855f7 100%);
border-radius: 16px;
margin-bottom: 1.5rem;
box-shadow: 0 8px 32px rgba(79, 70, 229, 0.3);
}
.app-header h1 {
color: white !important;
font-size: 1.75rem !important;
font-weight: 700 !important;
margin: 0 !important;
letter-spacing: -0.02em;
}
.app-header p {
color: rgba(255,255,255,0.85) !important;
font-size: 0.95rem !important;
margin: 0.4rem 0 0 0 !important;
}

.result-card {
background: linear-gradient(145deg, rgba(255,255,255,0.05), rgba(255,255,255,0.02));
border: 1px solid rgba(255,255,255,0.1);
border-radius: 12px;
padding: 0.25rem;
}

.status-box textarea, .status-box input {
font-weight: 600 !important;
font-size: 1rem !important;
}

.submit-btn {
background: linear-gradient(135deg, #4f46e5, #7c3aed) !important;
border: none !important;
color: white !important;
font-weight: 600 !important;
font-size: 1rem !important;
padding: 0.75rem 2rem !important;
border-radius: 10px !important;
box-shadow: 0 4px 16px rgba(79, 70, 229, 0.4) !important;
transition: all 0.3s ease !important;
}
.submit-btn:hover {
transform: translateY(-2px) !important;
box-shadow: 0 6px 24px rgba(79, 70, 229, 0.5) !important;
}

.clear-btn {
border: 1px solid rgba(255,255,255,0.2) !important;
border-radius: 10px !important;
font-weight: 500 !important;
}

.stats-row {
text-align: center;
padding: 0.75rem;
background: rgba(79, 70, 229, 0.08);
border-radius: 10px;
margin-top: 0.5rem;
font-size: 0.85rem;
color: #a5b4fc;
}

footer { display: none !important; }
"""
719
+
720
+
721
# Sample tickets surfaced as one-click examples beneath the input box;
# each inner list is one value for the single `ticket_input` component.
EXAMPLES = [
    [
        "My laptop screen is flickering and sometimes goes completely black. "
        "I've tried restarting but the issue persists after login."
    ],
    [
        "I cannot access the company VPN from my home network. It keeps showing "
        "authentication failed error even though my password is correct."
    ],
    [
        "We need to upgrade our database server as the current one is running out "
        "of storage space and response times have increased significantly."
    ],
    [
        "I was charged twice for my last month's subscription. Please process a "
        "refund for the duplicate charge."
    ],
    [
        "The email server has been down since this morning. No one in the office "
        "can send or receive emails. This is critical!"
    ],
    [
        "Can you provide training materials for the new CRM software that was "
        "deployed last week?"
    ],
]
747
+
748
+
749
# Gradio UI: two-column layout — input + examples on the left, result cards
# on the right — wired to ui_process on click and on Enter.
with gr.Blocks(
    css=CSS,
    theme=gr.themes.Soft(primary_hue="indigo", neutral_hue="slate"),
    title="Ticket Auto-Routing System",
) as app:
    gr.HTML(
        """
        <div class="app-header">
            <h1>Intelligent Ticket Auto-Routing System</h1>
            <p>AI-powered ticket classification, routing, priority prediction and duplicate detection</p>
        </div>
        """
    )

    with gr.Row():
        # Left column: ticket input, action buttons, and clickable examples.
        with gr.Column(scale=1):
            ticket_input = gr.Textbox(
                label="Ticket Description",
                placeholder="Describe the support issue in detail...",
                lines=6,
                max_lines=12,
            )
            with gr.Row():
                submit_btn = gr.Button(
                    "Process Ticket",
                    variant="primary",
                    elem_classes=["submit-btn"],
                )
                clear_btn = gr.ClearButton(
                    value="Clear",
                    elem_classes=["clear-btn"],
                )

            gr.Examples(
                examples=EXAMPLES,
                inputs=ticket_input,
                label="Try these examples",
            )

        # Right column: read-only result cards filled by ui_process.
        with gr.Column(scale=1):
            with gr.Group(elem_classes=["result-card"]):
                dup_status = gr.Textbox(
                    label="Duplicate Status",
                    interactive=False,
                    elem_classes=["status-box"],
                )
                ticket_id = gr.Textbox(label="Ticket ID", interactive=False)

            with gr.Group(elem_classes=["result-card"]):
                with gr.Row():
                    route_mode = gr.Textbox(
                        label="Routing Mode",
                        interactive=False,
                    )
                    department = gr.Textbox(
                        label="Department",
                        interactive=False,
                    )
                with gr.Row():
                    priority = gr.Textbox(label="Priority", interactive=False)
                    confidence = gr.Textbox(
                        label="Hybrid Confidence",
                        interactive=False,
                    )

            with gr.Group(elem_classes=["result-card"]):
                tags = gr.Textbox(label="Predicted Tags", interactive=False)
                needs_review = gr.Textbox(label="Needs Review", interactive=False)
                message = gr.Textbox(
                    label="Details",
                    interactive=False,
                    lines=2,
                )

    # Footer stats rendered once at startup (index size is not live-updated).
    gr.HTML(
        f"""
        <div class="stats-row">
            Database: <strong>{duplicate_engine.index_size:,}</strong> tickets indexed
            &nbsp;|&nbsp;
            <strong>{len(tag_list)}</strong> tag categories
            &nbsp;|&nbsp;
            <strong>{len(dept_prototypes)}</strong> departments
        </div>
        """
    )

    # Output components in the exact order ui_process returns its tuple.
    outputs = [
        dup_status,
        ticket_id,
        route_mode,
        department,
        priority,
        confidence,
        tags,
        needs_review,
        message,
    ]

    submit_btn.click(fn=ui_process, inputs=ticket_input, outputs=outputs)
    ticket_input.submit(fn=ui_process, inputs=ticket_input, outputs=outputs)
    clear_btn.add([ticket_input] + outputs)
850
+
851
+
852
if __name__ == "__main__":
    # Local / Spaces entry point: start the Gradio server.
    app.launch()
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==5.23.0
2
+ sentence-transformers==5.2.3
3
+ transformers==4.49.0
4
+ torch==2.6.0
5
+ faiss-cpu==1.13.2
6
+ scikit-learn==1.5.1
7
+ scipy==1.13.1
8
+ numpy==1.26.4
9
+ pandas==2.2.3
10
+ joblib==1.4.2
11
+ PyYAML==6.0.2
12
+ xgboost==3.2.0
13
+ lightgbm==4.6.0
14
+ huggingface_hub==0.28.1
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.10.16