# Source: SupportMind — src/ensemble_router.py
# Uploaded to the Hugging Face Hub by Asmitha-28 via huggingface_hub
# (commit dd61cbf, verified)
# src/ensemble_router.py
# SupportMind — Ensemble Confidence-Gated Router
# Combines DistilBERT (MC Dropout) + TF-IDF Logistic Regression
# for best-in-class accuracy on ticket routing.
#
# Strategy: weighted soft-voting on probability distributions
# final_probs = w_bert * bert_probs + w_sklearn * sklearn_probs
#
# Why this beats either model alone:
# - DistilBERT: captures semantic meaning, handles paraphrases
# - TF-IDF+LR : captures keyword/n-gram signals, very confident on clear cases
# - Ensemble : DistilBERT corrects LR on ambiguous tickets,
# LR corrects BERT on keyword-heavy ones
import os
import gc
import pickle
import logging
import numpy as np
from typing import Dict, Optional
logger = logging.getLogger(__name__)

# ── Category map ────────────────────────────────────────────────────────────
# Index order must match the label encoding used when both models were trained.
CATEGORY_MAP = {
    idx: name
    for idx, name in enumerate([
        'billing',
        'technical_support',
        'account_management',
        'feature_request',
        'compliance_legal',
        'onboarding',
        'general_inquiry',
        'churn_risk',
    ])
}
# Inverse lookup: category name -> class index.
CATEGORY_REVERSE = {name: idx for idx, name in CATEGORY_MAP.items()}

# ── Routing thresholds ───────────────────────────────────────────────────────
ROUTE_THRESHOLD = 0.82    # ensemble confidence >= this -> auto-route
CLARIFY_THRESHOLD = 0.58  # ensemble confidence >= this -> ask one question
ENTROPY_MAX = 0.32        # ensemble entropy <= this -> low ambiguity
MC_PASSES = 10            # MC Dropout stochastic passes (run sequentially for memory)

# ── Ensemble weights ─────────────────────────────────────────────────────────
# The transformer gets the larger weight because it generalises better to
# unseen phrasing. Tunable: raise SKLEARN_W if the LR baseline is more
# accurate on your data.
BERT_W = 0.75
SKLEARN_W = 0.25
class EnsembleRouter:
    """
    Ensemble Confidence-Gated Router.

    Combines:
      1. A fine-tuned transformer classifier (DistilBERT/DeBERTa, using
         MC Dropout for uncertainty estimation)
      2. A TF-IDF + Calibrated Logistic Regression baseline

    The two probability distributions are blended with fixed weights
    (BERT_W / SKLEARN_W) and passed through confidence/margin/entropy
    gates that decide between 'route', 'clarify' and 'escalate'.

    Falls back to sklearn-only if the transformer weights are absent.
    Drop-in replacement for ConfidenceGatedRouter — same .route() interface.
    """

    def __init__(self, model_dir: Optional[str] = None, device: str = 'cpu'):
        """
        Args:
            model_dir: Directory holding the transformer checkpoint. When
                None, prefers models/deberta_ultimate (if its config.json
                exists), else models/ticket_classifier.
            device: Torch device string for the transformer ('cpu'/'cuda').
        """
        base = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        ultimate_path = os.path.join(base, 'models', 'deberta_ultimate')
        standard_path = os.path.join(base, 'models', 'ticket_classifier')
        if model_dir is None:
            if os.path.exists(os.path.join(ultimate_path, 'config.json')):
                self.model_dir = ultimate_path
            else:
                self.model_dir = standard_path
        else:
            self.model_dir = model_dir
        self._bert_router = None      # ConfidenceGatedRouter instance, or None
        self._sklearn_pipe = None     # unpickled sklearn pipeline
        self._bert_available = False  # True once the transformer loads cleanly
        # IMPORTANT: Load BERT first and do a warmup pass.
        # On Windows, unpickling sklearn before PyTorch's first forward pass
        # causes a segfault in torch.distributed/optree DLLs.
        self._load_bert(device)
        if self._bert_available:
            self._warmup_bert()
        self._load_sklearn()
        # Optional retrieval layer: similarity to historically routed tickets
        # can boost a borderline confidence. Best-effort — any failure simply
        # disables the boost.
        try:
            from historical_memory import HistoricalMemoryLayer
            self._memory_layer = HistoricalMemoryLayer()
        except Exception as e:
            logger.warning(f"[EnsembleRouter] Could not load Historical Memory Layer: {e}")
            self._memory_layer = None
        logger.info(
            f"[EnsembleRouter] BERT={'ON' if self._bert_available else 'OFF (fallback)'} | "
            f"sklearn=ON | weights=({BERT_W}/{SKLEARN_W}) | memory={'ON' if getattr(self, '_memory_layer', None) and self._memory_layer.is_ready else 'OFF'}"
        )

    def _warmup_bert(self):
        """Perform a warmup forward pass to initialize PyTorch/CUDA state."""
        try:
            self._bert_router.mc_predict("warmup", n_passes=1)
            logger.info("[EnsembleRouter] BERT warmup complete.")
        except Exception as e:
            logger.warning(f"[EnsembleRouter] BERT warmup failed: {e}")

    # ── Model loaders ────────────────────────────────────────────────────────
    def _load_sklearn(self):
        """Load the pickled TF-IDF + LR pipeline (the mandatory fallback model).

        Checks model_dir first, then falls back to models/ticket_classifier.

        Raises:
            FileNotFoundError: if no sklearn_router.pkl exists in either place.
        """
        pkl = os.path.join(self.model_dir, 'sklearn_router.pkl')
        if not os.path.exists(pkl):
            base = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
            pkl = os.path.join(base, 'models', 'ticket_classifier', 'sklearn_router.pkl')
        if not os.path.exists(pkl):
            raise FileNotFoundError(
                "sklearn_router.pkl not found. "
                "Run: python train_baseline.py"
            )
        # NOTE(review): pickle.load on a local model artifact — never point
        # model_dir at untrusted files.
        with open(pkl, 'rb') as f:
            self._sklearn_pipe = pickle.load(f)
        logger.info(f"[EnsembleRouter] sklearn pipeline loaded from {pkl}.")

    def _load_bert(self, device: str):
        """Load the fine-tuned transformer. Skips gracefully if weights are absent."""
        import json
        model_bin = os.path.join(self.model_dir, 'pytorch_model.bin')
        model_safe = os.path.join(self.model_dir, 'model.safetensors')
        config = os.path.join(self.model_dir, 'config.json')
        bert_ready = os.path.exists(config) and (
            os.path.exists(model_bin) or os.path.exists(model_safe)
        )
        if not bert_ready:
            logger.warning(
                "[EnsembleRouter] DistilBERT weights not found — running sklearn-only."
            )
            return
        # Check for stale baseline stub (only present before first real training run)
        try:
            with open(config) as f:
                cfg = json.load(f)
            if cfg.get('model_type') == 'baseline_sklearn':
                logger.warning("[EnsembleRouter] config.json is baseline stub — skipping BERT.")
                return
        except Exception:
            # Unreadable/invalid config: fall through and let the real loader decide.
            pass
        try:
            from confidence_router import ConfidenceGatedRouter
            self._bert_router = ConfidenceGatedRouter(self.model_dir, device=device)
            self._bert_available = True
            gc.collect()
            logger.info(f"[EnsembleRouter] {self._bert_router.model.config.model_type.upper()} loaded successfully.")
        except Exception as e:  # OSError is already covered by Exception
            logger.error(f"[EnsembleRouter] BERT load failed (likely memory constraint): {e}")
            # Ensure we don't leave a half-initialized router
            self._bert_router = None
            self._bert_available = False
            gc.collect()

    # ── Prediction ───────────────────────────────────────────────────────────
    def _sklearn_probs(self, text: str) -> np.ndarray:
        """Return the calibrated probability distribution from the sklearn pipeline."""
        return self._sklearn_pipe.predict_proba([text])[0]  # shape [n_classes]

    def _bert_probs(self, text: str, n_passes: int = MC_PASSES) -> np.ndarray:
        """Return the MC-Dropout mean probability distribution from the transformer."""
        _, _, _, mean_p, _ = self._bert_router.mc_predict(text, n_passes=n_passes)
        return mean_p  # shape [n_classes]

    def _blend(self, text: str, n_passes: int = MC_PASSES):
        """
        Compute the blended probability distribution.

        Args:
            text: Raw ticket text.
            n_passes: Number of MC Dropout passes for the transformer.
                BUGFIX: previously hard-coded to MC_PASSES, so the caller's
                n_passes argument was silently ignored.

        Returns:
            (blended_probs, bert_probs_or_None, sklearn_probs, bert_std)
            where bert_std is all-zeros in the sklearn-only fallback.
        """
        sk_probs = self._sklearn_probs(text)
        if not self._bert_available:
            return sk_probs, None, sk_probs, np.zeros(len(CATEGORY_MAP))
        _, _, _, bert_mean, bert_std = self._bert_router.mc_predict(text, n_passes)
        blended = BERT_W * bert_mean + SKLEARN_W * sk_probs
        # Re-normalise (floating point can drift slightly)
        blended = blended / blended.sum()
        return blended, bert_mean, sk_probs, bert_std

    # ── Public API ───────────────────────────────────────────────────────────
    def route(self, ticket_text: str, n_passes: int = MC_PASSES) -> Dict:
        """
        Route a ticket through the ensemble confidence gate.

        Args:
            ticket_text: Raw customer ticket text.
            n_passes: MC Dropout passes (now actually forwarded to the
                transformer — previously ignored, so 'mc_passes' in the
                result could misreport the work done).

        Returns the same dict schema as ConfidenceGatedRouter.route()
        so it is a drop-in replacement in api.py.
        """
        blended, bert_p, sk_p, bert_std = self._blend(ticket_text, n_passes)
        n_cls = len(CATEGORY_MAP)
        confidence = float(blended.max())
        # Shannon entropy in nats; the epsilon guards log(0).
        entropy = float(-np.sum(blended * np.log(blended + 1e-9)))
        pred_class = int(blended.argmax())
        category = CATEGORY_MAP[pred_class]
        # Full (category, prob) ranking, best first.
        ranking = sorted(
            [(CATEGORY_MAP[i], round(float(blended[i]), 4)) for i in range(n_cls)],
            key=lambda x: x[1], reverse=True
        )
        top_two = [ranking[0][0], ranking[1][0]]
        base = {
            'confidence': round(confidence, 4),
            'entropy': round(entropy, 4),
            'top_category': category,
            'all_probs': {CATEGORY_MAP[i]: round(float(blended[i]), 4) for i in range(n_cls)},
            'std_probs': {CATEGORY_MAP[i]: round(float(bert_std[i]), 4) for i in range(n_cls)},
            'category_ranking': ranking,
            'top_two_classes': top_two,
            'mc_passes': n_passes,
            # Extra ensemble diagnostics
            'ensemble': {
                'bert_available': self._bert_available,
                'bert_top': CATEGORY_MAP[int(bert_p.argmax())] if bert_p is not None else None,
                'sklearn_top': CATEGORY_MAP[int(sk_p.argmax())],
                'bert_weight': BERT_W if self._bert_available else 0.0,
                'sklearn_weight': SKLEARN_W if self._bert_available else 1.0,
                'agreement': (
                    CATEGORY_MAP[int(bert_p.argmax())] == CATEGORY_MAP[int(sk_p.argmax())]
                    if bert_p is not None else True
                ),
            }
        }
        top1_score = ranking[0][1]
        top2_score = ranking[1][1]
        margin = top1_score - top2_score
        # Historical-memory boost can lift a borderline confidence over a gate.
        hist_boost = 0.0
        if getattr(self, '_memory_layer', None) and self._memory_layer.is_ready:
            hist_boost = self._memory_layer.compute_historical_boost(ticket_text, category)
        base['historical_boost'] = hist_boost
        base['margin'] = round(margin, 4)
        base['confidence'] = round(confidence, 4)
        # Gating: sensitive intents get the strictest thresholds;
        # technical_support gets an extra billing-keyword cross-check;
        # everything else uses the default gates.
        critical_labels = ['compliance_legal', 'account_management']
        effective_conf = confidence + hist_boost
        if category in critical_labels:
            if effective_conf >= 0.90 and margin >= 0.35 and entropy < 0.60:
                action = 'route'
                reason = f'• Safe to auto-route sensitive intent<br>• Confidence: {confidence:.2%}<br>• Margin: {margin:.2f}'
                if hist_boost > 0:
                    reason += f'<br>• <span style="color:var(--green)">Historical Match Boost: +{hist_boost:.2%}</span>'
            else:
                action = 'escalate'
                reason = f'• Escalated sensitive intent ({category})<br>• Strict confidence/margin threshold not met'
                if hist_boost > 0:
                    reason += f'<br>• <span style="color:var(--green)">Historical Match Boost: +{hist_boost:.2%}</span> (Insufficient)'
        elif category == 'technical_support':
            # Category-specific check for technical support to catch billing misroutes
            billing_keywords = ['invoice', 'billing', 'charge', 'refund', 'payment', 'subscription', 'plan']
            has_billing_kw = any(kw in ticket_text.lower() for kw in billing_keywords)
            if has_billing_kw and 'billing' in [r[0] for r in ranking[:3]]:
                action = 'clarify'
                reason = '• Billing overlap detected<br>• Clarification needed between technical_support and billing'
            elif effective_conf >= 0.88 and margin >= 0.30 and entropy < 0.65:
                # Stricter thresholds for technical_support
                action = 'route'
                reason = f'• Strong dominant intent<br>• Confidence: {confidence:.2%}<br>• Margin: {margin:.2f}<br>• Safe to auto-route'
                if hist_boost > 0:
                    reason += f'<br>• <span style="color:var(--green)">Historical Match Boost: +{hist_boost:.2%}</span>'
            elif effective_conf >= 0.60 and entropy < 1.05:
                action = 'clarify'
                reason = f'• Medium ambiguity detected<br>• Clarification needed between {top_two[0]} and {top_two[1]}<br>• Margin: {margin:.2f}'
                if hist_boost > 0:
                    reason += f'<br>• <span style="color:var(--green)">Historical Match Boost: +{hist_boost:.2%}</span> (Insufficient for auto-route)'
            else:
                action = 'escalate'
                reason = f'• High ambiguity / Low confidence ({confidence:.2%})<br>• Multiple overlapping intents detected<br>• Human triage needed'
        else:
            if effective_conf >= 0.85 and margin >= 0.25 and entropy < 0.70:
                action = 'route'
                reason = f'• Strong dominant intent<br>• Confidence: {confidence:.2%}<br>• Margin: {margin:.2f}<br>• Safe to auto-route'
                if hist_boost > 0:
                    reason += f'<br>• <span style="color:var(--green)">Historical Match Boost: +{hist_boost:.2%}</span>'
            elif effective_conf >= 0.60 and entropy < 1.05:
                action = 'clarify'
                reason = f'• Medium ambiguity detected<br>• Clarification needed between {top_two[0]} and {top_two[1]}<br>• Margin: {margin:.2f}'
                if hist_boost > 0:
                    reason += f'<br>• <span style="color:var(--green)">Historical Match Boost: +{hist_boost:.2%}</span> (Insufficient for auto-route)'
            else:
                action = 'escalate'
                reason = f'• High ambiguity / Low confidence ({confidence:.2%})<br>• Multiple overlapping intents detected<br>• Human triage needed'
        return {**base, 'action': action, 'queue': category if action == 'route' else None, 'reason': reason}

    def batch_route(self, tickets: list, n_passes: int = MC_PASSES) -> list:
        """Route a list of tickets; returns one result dict per ticket."""
        return [self.route(t, n_passes) for t in tickets]

    # Properties exposing model/tokenizer for the SHAP explainer in api.py
    @property
    def model(self):
        """Underlying transformer model, or None in sklearn-only mode."""
        if self._bert_available:
            return self._bert_router.model
        return None

    @property
    def tokenizer(self):
        """Underlying tokenizer, or None in sklearn-only mode."""
        if self._bert_available:
            return self._bert_router.tokenizer
        return None
if __name__ == '__main__':
    # Smoke-test the router on a handful of representative tickets.
    logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
    router = EnsembleRouter()
    # Demo tickets — mojibake em-dashes ("β€”") repaired to "—".
    tests = [
        "My invoice from last month is incorrect, please fix the billing.",
        "The API keeps returning 500 errors since last Tuesday's update.",
        "I want to cancel — this tool has been broken for weeks.",
        "How do I add another user to our account?",
        "We need GDPR data processing agreements for our EU customers.",
        "Not happy at all, considering switching to a competitor.",
        "Can you add a dark mode to the dashboard?",
        "Just signed up — how do I import my existing data?",
        # Tricky ambiguous cases
        "Invoice is wrong AND the app keeps crashing.",
        "Not happy with service",
    ]
    print(f"\n{'='*90}")
    print(f" SupportMind Ensemble Router — BERT={'ON' if router._bert_available else 'OFF (sklearn only)'}")
    print(f"{'='*90}\n")
    for ticket in tests:
        r = router.route(ticket)
        agree = 'AGREE' if r['ensemble']['agreement'] else 'DISAGREE'
        # H = high certainty (entropy below the ENTROPY_MAX gate), L = low.
        print(
            f"[{r['action'].upper():8s}] [{r['confidence']:.2%}] "
            f"{'H' if r['entropy'] < ENTROPY_MAX else 'L'}-certainty | "
            f"{r['top_category']:20s} | "
            f"Models: {agree} | {ticket[:60]}"
        )