"""
XGBoost Busy Detector - Hugging Face Inference Endpoint Handler

Custom handler for HF Inference Endpoints.

Loads the XGBoost model, applies normalization, runs evidence accumulation
scoring, and returns busy_score + confidence + recommendation.

Derived from: src/normalization.py, src/scoring_engine.py, src/model.py
"""
|
|
|
| from typing import Dict, Any, Tuple
|
| import json
|
| import math
|
| import numpy as np
|
| import pickle
|
| from pathlib import Path
|
|
|
|
|
class EndpointHandler:
    """HF Inference Endpoint handler for XGBoost busy detection.

    On construction the handler loads three artifacts from the model
    directory: a pickled XGBoost model, ``feature_ranges.json`` (min/max
    ranges and feature ordering) and ``scoring_rules.json`` (evidence weights
    and recommendation thresholds).  Each call normalizes the incoming
    features, asks the model for a probability, combines that probability
    with rule-based evidence, and returns a score, a confidence and a
    recommendation.
    """

    def __init__(self, path: str = "."):
        """Load the model and the JSON configuration files from ``path``.

        Args:
            path: Directory containing the model pickle,
                ``feature_ranges.json`` and ``scoring_rules.json``.

        Raises:
            FileNotFoundError: If none of the known model file names exists
                in ``path`` (or, via ``open``, if a JSON file is missing).
            ValueError: If the pickle is a dict that contains neither a
                ``"model"`` nor a ``"booster"`` entry.
            KeyError: If a required key is missing from the JSON files.
        """
        model_dir = Path(path)

        # Probe the known artifact names in priority order; first hit wins.
        model_path = None
        for candidate in (
            model_dir / "model.pkl",
            model_dir / "busy_detector_v1.pkl",
            model_dir / "busy_detector_5k.pkl",
        ):
            if candidate.exists():
                model_path = candidate
                break

        if model_path is None:
            raise FileNotFoundError(
                f"No model file found in {model_dir}. "
                "Expected model.pkl, busy_detector_v1.pkl, or busy_detector_5k.pkl"
            )

        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only deploy model artifacts that come from a trusted source.
        with open(model_path, "rb") as f:
            saved = pickle.load(f)

        # The pickle is either a bare model object or a dict wrapping it.
        if isinstance(saved, dict):
            # Use explicit None checks rather than `or`, so the choice does
            # not depend on the truthiness of the model object itself.
            model = saved.get("model")
            if model is None:
                model = saved.get("booster")
            self.model = model
            # NOTE(review): feature_names is stored but not used anywhere in
            # this class - confirm whether the DMatrix should carry it.
            self.feature_names = saved.get("feature_names")
        else:
            self.model = saved
            self.feature_names = None

        # Fail at load time instead of on the first request.
        if self.model is None:
            raise ValueError(
                f"Model file {model_path} does not contain a 'model' or 'booster' entry"
            )

        print(f"✓ XGBoost model loaded from {model_path}")

        ranges_path = model_dir / "feature_ranges.json"
        with open(ranges_path, encoding="utf-8") as f:
            ranges_data = json.load(f)

        self.voice_ranges = ranges_data["voice_ranges"]
        self.text_ranges = ranges_data["text_ranges"]
        self.voice_order = ranges_data["voice_feature_order"]
        self.text_order = ranges_data["text_feature_order"]

        rules_path = model_dir / "scoring_rules.json"
        with open(rules_path, encoding="utf-8") as f:
            self.scoring = json.load(f)

        self.weights = self.scoring["weights"]
        self.thresholds = self.scoring["thresholds"]
        print("✓ Feature ranges and scoring rules loaded")

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Entrypoint for HF Inference Endpoints.

        Expected input (JSON)::

            {
                "inputs": {
                    "audio_features": { "v1_snr": 15.0, ... },
                    "text_features": { "t1_explicit_busy": 0.8, ... }
                }
            }

        The ``"inputs"`` wrapper is optional: when it is absent (or null) the
        payload itself is read as the inputs object.  A missing or null
        feature group is treated as empty.

        Returns::

            {
                "busy_score": 0.72,
                "confidence": 0.85,
                "recommendation": "EXIT",
                "ml_probability": 0.65,
                "evidence_details": [...]
            }

        Raises:
            ValueError: If the inputs object is not a JSON object (dict).
        """
        inputs = data.get("inputs")
        if inputs is None:
            inputs = data
        if not isinstance(inputs, dict):
            raise ValueError(
                "'inputs' must be an object with 'audio_features' and/or 'text_features'"
            )

        # `or {}` also covers an explicit JSON null, which the plain
        # .get(key, {}) default would let through as None.
        audio_features = inputs.get("audio_features") or {}
        text_features = inputs.get("text_features") or {}

        normalized = self._normalize_features(audio_features, text_features)

        # Imported lazily so the class can be imported without xgboost.
        import xgboost as xgb

        # The model receives a single row holding the normalized vector.
        dmatrix = xgb.DMatrix(normalized.reshape(1, -1))
        ml_prob = float(self.model.predict(dmatrix)[0])

        # Rule-based evidence is computed from the raw (un-normalized) values.
        final_score, confidence, details = self._score_with_evidence(
            ml_prob, audio_features, text_features
        )
        recommendation = self._get_recommendation(final_score)

        return {
            "busy_score": round(final_score, 4),
            "confidence": round(confidence, 4),
            "recommendation": recommendation,
            "ml_probability": round(ml_prob, 4),
            "evidence_details": details,
        }

    @staticmethod
    def _feature_value(features: Dict[str, float], name: str) -> float:
        """Return ``features[name]``, mapping a missing key or ``None`` to 0.0."""
        value = features.get(name)
        return 0.0 if value is None else value

    def _normalize_value(self, value: float, min_val: float, max_val: float) -> float:
        """Clamp ``value`` to ``[min_val, max_val]`` and rescale it to ``[0, 1]``.

        A degenerate range (``min_val == max_val``) yields 0.0 instead of
        dividing by zero.
        """
        if max_val == min_val:
            return 0.0
        # NOTE(review): a NaN input is not detected here; with
        # min_val < max_val the clamp below evaluates to max_val, so NaN
        # normalizes to 1.0 - confirm this matches the training pipeline.
        value = max(min_val, min(max_val, value))
        return (value - min_val) / (max_val - min_val)

    def _normalize_features(
        self,
        audio_features: Dict[str, float],
        text_features: Dict[str, float],
    ) -> np.ndarray:
        """Min-max normalize every configured feature and concatenate them.

        Voice features come first (in ``self.voice_order``), followed by text
        features (in ``self.text_order``).  Features that are missing or null
        default to 0.0 before normalization.

        Returns:
            A 1-D float32 array with one entry per configured feature.
        """
        voice_norm = [
            self._normalize_value(
                self._feature_value(audio_features, feat), *self.voice_ranges[feat]
            )
            for feat in self.voice_order
        ]
        text_norm = [
            self._normalize_value(
                self._feature_value(text_features, feat), *self.text_ranges[feat]
            )
            for feat in self.text_order
        ]
        return np.array(voice_norm + text_norm, dtype=np.float32)

    @staticmethod
    def _sigmoid(x: float) -> float:
        """Logistic function mapping accumulated evidence to ``(0, 1)``.

        ``math.exp(-x)`` overflows for strongly negative ``x``; the limit of
        the sigmoid there is 0.0, so that value is returned instead of
        letting ``OverflowError`` escape.
        """
        try:
            return 1.0 / (1.0 + math.exp(-x))
        except OverflowError:
            return 0.0

    @staticmethod
    def _logit(p: float) -> float:
        """Log-odds of ``p``, with ``p`` clamped to ``[0.01, 0.99]``.

        The clamp keeps the result finite for probabilities of exactly 0 or 1.
        """
        p = max(0.01, min(0.99, p))
        return math.log(p / (1.0 - p))

    def _score_with_evidence(
        self,
        ml_prob: float,
        audio_features: Dict[str, float],
        text_features: Dict[str, float],
    ) -> Tuple[float, float, list]:
        """Combine rule-based evidence with the ML probability.

        Each rule compares a raw feature value with a fixed threshold and,
        when it fires, adds points to a running evidence total.  The ML
        probability contributes its log-odds scaled by
        ``weights["ml_model_factor"]``.  Intended to mirror
        ``ScoringEngine.calculate_score`` (per the original docstring).

        Weights are looked up only when their rule fires, so a weight key
        that is absent from the configuration matters only for inputs that
        trigger it.

        Returns:
            ``(final_score, confidence, details)`` where ``final_score`` is
            the sigmoid of the evidence total, ``confidence`` is
            ``tanh(|evidence| / 2)`` and ``details`` lists one human-readable
            string per contribution, in evaluation order.
        """
        evidence = 0.0
        details = []

        # --- Text-derived evidence ---
        explicit = self._feature_value(text_features, "t1_explicit_busy")
        if explicit > 0.5:
            pts = self.weights["explicit_busy"] * explicit
            evidence += pts
            details.append(f"Explicit Intent (+{pts:.1f})")

        explicit_free = self._feature_value(text_features, "t0_explicit_free")
        if explicit_free > 0.5:
            pts = self.weights["explicit_free"] * explicit_free
            evidence += pts
            # No "+" in the label: presumably this weight is negative - TODO confirm.
            details.append(f"Explicit Free ({pts:.1f})")

        short_ratio = self._feature_value(text_features, "t3_short_ratio")
        if short_ratio > 0.3:
            pts = self.weights["short_answers"] * short_ratio
            evidence += pts
            details.append(f"Brief Responses (+{pts:.1f})")

        deflection = self._feature_value(text_features, "t6_deflection")
        if deflection > 0.1:
            pts = self.weights["deflection"] * deflection
            evidence += pts
            details.append(f"Deflection (+{pts:.1f})")

        # --- Audio-derived evidence ---
        traffic = self._feature_value(audio_features, "v2_noise_traffic")
        if traffic > 0.5:
            pts = self.weights["traffic_noise"] * traffic
            evidence += pts
            details.append(f"Traffic Context (+{pts:.1f})")

        rate = self._feature_value(audio_features, "v3_speech_rate")
        if rate > 3.5:
            # Flat contribution: not scaled by the feature value.
            pts = self.weights["rushed_speech"]
            evidence += pts
            details.append(f"Rushed Speech (+{pts:.1f})")

        pitch_std = self._feature_value(audio_features, "v5_pitch_std")
        if pitch_std > 80.0:
            # Fixed contribution that does not come from the weights table.
            evidence += 0.5
            details.append("Voice Stress (+0.5)")

        emotion_stress = self._feature_value(audio_features, "v11_emotion_stress")
        if emotion_stress > 0.6:
            pts = self.weights["emotion_stress"] * emotion_stress
            evidence += pts
            details.append(f"Emotional Stress (+{pts:.1f})")

        emotion_energy = self._feature_value(audio_features, "v12_emotion_energy")
        if emotion_energy > 0.7:
            pts = self.weights["emotion_energy"] * emotion_energy
            evidence += pts
            details.append(f"High Energy (+{pts:.1f})")

        # --- ML model contribution (always present) ---
        ml_evidence = self._logit(ml_prob) * self.weights["ml_model_factor"]
        evidence += ml_evidence
        details.append(f"ML Baseline ({ml_evidence:+.1f})")

        final_score = self._sigmoid(evidence)
        confidence = float(math.tanh(abs(evidence) / 2.0))

        return final_score, confidence, details

    def _get_recommendation(self, score: float) -> str:
        """Map a score to ``"CONTINUE"``, ``"CHECK_IN"`` or ``"EXIT"``.

        Scores below ``thresholds["continue"]`` give ``"CONTINUE"``, scores
        below ``thresholds["check_in"]`` give ``"CHECK_IN"``, and anything
        else gives ``"EXIT"``.
        """
        if score < self.thresholds["continue"]:
            return "CONTINUE"
        if score < self.thresholds["check_in"]:
            return "CHECK_IN"
        return "EXIT"
|
|
|