# Source: busy-module-xgboost / normalization.py
# Uploaded by EurekaPotato using huggingface_hub (commit 634310a, verified)
"""
Feature Normalizer
Min-max normalization for voice and text features based on expected ranges.
"""
import math
from typing import Dict

import numpy as np
class FeatureNormalizer:
    """Normalize features to the [0, 1] range using min-max scaling.

    Every feature has a fixed expected ``(min, max)`` range. Raw values are
    clipped to that range before scaling, so each normalized value lies in
    [0, 1].

    The insertion order of ``VOICE_RANGES`` and ``TEXT_RANGES`` is the
    element order of every array produced or consumed by this class, so the
    two tables are the single source of truth for both ranges and ordering.
    """

    # Expected ranges for voice features (from build spec).
    # Key order defines the layout of the voice feature vector.
    VOICE_RANGES = {
        'v1_snr': (-10, 40),  # SNR in dB
        'v2_noise_traffic': (0, 1),  # Already normalized
        'v2_noise_office': (0, 1),
        'v2_noise_crowd': (0, 1),
        'v2_noise_wind': (0, 1),
        'v2_noise_clean': (0, 1),
        'v3_speech_rate': (0, 5),  # Words per second
        'v4_pitch_mean': (75, 400),  # Hz
        'v5_pitch_std': (0, 100),  # Hz
        'v6_energy_mean': (0, 0.5),  # RMS energy
        'v7_energy_std': (0, 0.2),
        'v8_pause_ratio': (0, 1),  # Ratio
        'v9_avg_pause_dur': (0, 3),  # Seconds
        'v10_mid_pause_cnt': (0, 20),  # Count
        # Paper 1: emotion features
        'v11_emotion_stress': (0, 1),
        'v12_emotion_energy': (0, 1),
        'v13_emotion_valence': (0, 1),
    }

    # Expected ranges for text features (from build spec).
    # Key order defines the layout of the text feature vector.
    TEXT_RANGES = {
        't1_explicit_busy': (0, 1),  # Binary
        't2_avg_resp_len': (0, 30),  # Words
        't3_short_ratio': (0, 1),  # Ratio
        't4_cognitive_load': (0, 0.3),  # Ratio
        't5_time_pressure': (0, 0.2),  # Ratio
        't6_deflection': (0, 0.2),  # Ratio
        't7_sentiment': (-1, 1),  # Polarity
        't8_coherence': (0, 1),  # Score
        't9_latency': (0, 10),  # Seconds
    }

    def normalize_value(self, value: float, min_val: float, max_val: float) -> float:
        """Min-max normalize a single value to [0, 1].

        Args:
            value: Raw feature value.
            min_val: Lower bound of the expected range.
            max_val: Upper bound of the expected range.

        Returns:
            The value clipped to ``[min_val, max_val]`` and scaled to [0, 1].
            Returns 0.0 for NaN input and for a degenerate range where
            ``max_val == min_val``.
        """
        # NaN compares False against everything, so the clip below would
        # return max_val and report a missing measurement as the maximum
        # (1.0). Map it to the bottom of the range instead.
        if math.isnan(value):
            return 0.0
        # Clip to range
        clipped = max(min_val, min(max_val, value))
        # A zero-width range cannot be scaled; avoid dividing by zero.
        if max_val == min_val:
            return 0.0
        return float((clipped - min_val) / (max_val - min_val))

    def _normalize_features(self, features: Dict[str, float], ranges: dict) -> np.ndarray:
        """Normalize ``features`` in the key order of ``ranges``."""
        return np.array(
            [
                # A feature absent from the input is treated as raw 0.0.
                self.normalize_value(features.get(name, 0.0), low, high)
                for name, (low, high) in ranges.items()
            ],
            dtype=np.float32,
        )

    def _denormalize_features(self, normalized: np.ndarray, ranges: dict) -> Dict[str, float]:
        """Map normalized values back to raw scale in the key order of ``ranges``."""
        # Positional indexing (rather than zip) keeps the IndexError for an
        # input that is shorter than the number of features.
        return {
            name: float(normalized[index] * (high - low) + low)
            for index, (name, (low, high)) in enumerate(ranges.items())
        }

    def normalize_voice(self, features: Dict[str, float]) -> np.ndarray:
        """Normalize voice features to [0, 1].

        Handles 17 features (14 original + 3 emotion).

        Args:
            features: Dict keyed by the names in ``VOICE_RANGES``. Missing
                keys are treated as a raw value of 0.0.

        Returns:
            float32 np.ndarray of shape (17,), ordered like ``VOICE_RANGES``.
        """
        return self._normalize_features(features, self.VOICE_RANGES)

    def normalize_text(self, features: Dict[str, float]) -> np.ndarray:
        """Normalize text features to [0, 1].

        Args:
            features: Dict keyed by the names in ``TEXT_RANGES``. Missing
                keys are treated as a raw value of 0.0.

        Returns:
            float32 np.ndarray of shape (9,), ordered like ``TEXT_RANGES``.
        """
        return self._normalize_features(features, self.TEXT_RANGES)

    def normalize_all(
        self,
        voice_features: Dict[str, float],
        text_features: Dict[str, float]
    ) -> np.ndarray:
        """Normalize all 26 features (17 voice + 9 text) and concatenate.

        Args:
            voice_features: Raw voice features, see ``normalize_voice``.
            text_features: Raw text features, see ``normalize_text``.

        Returns:
            np.ndarray of shape (26,): voice features first, then text.
        """
        voice_norm = self.normalize_voice(voice_features)
        text_norm = self.normalize_text(text_features)
        return np.concatenate([voice_norm, text_norm])

    def denormalize_voice(self, normalized: np.ndarray) -> Dict[str, float]:
        """Convert normalized voice values back to original scale.

        Intended for interpretability. Values clipped during normalization
        come back as the range bound, not the original raw value.

        Args:
            normalized: Sequence whose first 17 elements are normalized
                voice features ordered like ``VOICE_RANGES``.

        Returns:
            Dict mapping each voice feature name to its raw-scale value.

        Raises:
            IndexError: If ``normalized`` has fewer than 17 elements.
        """
        return self._denormalize_features(normalized, self.VOICE_RANGES)

    def denormalize_text(self, normalized: np.ndarray) -> Dict[str, float]:
        """Convert normalized text values back to original scale.

        Intended for interpretability. Values clipped during normalization
        come back as the range bound, not the original raw value.

        Args:
            normalized: Sequence whose first 9 elements are normalized text
                features ordered like ``TEXT_RANGES``. Pass only the text
                part of a concatenated vector, not the full 26 elements.

        Returns:
            Dict mapping each text feature name to its raw-scale value.

        Raises:
            IndexError: If ``normalized`` has fewer than 9 elements.
        """
        return self._denormalize_features(normalized, self.TEXT_RANGES)
if __name__ == "__main__":
    # Smoke test: push one sample voice feature dict through the normalizer
    # and print the shape and value range of the result. The emotion
    # features are not supplied, so normalize_voice uses its default for them.
    sample_voice = dict(
        v1_snr=15.0,
        v2_noise_traffic=0.8,
        v2_noise_office=0.1,
        v2_noise_crowd=0.05,
        v2_noise_wind=0.05,
        v2_noise_clean=0.0,
        v3_speech_rate=3.5,
        v4_pitch_mean=150.0,
        v5_pitch_std=25.0,
        v6_energy_mean=0.1,
        v7_energy_std=0.05,
        v8_pause_ratio=0.3,
        v9_avg_pause_dur=0.8,
        v10_mid_pause_cnt=5.0,
    )
    voice_vector = FeatureNormalizer().normalize_voice(sample_voice)
    lowest = voice_vector.min()
    highest = voice_vector.max()
    print("Voice features normalized:")
    print(f"Shape: {voice_vector.shape}")
    print(f"Range: [{lowest:.3f}, {highest:.3f}]")