"""
Uncertainty Quantification Module for LLMs

This module implements various uncertainty quantification methods for large language models,
including softmax confidence, Monte Carlo dropout, ensemble disagreement, and calibration metrics.
"""

from typing import Any, Dict, List

import numpy as np
from scipy.special import softmax


class UncertaintyQuantifier:
    """Base class for uncertainty quantification methods."""

    def __init__(self, name: str):
        """
        Initialize the uncertainty quantifier.

        Args:
            name: Name of the uncertainty quantification method
        """
        self.name = name

    def quantify(self, model_outputs: Dict[str, Any]) -> Dict[str, float]:
        """
        Quantify uncertainty in model outputs.

        Args:
            model_outputs: Outputs from the LLM interface

        Returns:
            Dictionary of uncertainty metrics
        """
        raise NotImplementedError("Subclasses must implement this method")


class SoftmaxConfidence(UncertaintyQuantifier):
    """Uncertainty quantification based on softmax confidence scores."""

    def __init__(self):
        """Initialize the softmax confidence quantifier."""
        super().__init__("softmax_confidence")

    def quantify(self, model_outputs: Dict[str, Any]) -> Dict[str, float]:
        """
        Quantify uncertainty using softmax confidence scores.

        Args:
            model_outputs: Outputs from the LLM interface, must include logits

        Returns:
            Dictionary of uncertainty metrics:
                - mean_confidence: Average confidence across tokens
                - min_confidence: Minimum confidence across tokens
                - entropy: Average entropy of token distributions
        """
        if "logits" not in model_outputs:
            raise ValueError("Model outputs must include logits for softmax confidence")

        # Per-token logits for the first (and typically only) sequence in the batch
        logits = model_outputs["logits"][0]

        confidences = []
        entropies = []

        for token_logits in logits:
            probs = softmax(token_logits, axis=-1)

            # Confidence is the probability of the most likely token
            max_prob = np.max(probs)
            confidences.append(max_prob)

            # Shannon entropy of the token distribution (epsilon avoids log(0))
            entropy = -np.sum(probs * np.log(probs + 1e-10))
            entropies.append(entropy)

        return {
            "mean_confidence": float(np.mean(confidences)),
            "min_confidence": float(np.min(confidences)),
            "entropy": float(np.mean(entropies))
        }
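

# Illustrative usage sketch (not part of the module's original API): it builds a fake
# "model_outputs" dict with random per-token logits to show the input layout that
# SoftmaxConfidence.quantify() expects. The array shape and the {"logits": [...]}
# key structure are assumptions based on the checks above.
def _demo_softmax_confidence() -> None:
    rng = np.random.default_rng(0)
    fake_logits = rng.normal(size=(5, 32))  # 5 generated tokens, hypothetical 32-token vocabulary
    metrics = SoftmaxConfidence().quantify({"logits": [fake_logits]})
    print("softmax_confidence:", metrics)

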
class MonteCarloDropout(UncertaintyQuantifier):
    """Uncertainty quantification based on Monte Carlo dropout sampling."""

    def __init__(self):
        """Initialize the Monte Carlo dropout quantifier."""
        super().__init__("mc_dropout")

    def quantify(self, model_outputs: Dict[str, Any]) -> Dict[str, float]:
        """
        Quantify uncertainty using Monte Carlo dropout sampling.

        Args:
            model_outputs: Outputs from the LLM interface, must include multiple samples

        Returns:
            Dictionary of uncertainty metrics:
                - sample_diversity: Lexical diversity across samples
                - length_variance: Variance of sample lengths
                - num_samples: Number of samples compared
        """
        if "samples" not in model_outputs or len(model_outputs["samples"]) <= 1:
            raise ValueError("Model outputs must include multiple samples for MC dropout")

        samples = model_outputs["samples"]

        # Tokenize each sample; fall back to whitespace splitting if NLTK or its
        # tokenizer data is unavailable
        try:
            from nltk.tokenize import word_tokenize
            tokenized_samples = [set(word_tokenize(sample.lower())) for sample in samples]
        except (ImportError, LookupError):
            tokenized_samples = [set(sample.lower().split()) for sample in samples]

        # Pairwise Jaccard similarity between token sets
        similarities = []
        for i in range(len(tokenized_samples)):
            for j in range(i + 1, len(tokenized_samples)):
                intersection = len(tokenized_samples[i].intersection(tokenized_samples[j]))
                union = len(tokenized_samples[i].union(tokenized_samples[j]))
                if union > 0:
                    similarities.append(intersection / union)
                else:
                    similarities.append(1.0)

        # Diversity is the complement of the average pairwise similarity
        diversity = 1.0 - np.mean(similarities) if similarities else 0.0

        # Variance of sample lengths as a coarse spread measure
        sample_lengths = [len(sample) for sample in samples]
        length_variance = np.var(sample_lengths) if len(sample_lengths) > 1 else 0.0

        return {
            "sample_diversity": float(diversity),
            "length_variance": float(length_variance),
            "num_samples": len(samples)
        }
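

# Illustrative usage sketch: assumes the upstream LLM interface is queried several
# times with dropout active and passes the decoded strings under a "samples" key,
# matching the check in MonteCarloDropout.quantify(). The texts are synthetic.
def _demo_mc_dropout() -> None:
    fake_samples = [
        "The capital of France is Paris.",
        "Paris is the capital of France.",
        "The capital of France is Lyon.",
    ]
    metrics = MonteCarloDropout().quantify({"samples": fake_samples})
    print("mc_dropout:", metrics)

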
class EnsembleDisagreement(UncertaintyQuantifier):
    """Uncertainty quantification based on ensemble disagreement."""

    def __init__(self):
        """Initialize the ensemble disagreement quantifier."""
        super().__init__("ensemble_disagreement")

    def quantify(self, ensemble_outputs: List[Dict[str, Any]]) -> Dict[str, float]:
        """
        Quantify uncertainty using ensemble disagreement.

        Args:
            ensemble_outputs: List of outputs from different models

        Returns:
            Dictionary of uncertainty metrics:
                - response_diversity: Lexical diversity across model responses
                - confidence_variance: Variance in confidence scores across models
                - num_models: Number of ensemble members compared
        """
        if not ensemble_outputs or len(ensemble_outputs) <= 1:
            raise ValueError("Ensemble outputs must include results from multiple models")

        responses = [output["response"] for output in ensemble_outputs]

        # Tokenize each response; fall back to whitespace splitting if NLTK or its
        # tokenizer data is unavailable
        try:
            from nltk.tokenize import word_tokenize
            tokenized_responses = [set(word_tokenize(response.lower())) for response in responses]
        except (ImportError, LookupError):
            tokenized_responses = [set(response.lower().split()) for response in responses]

        # Pairwise Jaccard similarity between token sets
        similarities = []
        for i in range(len(tokenized_responses)):
            for j in range(i + 1, len(tokenized_responses)):
                intersection = len(tokenized_responses[i].intersection(tokenized_responses[j]))
                union = len(tokenized_responses[i].union(tokenized_responses[j]))
                if union > 0:
                    similarities.append(intersection / union)
                else:
                    similarities.append(1.0)

        # Diversity is the complement of the average pairwise similarity
        diversity = 1.0 - np.mean(similarities) if similarities else 0.0

        # Variance of per-model confidence scores, when members report them
        confidences = []
        for output in ensemble_outputs:
            if "mean_confidence" in output:
                confidences.append(output["mean_confidence"])

        confidence_variance = np.var(confidences) if len(confidences) > 1 else 0.0

        return {
            "response_diversity": float(diversity),
            "confidence_variance": float(confidence_variance),
            "num_models": len(ensemble_outputs)
        }
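

# Illustrative usage sketch: each ensemble member is assumed to contribute a dict
# with a "response" string and, optionally, a "mean_confidence" score (e.g. taken
# from SoftmaxConfidence). Keys mirror what EnsembleDisagreement.quantify() reads;
# the values themselves are made up.
def _demo_ensemble_disagreement() -> None:
    fake_ensemble_outputs = [
        {"response": "The answer is 42.", "mean_confidence": 0.91},
        {"response": "I believe the answer is 42.", "mean_confidence": 0.78},
        {"response": "It is probably 41.", "mean_confidence": 0.55},
    ]
    metrics = EnsembleDisagreement().quantify(fake_ensemble_outputs)
    print("ensemble_disagreement:", metrics)

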
class CalibrationMetrics(UncertaintyQuantifier):
    """Uncertainty quantification based on calibration metrics."""

    def __init__(self):
        """Initialize the calibration metrics quantifier."""
        super().__init__("calibration_metrics")

    def expected_calibration_error(
        self,
        confidences: List[float],
        accuracies: List[bool],
        num_bins: int = 10
    ) -> float:
        """
        Calculate Expected Calibration Error (ECE).

        Args:
            confidences: List of confidence scores
            accuracies: List of boolean accuracy indicators
            num_bins: Number of bins for binning confidences

        Returns:
            Expected Calibration Error
        """
        if len(confidences) != len(accuracies):
            raise ValueError("Confidences and accuracies must have the same length")

        if not confidences:
            return 0.0

        # Assign each confidence to one of num_bins equal-width bins on [0, 1];
        # num_bins + 1 edges yield exactly num_bins bins, and clipping keeps the
        # boundary values 0.0 and 1.0 inside the first and last bin
        bin_edges = np.linspace(0, 1, num_bins + 1)
        bin_indices = np.clip(np.digitize(confidences, bin_edges), 1, num_bins)
        ece = 0.0

        for bin_idx in range(1, num_bins + 1):
            bin_mask = (bin_indices == bin_idx)
            if np.any(bin_mask):
                bin_confidences = np.array(confidences)[bin_mask]
                bin_accuracies = np.array(accuracies)[bin_mask]
                bin_confidence = np.mean(bin_confidences)
                bin_accuracy = np.mean(bin_accuracies)
                bin_size = np.sum(bin_mask)

                # Weighted absolute gap between confidence and accuracy in this bin
                ece += (bin_size / len(confidences)) * np.abs(bin_confidence - bin_accuracy)

        return float(ece)

    def maximum_calibration_error(
        self,
        confidences: List[float],
        accuracies: List[bool],
        num_bins: int = 10
    ) -> float:
        """
        Calculate Maximum Calibration Error (MCE).

        Args:
            confidences: List of confidence scores
            accuracies: List of boolean accuracy indicators
            num_bins: Number of bins for binning confidences

        Returns:
            Maximum Calibration Error
        """
        if len(confidences) != len(accuracies):
            raise ValueError("Confidences and accuracies must have the same length")

        if not confidences:
            return 0.0

        # Same binning scheme as expected_calibration_error
        bin_edges = np.linspace(0, 1, num_bins + 1)
        bin_indices = np.clip(np.digitize(confidences, bin_edges), 1, num_bins)
        max_ce = 0.0

        for bin_idx in range(1, num_bins + 1):
            bin_mask = (bin_indices == bin_idx)
            if np.any(bin_mask):
                bin_confidences = np.array(confidences)[bin_mask]
                bin_accuracies = np.array(accuracies)[bin_mask]
                bin_confidence = np.mean(bin_confidences)
                bin_accuracy = np.mean(bin_accuracies)

                # MCE tracks the worst-calibrated bin rather than the weighted average
                ce = np.abs(bin_confidence - bin_accuracy)
                max_ce = max(max_ce, ce)

        return float(max_ce)

    def quantify(
        self,
        confidences: List[float],
        accuracies: List[bool]
    ) -> Dict[str, float]:
        """
        Quantify uncertainty using calibration metrics.

        Args:
            confidences: List of confidence scores
            accuracies: List of boolean accuracy indicators

        Returns:
            Dictionary of calibration metrics:
                - ece: Expected Calibration Error
                - mce: Maximum Calibration Error
        """
        return {
            "ece": self.expected_calibration_error(confidences, accuracies),
            "mce": self.maximum_calibration_error(confidences, accuracies)
        }
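

# Illustrative usage sketch with hand-made numbers: ECE/MCE compare stated confidence
# against observed correctness, so the inputs are simply parallel lists of confidence
# scores and boolean outcomes. These values are not taken from any real evaluation.
def _demo_calibration_metrics() -> None:
    fake_confidences = [0.95, 0.90, 0.80, 0.70, 0.60, 0.55]
    fake_accuracies = [True, True, False, True, False, False]
    metrics = CalibrationMetrics().quantify(fake_confidences, fake_accuracies)
    print("calibration_metrics:", metrics)

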
def create_uncertainty_quantifier(method: str) -> UncertaintyQuantifier:
    """
    Create an uncertainty quantifier based on the specified method.

    Args:
        method: Name of the uncertainty quantification method

    Returns:
        Uncertainty quantifier instance
    """
    if method == "softmax_confidence":
        return SoftmaxConfidence()
    elif method == "mc_dropout":
        return MonteCarloDropout()
    elif method == "ensemble_disagreement":
        return EnsembleDisagreement()
    elif method == "calibration_metrics":
        return CalibrationMetrics()
    else:
        raise ValueError(f"Unsupported uncertainty quantification method: {method}")
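

# Minimal end-to-end sketch on synthetic data (run this module directly to execute it).
# It only exercises the demo helpers above plus the factory, and makes no claim about
# how the surrounding LLM interface actually produces these inputs.
if __name__ == "__main__":
    _demo_softmax_confidence()
    _demo_mc_dropout()
    _demo_ensemble_disagreement()
    _demo_calibration_metrics()

    # The factory allows the method to be chosen by name, e.g. from a configuration file
    quantifier = create_uncertainty_quantifier("softmax_confidence")
    print("factory created:", quantifier.name)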