| """ |
| Shared gradient visualization utilities for DP-SGD trainers. |
| |
| This module provides consistent gradient norm generation and clipping |
| visualization across all trainer implementations. |
| """ |
|
|
from typing import Dict, List, Optional

import numpy as np
|
|
|
|
def generate_gradient_norms(clipping_norm: float, num_points: int = 100) -> List[Dict[str, float]]:
    """
    Generate realistic gradient norms following a log-normal distribution.

    In real DP-SGD training, gradient norms typically follow a log-normal
    distribution, with most gradients being smaller than the clipping threshold
    and some exceeding it.

    Args:
        clipping_norm: The clipping threshold (C); must be positive.
        num_points: Number of gradient samples to generate.

    Returns:
        List of dicts with 'x' (gradient norm) and 'y' (density) keys,
        sorted by x value for smooth visualization.

    Raises:
        ValueError: If clipping_norm is not positive (log of a non-positive
            value would silently poison every generated point with NaN/inf).
    """
    if clipping_norm <= 0:
        raise ValueError(f"clipping_norm must be positive, got {clipping_norm}")

    # Center the log-normal so the bulk of the mass sits below the threshold.
    mu = np.log(clipping_norm) - 0.5
    sigma = 0.8

    gradients = []
    for _ in range(num_points):
        # Box-Muller transform to get a standard normal sample z.
        u1, u2 = np.random.random(2)
        # np.random.random() is in [0, 1); map u1 to (0, 1] so log(u1) is finite.
        u1 = 1.0 - u1
        z = np.sqrt(-2.0 * np.log(u1)) * np.cos(2.0 * np.pi * u2)
        norm = np.exp(mu + sigma * z)

        # Log-normal pdf evaluated at the sampled norm.
        density = np.exp(-((np.log(norm) - mu) ** 2) / (2 * sigma * sigma)) / \
                  (norm * sigma * np.sqrt(2 * np.pi))

        # Baseline offset plus jitter so the rendered curve looks natural.
        # (The original "0.8 * (density / 0.8)" factor cancels to `density`.)
        density = 0.2 + density + 0.1 * (np.random.random() - 0.5)

        # Floor at 0.01 so every point remains visible on the chart.
        gradients.append({'x': float(norm), 'y': float(max(0.01, density))})

    return sorted(gradients, key=lambda g: g['x'])
|
|
|
|
def generate_clipped_gradients(
    clipping_norm: float,
    original_gradients: Optional[List[Dict[str, float]]] = None,
    num_points: int = 100
) -> List[Dict[str, float]]:
    """
    Generate clipped versions of gradient norms.

    Demonstrates how gradient clipping limits the maximum gradient norm,
    creating a "pile-up" effect at the clipping threshold.

    Args:
        clipping_norm: The clipping threshold (C).
        original_gradients: Optional pre-generated gradients to clip.
            If None, generates new gradients first.
        num_points: Number of points if generating new gradients.

    Returns:
        List of dicts with 'x' (clipped gradient norm) and 'y' (density) keys,
        sorted by x value.
    """
    if original_gradients is None:
        original_gradients = generate_gradient_norms(clipping_norm, num_points)

    # Cap every norm at the threshold; densities are carried over unchanged.
    clipped = [
        {'x': min(g['x'], clipping_norm), 'y': g['y']}
        for g in original_gradients
    ]

    return sorted(clipped, key=lambda g: g['x'])
|
|
|
|
def generate_gradient_info(clipping_norm: float, num_points: int = 100) -> Dict[str, List[Dict[str, float]]]:
    """
    Build the full before/after-clipping dataset for a visualization.

    Convenience wrapper: samples a fresh gradient-norm distribution, then
    derives its clipped counterpart, so callers get both series in one call.

    Args:
        clipping_norm: The clipping threshold (C).
        num_points: Number of gradient samples to generate.

    Returns:
        Dict with 'before_clipping' and 'after_clipping' keys, each holding
        a list of {'x', 'y'} gradient samples.
    """
    raw_samples = generate_gradient_norms(clipping_norm, num_points)
    return {
        'before_clipping': raw_samples,
        'after_clipping': generate_clipped_gradients(clipping_norm, raw_samples),
    }
|
|