""" |
|
|
Utilities to create common models |
|
|
""" |

from functools import lru_cache
from typing import Optional, Tuple

import torch
import torch.distributed as dist
from torch import nn


@lru_cache
def is_rank0() -> bool:
    """Return True when torch.distributed is not initialized or this process is global rank 0."""
    return (not dist.is_initialized()) or (dist.get_rank() == 0)


def print_gpu_memory_usage(prefix: str = "GPU memory usage") -> None:
    """Report the current GPU VRAM usage (rank 0 only)."""
    if is_rank0():
        # torch.cuda.mem_get_info() returns (free, total) in bytes for the current device.
        free_mem, total_mem = torch.cuda.mem_get_info()
        print(f"{prefix}: {(total_mem - free_mem) / (1024**3):.2f} GB / {total_mem / (1024**3):.2f} GB.")


def _get_model_size(model: nn.Module, scale: str = "auto") -> Tuple[float, str]:
    """Return the parameter count of ``model``, scaled to billions, millions, or thousands."""
    n_params = sum(p.numel() for p in model.parameters())

    # Pick a human-readable unit automatically from the raw parameter count.
    if scale == "auto":
        if n_params > 1e9:
            scale = "B"
        elif n_params > 1e6:
            scale = "M"
        elif n_params > 1e3:
            scale = "K"
        else:
            scale = ""

    if scale == "B":
        n_params = n_params / 1e9
    elif scale == "M":
        n_params = n_params / 1e6
    elif scale == "K":
        n_params = n_params / 1e3
    elif scale == "":
        pass
    else:
        raise NotImplementedError(f"Unknown scale {scale}.")

    return n_params, scale


def print_model_size(model: nn.Module, name: Optional[str] = None) -> None:
    """Print the model's parameter count (rank 0 only)."""
    if is_rank0():
        n_params, scale = _get_model_size(model, scale="auto")
        if name is None:
            name = model.__class__.__name__

        print(f"{name} contains {n_params:.2f}{scale} parameters.")
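

# Example usage (an illustrative sketch, not part of the module's API): the
# snippet below assumes a CUDA device is available and, for multi-GPU runs,
# that torch.distributed was already initialized by the launcher (e.g. torchrun).
#
#   model = nn.Linear(1024, 1024).cuda()
#   print_model_size(model, name="Toy layer")          # e.g. "Toy layer contains 1.05M parameters."
#   print_gpu_memory_usage("After building the model")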