# ztrain/util.py | |
# Copyright (c) 2024 Praxis Maldevide - cc-by-nc-4.0 granted | |
import contextlib | |
import torch | |
@contextlib.contextmanager
def cuda_memory_profiler(display: bool = True):
    """
    A context manager for profiling CUDA memory usage in PyTorch.

    On exit, prints peak memory usage, the allocated memory at entry and
    exit, and the net change across the managed block.

    Args:
        display: When False, the profiler is a no-op passthrough.

    Yields:
        None. The wrapped block runs between entry and exit bookkeeping.
    """
    # Fix: the original lacked @contextlib.contextmanager, so using this
    # function in a `with` statement failed (generators have no __enter__).
    if display is False:
        yield
        return

    if not torch.cuda.is_available():
        print("CUDA is not available, skipping memory profiling")
        yield
        return

    # Reset peak stats so max_memory_allocated() reflects only this block.
    torch.cuda.reset_peak_memory_stats()
    # Synchronize so pending kernels don't skew the starting measurement.
    torch.cuda.synchronize()
    start_memory = torch.cuda.memory_allocated()
    try:
        yield
    finally:
        # Report even if the managed block raised.
        torch.cuda.synchronize()
        end_memory = torch.cuda.memory_allocated()
        print(f"Peak memory usage: {torch.cuda.max_memory_allocated() / (1024 ** 2):.2f} MB")
        print(f"Memory allocated at start: {start_memory / (1024 ** 2):.2f} MB")
        print(f"Memory allocated at end: {end_memory / (1024 ** 2):.2f} MB")
        print(f"Net memory change: {(end_memory - start_memory) / (1024 ** 2):.2f} MB")
def get_device():
    """Select the best available torch device: CUDA, then Apple MPS, then CPU."""
    if torch.cuda.is_available():
        backend = "cuda"
    elif torch.backends.mps.is_available():
        backend = "mps"
    else:
        backend = "cpu"
    return torch.device(backend)