# ztrain/util.py
# Copyright (c) 2024 Praxis Maldevide - cc-by-nc-4.0 granted

import contextlib

import torch


@contextlib.contextmanager
def cuda_memory_profiler(display: bool = True):
    """
    A context manager for profiling CUDA memory usage in PyTorch.

    Prints peak, start, end, and net allocated memory (in MB) for the
    wrapped block. When display is False or CUDA is unavailable, it
    simply yields without profiling.
    """
    if not display:
        yield
        return
    if not torch.cuda.is_available():
        print("CUDA is not available, skipping memory profiling")
        yield
        return
    # Clear the peak-memory counter and wait for pending kernels so the
    # baseline reading reflects the true state of the allocator.
    torch.cuda.reset_peak_memory_stats()
    torch.cuda.synchronize()
    start_memory = torch.cuda.memory_allocated()
    try:
        yield
    finally:
        # Synchronize again so work launched inside the block is counted.
        torch.cuda.synchronize()
        end_memory = torch.cuda.memory_allocated()
        print(f"Peak memory usage: {torch.cuda.max_memory_allocated() / (1024 ** 2):.2f} MB")
        print(f"Memory allocated at start: {start_memory / (1024 ** 2):.2f} MB")
        print(f"Memory allocated at end: {end_memory / (1024 ** 2):.2f} MB")
        print(f"Net memory change: {(end_memory - start_memory) / (1024 ** 2):.2f} MB")


def get_device() -> torch.device:
    """Return the best available device: CUDA, then MPS, then CPU."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    if torch.backends.mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")
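

if __name__ == "__main__":
    # Minimal smoke test, assuming only that torch is installed: run a small
    # matmul on whatever device get_device() selects. Without CUDA, the
    # profiler prints a notice and yields without collecting statistics.
    device = get_device()
    print(f"Selected device: {device}")
    with cuda_memory_profiler():
        x = torch.randn(1024, 1024, device=device)
        y = x @ x
        print(f"Result norm: {y.norm().item():.2f}")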