import torch
import torch.multiprocessing as mp

# Matrix size: large enough to keep the compute units saturated
MATRIX_SIZE = 24576
DTYPE = torch.bfloat16
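# Note: bfloat16 matmuls run on the tensor cores of Ampere-and-newer
# NVIDIA GPUs, which is what keeps utilization pinned at 100%.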

def worker(rank):
    """Worker process pinned to one GPU."""
    # 1. Bind this process to its device
    device = torch.device(f"cuda:{rank}")
    torch.cuda.set_device(device)

    # ==========================================
    # The insertion point is here!
    # ==========================================
    try:
        # 1.2e11 bytes ≈ 120 GB, since dtype=torch.int8 means 1 byte per element.
        # An H200 has roughly 141 GB of memory; for a lighter footprint,
        # change 1.2e11 to 6e10 (≈ 60 GB).
        filler = torch.empty(int(1.2e11), dtype=torch.int8, device=device)
        # The memory stays occupied as long as `filler` is not del'ed
    except RuntimeError:
        print(f"[GPU {rank}] Allocation too large; skipping the memory filler and running the compute load only...")

    # 2. Prepare the compute data
    a = torch.randn(MATRIX_SIZE, MATRIX_SIZE, device=device, dtype=DTYPE)
    b = torch.randn(MATRIX_SIZE, MATRIX_SIZE, device=device, dtype=DTYPE)
    c = torch.empty(MATRIX_SIZE, MATRIX_SIZE, device=device, dtype=DTYPE)

    # 3. Compute in an endless loop (100% utilization)
    while True:
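        # Kernel launches are asynchronous; the CUDA launch queue applies
        # back-pressure, so this loop keeps the GPU busy without spinning
        # the host CPU flat out.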
        torch.mm(a, b, out=c)

def main():
    world_size = torch.cuda.device_count()
    print(f"Starting full-load placeholders on {world_size} GPU(s) (memory + compute)...")
    print("Press Ctrl+C to stop")
    # Launch one process per GPU
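    # (mp.spawn calls worker(rank) once per process, with rank in [0, nprocs).)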
    mp.spawn(worker, nprocs=world_size, join=True)

if __name__ == "__main__":
    mp.set_start_method('spawn', force=True)
    main()
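
Run the script directly (the filename is your choice, e.g. `python occupy_gpus.py`): it spawns one worker per visible GPU, and a Ctrl+C in the launching shell delivers SIGINT to the whole process group, stopping every worker.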