import torch
import torch.multiprocessing as mp

# Matrix size: large enough to keep the compute units saturated
MATRIX_SIZE = 24576
DTYPE = torch.bfloat16
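# Note: bfloat16 matmuls run on the tensor cores of Ampere-and-newer
# NVIDIA GPUs, which is what keeps utilization pinned at 100%.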

def worker(rank):
    """Worker process pinned to one GPU."""
    # 1. Bind this process to its device
    device = torch.device(f"cuda:{rank}")
    torch.cuda.set_device(device)

    # ==========================================
    # The insertion point is here!
    # ==========================================
    try:
        # 1.2e11 bytes ≈ 120 GB, since dtype=torch.int8 means 1 byte per element.
        # An H200 has roughly 141 GB of memory; for a lighter footprint,
        # change 1.2e11 to 6e10 (≈ 60 GB).
        filler = torch.empty(int(1.2e11), dtype=torch.int8, device=device)
        # The memory stays occupied as long as `filler` is not del'ed
    except RuntimeError:
        print(f"[GPU {rank}] Allocation too large; skipping the memory filler and running the compute load only...")

    # 2. Prepare the compute data
    a = torch.randn(MATRIX_SIZE, MATRIX_SIZE, device=device, dtype=DTYPE)
    b = torch.randn(MATRIX_SIZE, MATRIX_SIZE, device=device, dtype=DTYPE)
    c = torch.empty(MATRIX_SIZE, MATRIX_SIZE, device=device, dtype=DTYPE)

    # 3. Compute in an endless loop (100% utilization)
    while True:
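        # Kernel launches are asynchronous; the CUDA launch queue applies
        # back-pressure, so this loop keeps the GPU busy without spinning
        # the host CPU flat out.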
        torch.mm(a, b, out=c)

def main():
    world_size = torch.cuda.device_count()
    print(f"Starting full-load placeholders on {world_size} GPU(s) (memory + compute)...")
    print("Press Ctrl+C to stop")
    # Launch one process per GPU
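    # (mp.spawn calls worker(rank) once per process, with rank in [0, nprocs).)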
    mp.spawn(worker, nprocs=world_size, join=True)

if __name__ == "__main__":
    mp.set_start_method('spawn', force=True)
    main()
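
Run the script directly (the filename is your choice, e.g. `python occupy_gpus.py`): it spawns one worker per visible GPU, and a Ctrl+C in the launching shell delivers SIGINT to the whole process group, stopping every worker.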