File size: 647 Bytes
a722365 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
import time
import torch
import sys
import subprocess
argslist = list(sys.argv)[1:]
num_gpus = torch.cuda.device_count()
argslist.append('--n_gpus={}'.format(num_gpus))
workers = []
job_id = time.strftime("%Y_%m_%d-%H%M%S")
argslist.append("--group_name=group_{}".format(job_id))
for i in range(num_gpus):
argslist.append('--rank={}'.format(i))
stdout = None if i == 0 else open("logs/{}_GPU_{}.log".format(job_id, i),
"w")
print(argslist)
p = subprocess.Popen([str(sys.executable)]+argslist, stdout=stdout)
workers.append(p)
argslist = argslist[:-1]
for p in workers:
p.wait()
|