import os
import subprocess
import sys
import time

import torch
# Multi-GPU launcher: re-invokes the command given on this script's command
# line once per visible CUDA device, tagging every copy with the flags
# --n_gpus, --group_name and --rank.
#
# Rank 0 inherits this process's stdout; every other rank has its stdout
# redirected to logs/<job_id>_GPU_<rank>.log.

# Arguments shared by every worker (everything after this script's own name).
argslist = list(sys.argv)[1:]
num_gpus = torch.cuda.device_count()
argslist.append('--n_gpus={}'.format(num_gpus))
workers = []
# The launch timestamp doubles as the group name suffix and log file prefix.
job_id = time.strftime("%Y_%m_%d-%H%M%S")
argslist.append("--group_name=group_{}".format(job_id))

# Create the log directory before spawning anything: if opening a log file
# failed half-way through the loop, the workers already started would be
# left running without their peers.
if num_gpus > 1:
    os.makedirs("logs", exist_ok=True)

for rank in range(num_gpus):
    # Build a per-worker list rather than appending to and then trimming the
    # shared one, so `argslist` always holds only the common arguments.
    worker_args = argslist + ['--rank={}'.format(rank)]
    print(worker_args)
    command = [str(sys.executable)] + worker_args
    if rank == 0:
        # stdout is left unset so rank 0 writes to the launcher's stdout.
        workers.append(subprocess.Popen(command))
        continue
    log_path = "logs/{}_GPU_{}.log".format(job_id, rank)
    # The child receives its own copy of the file descriptor at spawn time,
    # so the parent's handle can be closed as soon as Popen returns.
    with open(log_path, "w") as log_file:
        workers.append(subprocess.Popen(command, stdout=log_file))

# Block until every worker has exited.
for p in workers:
    p.wait()