import os
import sys
import ast
import copy
import time
import logging
import argparse

import yaml
import jinja2
from jinja2 import meta
import easydict

import torch
from torch import distributed as dist
from torch_geometric.data import Data
from torch_geometric.datasets import RelLinkPredDataset, WordNet18RR

from ultra import models, datasets


logger = logging.getLogger(__file__)


def detect_variables(cfg_file):
    """Return the undeclared Jinja2 variables referenced in the config template."""
    with open(cfg_file, "r") as fin:
        raw = fin.read()
    env = jinja2.Environment()
    tree = env.parse(raw)
    vars = meta.find_undeclared_variables(tree)
    return vars


def load_config(cfg_file, context=None):
    """Render the Jinja2-templated YAML config with `context`, then parse it into an EasyDict."""
    with open(cfg_file, "r") as fin:
        raw = fin.read()
    template = jinja2.Template(raw)
    instance = template.render(context)
    cfg = yaml.safe_load(instance)
    cfg = easydict.EasyDict(cfg)
    return cfg


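# A hedged illustration (not part of the original file): for a hypothetical
# config/example.yaml containing
#
#   output_dir: ~/experiments
#   train:
#     gpus: {{ gpus }}
#
# detect_variables("config/example.yaml") returns {"gpus"}, and
# load_config("config/example.yaml", {"gpus": "[0]"}) renders the template
# before parsing the result with yaml.safe_load.

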
def literal_eval(string):
    """Parse a string as a Python literal if possible; otherwise return it unchanged."""
    try:
        return ast.literal_eval(string)
    except (ValueError, SyntaxError):
        return string


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", help="yaml configuration file", required=True)
    parser.add_argument("-s", "--seed", help="random seed for PyTorch", type=int, default=1024)

    args, unparsed = parser.parse_known_args()
    # get dynamic arguments defined in the config file
    vars = detect_variables(args.config)
    parser = argparse.ArgumentParser()
    for var in vars:
        parser.add_argument("--%s" % var, required=True)
    vars = parser.parse_known_args(unparsed)[0]
    vars = {k: literal_eval(v) for k, v in vars._get_kwargs()}

    return args, vars


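# Example invocation (a sketch; the flag name is hypothetical): if the config
# references {{ gpus }}, the second parser requires a matching flag, e.g.
#   python script.py -c config/example.yaml --gpus "[0,1]"
# and parse_args() returns the static args plus {"gpus": [0, 1]}.

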
def get_root_logger(file=True):
    format = "%(asctime)-10s %(message)s"
    datefmt = "%H:%M:%S"
    logging.basicConfig(format=format, datefmt=datefmt)
    logger = logging.getLogger("")
    logger.setLevel(logging.INFO)

    if file:
        handler = logging.FileHandler("log.txt")
        formatter = logging.Formatter(format, datefmt)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    return logger


def get_rank():
    if dist.is_initialized():
        return dist.get_rank()
    if "RANK" in os.environ:
        return int(os.environ["RANK"])
    return 0


def get_world_size():
    if dist.is_initialized():
        return dist.get_world_size()
    if "WORLD_SIZE" in os.environ:
        return int(os.environ["WORLD_SIZE"])
    return 1


def synchronize():
    if get_world_size() > 1:
        dist.barrier()


def get_device(cfg):
    if cfg.train.gpus:
        device = torch.device(cfg.train.gpus[get_rank()])
    else:
        device = torch.device("cpu")
    return device


def get_devices(gpus):
    if gpus is not None:
        device = torch.device(gpus[get_rank()])
    else:
        device = torch.device("cpu")
    return device


def create_working_directory(cfg):
    """Create a timestamped working directory on rank 0, share its path with the
    other ranks through a temporary file, and chdir into it."""
    file_name = "working_dir.tmp"
    world_size = get_world_size()
    if cfg.train.gpus is not None and len(cfg.train.gpus) != world_size:
        error_msg = "World size is %d but found %d GPUs in the argument"
        if world_size == 1:
            error_msg += ". Did you launch with `python -m torch.distributed.launch`?"
        raise ValueError(error_msg % (world_size, len(cfg.train.gpus)))
    if world_size > 1 and not dist.is_initialized():
        dist.init_process_group("nccl", init_method="env://")

    working_dir = os.path.join(os.path.expanduser(cfg.output_dir),
                               cfg.model["class"], cfg.dataset["class"],
                               time.strftime("%Y-%m-%d-%H-%M-%S"))

    # synchronize working directory across ranks
    if get_rank() == 0:
        with open(file_name, "w") as fout:
            fout.write(working_dir)
        os.makedirs(working_dir)
    synchronize()
    if get_rank() != 0:
        with open(file_name, "r") as fin:
            working_dir = fin.read()
    synchronize()
    if get_rank() == 0:
        os.remove(file_name)

    os.chdir(working_dir)
    return working_dir


def build_dataset(cfg):
    """Instantiate the dataset class named in the config and log split sizes on rank 0."""
    data_config = copy.deepcopy(cfg.dataset)
    cls = data_config.pop("class")

    ds_cls = getattr(datasets, cls)
    dataset = ds_cls(**data_config)

    if get_rank() == 0:
        logger.warning("%s dataset" % (cls if "version" not in cfg.dataset else f'{cls}({cfg.dataset.version})'))
        if cls != "JointDataset":
            logger.warning("#train: %d, #valid: %d, #test: %d" %
                           (dataset[0].target_edge_index.shape[1],
                            dataset[1].target_edge_index.shape[1],
                            dataset[2].target_edge_index.shape[1]))
        else:
            logger.warning("#train: %d, #valid: %d, #test: %d" %
                           (sum(d.target_edge_index.shape[1] for d in dataset._data[0]),
                            sum(d.target_edge_index.shape[1] for d in dataset._data[1]),
                            sum(d.target_edge_index.shape[1] for d in dataset._data[2])))

    return dataset
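

# A minimal usage sketch (an assumption, not part of the original module): the
# repository's training scripts are expected to compose these helpers roughly
# as follows.
if __name__ == "__main__":
    args, vars = parse_args()
    cfg = load_config(args.config, context=vars)
    working_dir = create_working_directory(cfg)
    logger = get_root_logger()
    dataset = build_dataset(cfg)
    device = get_device(cfg)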