File size: 1,795 Bytes
ad16788 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
from distutils.version import LooseVersion
import torch
# Resolve the reduction-op enum by feature detection instead of parsing
# ``torch.__version__``: newer PyTorch exposes ``ReduceOp`` while older
# releases only provide the same enum under the name ``reduce_op``.
if torch.distributed.is_available():
    try:
        from torch.distributed import ReduceOp
    except ImportError:
        # Legacy name used by old PyTorch releases.
        from torch.distributed import reduce_op as ReduceOp
else:
    # The distributed package is unavailable in this build; keep the name
    # defined so that importing this module still succeeds.
    ReduceOp = None
def recursive_sum(obj, weight: torch.Tensor, distributed: bool = False):
    """Reduce every tensor in a nested structure to its weighted sum.

    Lists, tuples (including namedtuples) and dicts are traversed
    recursively and rebuilt with the same container type; ``None`` entries
    are passed through unchanged.

    Args:
        obj: A tensor, ``None``, or an arbitrarily nested list/tuple/dict
            of those. Every tensor must have the same size as ``weight``.
        weight: 1-dimensional tensor of per-element weights. It is cast to
            each tensor's dtype before multiplication.
        distributed: If True, each summed tensor is additionally reduced
            in place with ``torch.distributed.all_reduce`` using
            ``ReduceOp.SUM``.

    Returns:
        A structure shaped like ``obj`` in which each tensor is replaced by
        the scalar tensor ``(tensor * weight).sum()``.

    Raises:
        AssertionError: If ``weight`` is not 1-dimensional or a tensor's
            size differs from ``weight``'s size.
        ValueError: If an element is of an unsupported type.
    """
    assert weight.dim() == 1, weight.size()
    if isinstance(obj, (tuple, list)):
        summed = [recursive_sum(v, weight, distributed) for v in obj]
        if isinstance(obj, tuple) and hasattr(obj, "_fields"):
            # namedtuple constructors take one positional argument per
            # field rather than a single iterable.
            return type(obj)(*summed)
        return type(obj)(summed)
    elif isinstance(obj, dict):
        return {k: recursive_sum(v, weight, distributed) for k, v in obj.items()}
    elif isinstance(obj, torch.Tensor):
        assert obj.size() == weight.size(), (obj.size(), weight.size())
        obj = (obj * weight.type(obj.dtype)).sum()
        if distributed:
            # all_reduce modifies ``obj`` in place.
            torch.distributed.all_reduce(obj, op=ReduceOp.SUM)
        return obj
    elif obj is None:
        return None
    else:
        raise ValueError(type(obj))
def recursive_divide(a, b: torch.Tensor):
    """Divide every tensor in a nested structure by ``b``.

    Lists, tuples (including namedtuples) and dicts are traversed
    recursively and rebuilt with the same container type; ``None`` entries
    are passed through unchanged.

    Args:
        a: A tensor, ``None``, or an arbitrarily nested list/tuple/dict of
            those. Every tensor must have the same size as ``b``.
        b: Divisor tensor. It is cast to each tensor's dtype before the
            division.

    Returns:
        A structure shaped like ``a`` with each tensor replaced by
        ``tensor / b``.

    Raises:
        AssertionError: If a tensor's size differs from ``b``'s size.
        ValueError: If an element is of an unsupported type.
    """
    if isinstance(a, (tuple, list)):
        divided = [recursive_divide(v, b) for v in a]
        if isinstance(a, tuple) and hasattr(a, "_fields"):
            # namedtuple constructors take one positional argument per
            # field rather than a single iterable.
            return type(a)(*divided)
        return type(a)(divided)
    elif isinstance(a, dict):
        return {k: recursive_divide(v, b) for k, v in a.items()}
    elif isinstance(a, torch.Tensor):
        assert a.size() == b.size(), (a.size(), b.size())
        return a / b.type(a.dtype)
    elif a is None:
        return None
    else:
        raise ValueError(type(a))
def recursive_average(obj, weight: torch.Tensor, distributed: bool = False):
    """Compute the weighted average of every tensor in a nested structure.

    Each tensor in ``obj`` is first reduced to its weighted sum via
    ``recursive_sum`` and then divided by the total weight via
    ``recursive_divide``.

    Args:
        obj: A tensor, ``None``, or nested list/tuple/dict of those, as
            accepted by ``recursive_sum``.
        weight: 1-dimensional tensor of per-element weights.
        distributed: If True, both the weighted sums and the total weight
            are reduced with ``torch.distributed.all_reduce`` using
            ``ReduceOp.SUM``.

    Returns:
        A pair ``(averaged, total_weight)`` where ``averaged`` mirrors the
        structure of ``obj`` and ``total_weight`` is the summed weight.
    """
    weighted_totals = recursive_sum(obj, weight, distributed)

    total_weight = weight.sum()
    if distributed:
        # In-place reduction of the total weight, issued after the
        # reductions performed inside recursive_sum.
        torch.distributed.all_reduce(total_weight, op=ReduceOp.SUM)

    # Dividing the weighted sums by the total weight is equivalent to
    # using weights normalized to sum to 1.
    return recursive_divide(weighted_totals, total_weight), total_weight
|