from itertools import chain

import torch
from torch.nn import DataParallel
from torch.cuda._utils import _get_device_index  # private PyTorch helper
from torch.nn.parallel._functions import Scatter


def scatter_imbalance(inputs, target_gpus, dim=0):
r"""
Slices tensors into approximately equal chunks and
distributes them across given GPUs. Duplicates
references to objects that are not tensors.
"""
def scatter_map(obj):
        if isinstance(obj, torch.Tensor):
            # Hand-tuned imbalanced splits for specific (GPU count, batch size)
            # pairs: the first GPU always receives the smallest chunk,
            # presumably because it also serves as the default output/gather
            # device and therefore carries extra memory load.
            if (len(target_gpus) == 4) and (obj.size(dim) == 22):
                return Scatter.apply(target_gpus, (4, 6, 6, 6), dim, obj)
            elif (len(target_gpus) == 4) and (obj.size(dim) == 60):
                return Scatter.apply(target_gpus, (12, 16, 16, 16), dim, obj)
elif (len(target_gpus) == 4) and (obj.size(dim) == 144):
return Scatter.apply(target_gpus, (24, 40, 40, 40), dim, obj)
elif (len(target_gpus) == 8) and (obj.size(dim) == 46):
return Scatter.apply(target_gpus, (4, 6, 6, 6, 6, 6, 6, 6), dim, obj)
elif (len(target_gpus) == 8) and (obj.size(dim) == 62):
return Scatter.apply(target_gpus, (6, 8, 8, 8, 8, 8, 8, 8), dim, obj)
elif (len(target_gpus) == 8) and (obj.size(dim) == 94):
return Scatter.apply(target_gpus, (10, 12, 12, 12, 12, 12, 12, 12), dim, obj)
elif (len(target_gpus) == 8) and (obj.size(dim) == 110):
return Scatter.apply(target_gpus, (12, 14, 14, 14, 14, 14, 14, 14), dim, obj)
elif (len(target_gpus) == 8) and (obj.size(dim) == 118):
return Scatter.apply(target_gpus, (13, 15, 15, 15, 15, 15, 15, 15), dim, obj)
elif (len(target_gpus) == 8) and (obj.size(dim) == 126):
return Scatter.apply(target_gpus, (14, 16, 16, 16, 16, 16, 16, 16), dim, obj)
elif (len(target_gpus) == 8) and (obj.size(dim) == 134):
return Scatter.apply(target_gpus, (15, 17, 17, 17, 17, 17, 17, 17), dim, obj)
elif (len(target_gpus) == 8) and (obj.size(dim) == 142):
return Scatter.apply(target_gpus, (16, 18, 18, 18, 18, 18, 18, 18), dim, obj)
elif (len(target_gpus) == 16) and (obj.size(dim) == 222):
return Scatter.apply(target_gpus, (12, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14), dim, obj)
            # Fall back to PyTorch's default (near-)equal chunking.
            return Scatter.apply(target_gpus, None, dim, obj)
if isinstance(obj, tuple) and len(obj) > 0:
return list(zip(*map(scatter_map, obj)))
if isinstance(obj, list) and len(obj) > 0:
return list(map(list, zip(*map(scatter_map, obj))))
if isinstance(obj, dict) and len(obj) > 0:
return list(map(type(obj), zip(*map(scatter_map, obj.items()))))
        # Non-tensor leaves are replicated by reference, once per target GPU.
        return [obj for _ in target_gpus]
# After scatter_map is called, a scatter_map cell will exist. This cell
# has a reference to the actual function scatter_map, which has references
# to a closure that has a reference to the scatter_map cell (because the
# fn is recursive). To avoid this reference cycle, we set the function to
# None, clearing the cell
try:
return scatter_map(inputs)
finally:
scatter_map = None
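
# Example (a sketch, assuming at least 4 visible CUDA devices): a tensor with
# 22 rows scattered over 4 GPUs hits the hard-coded (4, 6, 6, 6) split along
# dim 0, so the first GPU receives the smallest chunk:
#
#     x = torch.randn(22, 8, device="cuda:0")
#     per_gpu = scatter_imbalance((x,), target_gpus=[0, 1, 2, 3])
#     [args[0].size(0) for args in per_gpu]  # -> [4, 6, 6, 6]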


def scatter_kwargs_imbalance(inputs, kwargs, target_gpus, dim=0):
    r"""Scatter with support for a kwargs dictionary."""
inputs = scatter_imbalance(inputs, target_gpus, dim) if inputs else []
kwargs = scatter_imbalance(kwargs, target_gpus, dim) if kwargs else []
if len(inputs) < len(kwargs):
inputs.extend([() for _ in range(len(kwargs) - len(inputs))])
elif len(kwargs) < len(inputs):
kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))])
inputs = tuple(inputs)
kwargs = tuple(kwargs)
return inputs, kwargs
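
# Example (sketch): with inputs=(x,) and kwargs={} on two GPUs, the result is
#
#     inputs -> ((x_gpu0,), (x_gpu1,))    kwargs -> ({}, {})
#
# where x_gpu0/x_gpu1 are the chunks of x (hypothetical names): the shorter
# side is padded so every replica receives a matching (args, kwargs) pair
# for parallel_apply.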


class DataParallelImbalance(DataParallel):
def __init__(self, module, device_ids=None, output_device=None, dim=0):
        super().__init__(module, device_ids, output_device, dim)
if not torch.cuda.is_available():
self.module = module
self.device_ids = []
return
if device_ids is None:
device_ids = list(range(torch.cuda.device_count()))
if output_device is None:
output_device = device_ids[0]
if not all(t.is_cuda and t.device.index == device_ids[0]
for t in chain(module.parameters(), module.buffers())):
raise RuntimeError("module must have its parameters and buffers "
"on device %d (device_ids[0])" % device_ids[0])
self.dim = dim
self.module = module
self.device_ids = list(
map(lambda x: _get_device_index(x, True), device_ids))
self.output_device = _get_device_index(output_device, True)
if len(self.device_ids) == 1:
self.module.cuda(device_ids[0])

    def forward(self, *inputs, **kwargs):
if not self.device_ids:
return self.module(*inputs, **kwargs)
inputs, kwargs = self.scatter_imbalance(
inputs, kwargs, self.device_ids)
if len(self.device_ids) == 1:
return self.module(*inputs[0], **kwargs[0])
replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
outputs = self.parallel_apply(replicas, inputs, kwargs)
return self.gather(outputs, self.output_device)

    def scatter_imbalance(self, inputs, kwargs, device_ids):
return scatter_kwargs_imbalance(inputs, kwargs, device_ids, dim=self.dim)
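

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module: assumes at least
    # 4 CUDA devices; a batch size of 22 triggers the hard-coded (4, 6, 6, 6)
    # split in scatter_imbalance above.
    if torch.cuda.device_count() >= 4:
        model = torch.nn.Linear(8, 2).cuda(0)
        parallel_model = DataParallelImbalance(model, device_ids=[0, 1, 2, 3])
        x = torch.randn(22, 8, device="cuda:0")
        out = parallel_model(x)
        print(out.shape)  # torch.Size([22, 2]), gathered on cuda:0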