from math import ceil


def assert_device_map(device_map, num_blocks):
    """Checks that a device_map assigns every attention block to exactly one device."""
    blocks = list(range(0, num_blocks))

    # Flatten the per-device lists of block indices into a single list.
    device_map_blocks = [item for sublist in list(device_map.values()) for item in sublist]

    # Collect block indices that were assigned to more than one device.
    duplicate_blocks = []
    for i in device_map_blocks:
        if device_map_blocks.count(i) > 1 and i not in duplicate_blocks:
            duplicate_blocks.append(i)
    # Blocks the model has but the device_map omits, and assignments for blocks the model does not have.
    missing_blocks = [i for i in blocks if i not in device_map_blocks]
    extra_blocks = [i for i in device_map_blocks if i not in blocks]

    if len(duplicate_blocks) != 0:
        raise ValueError(
            "Duplicate attention blocks specified in device_map. Each attention block must be assigned to exactly"
            " one device. These attention blocks were specified more than once: " + str(duplicate_blocks)
        )
    if len(missing_blocks) != 0:
        raise ValueError(
            "There are attention blocks for this model that are not specified in the device_map. Add these attention "
            "blocks to a device on the device_map: " + str(missing_blocks)
        )
    if len(extra_blocks) != 0:
        raise ValueError(
            "The device_map contains more attention blocks than this model has. Remove these from the device_map: "
            + str(extra_blocks)
        )
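

# A minimal usage sketch (hypothetical device_map values, not part of this
# module): a model with num_blocks=4 attention blocks split across two
# devices passes validation because every block index appears exactly once:
#
#     assert_device_map({0: [0, 1], 1: [2, 3]}, num_blocks=4)
#
# By contrast, {0: [0, 1], 1: [1, 2]} would raise a ValueError for the
# duplicated block 1 (the duplicate check runs before the missing-block
# check, so block 3's absence is never reported in that call).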


def get_device_map(n_layers, devices):
    """Returns a dictionary mapping each device to a contiguous, roughly even chunk of layer indices."""
    layers = list(range(n_layers))
    # Number of layers per device, rounding up so every layer is assigned.
    n_blocks = int(ceil(n_layers / len(devices)))
    # Split the layer indices into consecutive chunks of n_blocks layers each.
    layers_list = [layers[i : i + n_blocks] for i in range(0, n_layers, n_blocks)]

    return dict(zip(devices, layers_list))
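

# A minimal smoke-test sketch: splitting 10 layers across three hypothetical
# device ids and validating the result. The device ids (0, 1, 2) are
# placeholders, not an assumption about available hardware.
if __name__ == "__main__":
    device_map = get_device_map(n_layers=10, devices=[0, 1, 2])
    # ceil(10 / 3) == 4 layers per chunk, so the split is:
    #   {0: [0, 1, 2, 3], 1: [4, 5, 6, 7], 2: [8, 9]}
    print(device_map)
    # Passes silently: all 10 layer indices are covered exactly once.
    assert_device_map(device_map, num_blocks=10)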