# // Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# //
# // Licensed under the Apache License, Version 2.0 (the "License");
# // you may not use this file except in compliance with the License.
# // You may obtain a copy of the License at
# //
# //     http://www.apache.org/licenses/LICENSE-2.0
# //
# // Unless required by applicable law or agreed to in writing, software
# // distributed under the License is distributed on an "AS IS" BASIS,
# // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# // See the License for the specific language governing permissions and
# // limitations under the License.

"""
Distributed basic functions.
"""
import os
from datetime import timedelta

import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel


def get_global_rank() -> int:
    """
    Get the global rank, the global index of the GPU across all nodes.
    """
    return int(os.environ.get("RANK", "0"))


def get_local_rank() -> int:
    """
    Get the local rank, the index of the GPU on the current node.
    """
    return int(os.environ.get("LOCAL_RANK", "0"))


def get_world_size() -> int:
    """
    Get the world size, the total number of GPUs across all nodes.
    """
    return int(os.environ.get("WORLD_SIZE", "1"))


def get_device() -> torch.device:
    """
    Get current rank device.
    """
    return torch.device("cuda", get_local_rank())
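

# Example (a minimal sketch, not part of the original module): under a
# launch such as `torchrun --nproc_per_node=8 train.py` (train.py is an
# illustrative name), torchrun exports RANK, LOCAL_RANK and WORLD_SIZE,
# so the helpers above resolve to e.g.
#
#   get_global_rank()  -> 0..WORLD_SIZE-1  (unique across all nodes)
#   get_local_rank()   -> 0..7             (GPU index on this node)
#   get_device()       -> torch.device("cuda", get_local_rank())
#
# Without a launcher, the defaults ("0" / "1") fall back to
# single-process behaviour.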


def barrier_if_distributed(*args, **kwargs):
    """
    Synchronize all processes when running in a distributed context.
    """
    if dist.is_initialized():
        return dist.barrier(*args, **kwargs)


def init_torch(cudnn_benchmark=True, timeout=timedelta(seconds=600)):
    """
    Common PyTorch initialization configuration.
    """
    # Allow TF32 on matmul and cuDNN kernels for faster training on Ampere+ GPUs.
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
    torch.backends.cudnn.benchmark = cudnn_benchmark
    # Bind this process to its local GPU before initializing the NCCL process group.
    torch.cuda.set_device(get_local_rank())
    dist.init_process_group(
        backend="nccl",
        rank=get_global_rank(),
        world_size=get_world_size(),
        timeout=timeout,
    )
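

# Usage sketch (an assumption, not part of the original file): a typical
# entry point launched with torchrun would call init_torch() once before
# building models, then place work on the rank-local device, e.g.
#
#   init_torch(cudnn_benchmark=True)
#   device = get_device()
#   model = MyModel().to(device)      # MyModel is a hypothetical module
#   barrier_if_distributed()          # wait until every rank is ready
#
# init_process_group() above relies on the default env:// rendezvous, so
# MASTER_ADDR and MASTER_PORT must also be provided by the launcher.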


def convert_to_ddp(module: torch.nn.Module, **kwargs) -> DistributedDataParallel:
    """
    Wrap a module with DistributedDataParallel on the current local GPU.
    """
    return DistributedDataParallel(
        module=module,
        device_ids=[get_local_rank()],
        output_device=get_local_rank(),
        **kwargs,
    )
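

# Example (a hedged sketch; names below are illustrative, not from this repo):
# wrapping a model after init_torch() so gradients are all-reduced across
# ranks during backward:
#
#   init_torch()
#   model = MyModel().to(get_device())   # MyModel is a hypothetical module
#   model = convert_to_ddp(model, find_unused_parameters=False)
#   loss = model(batch).mean()           # forward runs on the local GPU
#   loss.backward()                      # DDP synchronizes gradients here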