# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
[Copied from detectron2]
This file contains primitives for multi-gpu communication.
This is useful when doing distributed training.
"""
import functools
import logging
import pickle

import numpy as np
import torch
import torch.distributed as dist

_LOCAL_PROCESS_GROUP = None
| """ | |
| A torch process group which only includes processes that on the same machine as the current process. | |
| This variable is set when processes are spawned by `launch()` in "engine/launch.py". | |
| """ | |


def get_world_size() -> int:
    if not dist.is_available():
        return 1
    if not dist.is_initialized():
        return 1
    return dist.get_world_size()
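

# Illustrative sketch (hypothetical helper, not part of the original file): a
# world-size-aware reduction. With a single process this is a no-op, which is
# why the `get_world_size() == 1` guard pattern appears throughout this module.
def _average_across_workers(t: torch.Tensor) -> torch.Tensor:
    world_size = get_world_size()
    if world_size == 1:
        return t
    t = t.clone()
    dist.all_reduce(t)  # defaults to a SUM across all ranks, in place
    return t / world_size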


def get_rank() -> int:
    if not dist.is_available():
        return 0
    if not dist.is_initialized():
        return 0
    return dist.get_rank()


def is_main_process() -> bool:
    return get_rank() == 0
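

# Typical use (illustrative; `model` is a placeholder): restrict side effects such
# as logging or checkpoint writing to the main process, e.g.
#
#   if is_main_process():
#       torch.save(model.state_dict(), "model_final.pth")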


def synchronize():
    """
    Helper function to synchronize (barrier) among all processes when
    using distributed training.
    """
    if not dist.is_available():
        return
    if not dist.is_initialized():
        return
    world_size = dist.get_world_size()
    if world_size == 1:
        return
    dist.barrier()
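

# Typical use (illustrative; `model` is a placeholder): make every worker wait
# until rank 0 has finished writing before any rank reads the result, e.g.
#
#   if is_main_process():
#       torch.save(model.state_dict(), "model_final.pth")
#   synchronize()  # all ranks block here until rank 0 has saved
#   state = torch.load("model_final.pth", map_location="cpu")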