BryanW committed on
Commit
27d7c98
·
verified ·
1 Parent(s): 03665a7

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/__init__.cpython-312.pyc +0 -0
  2. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/big_modeling.cpython-312.pyc +0 -0
  3. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/checkpointing.cpython-312.pyc +0 -0
  4. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/data_loader.cpython-312.pyc +0 -0
  5. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/hooks.cpython-312.pyc +0 -0
  6. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/inference.cpython-312.pyc +0 -0
  7. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/launchers.cpython-312.pyc +0 -0
  8. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/local_sgd.cpython-312.pyc +0 -0
  9. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/logging.cpython-312.pyc +0 -0
  10. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/memory_utils.cpython-312.pyc +0 -0
  11. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/optimizer.cpython-312.pyc +0 -0
  12. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/parallelism_config.cpython-312.pyc +0 -0
  13. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/scheduler.cpython-312.pyc +0 -0
  14. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/state.cpython-312.pyc +0 -0
  15. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/tracking.cpython-312.pyc +0 -0
  16. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__init__.py +13 -0
  17. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/__init__.cpython-312.pyc +0 -0
  18. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/accelerate_cli.cpython-312.pyc +0 -0
  19. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/env.cpython-312.pyc +0 -0
  20. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/estimate.cpython-312.pyc +0 -0
  21. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/launch.cpython-312.pyc +0 -0
  22. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/merge.cpython-312.pyc +0 -0
  23. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/test.cpython-312.pyc +0 -0
  24. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/to_fsdp2.cpython-312.pyc +0 -0
  25. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/tpu.cpython-312.pyc +0 -0
  26. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/utils.cpython-312.pyc +0 -0
  27. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/accelerate_cli.py +54 -0
  28. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__init__.py +52 -0
  29. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/__init__.cpython-312.pyc +0 -0
  30. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/cluster.cpython-312.pyc +0 -0
  31. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config.cpython-312.pyc +0 -0
  32. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config_args.cpython-312.pyc +0 -0
  33. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config_utils.cpython-312.pyc +0 -0
  34. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/default.cpython-312.pyc +0 -0
  35. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/sagemaker.cpython-312.pyc +0 -0
  36. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/update.cpython-312.pyc +0 -0
  37. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/cluster.py +939 -0
  38. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/config.py +89 -0
  39. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/config_args.py +252 -0
  40. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/config_utils.py +122 -0
  41. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/default.py +172 -0
  42. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/sagemaker.py +274 -0
  43. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/update.py +63 -0
  44. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/env.py +143 -0
  45. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/estimate.py +318 -0
  46. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/launch.py +1415 -0
  47. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__init__.py +14 -0
  48. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/__init__.cpython-312.pyc +0 -0
  49. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/cursor.cpython-312.pyc +0 -0
  50. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/helpers.cpython-312.pyc +0 -0
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.38 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/big_modeling.cpython-312.pyc ADDED
Binary file (37.4 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/checkpointing.cpython-312.pyc ADDED
Binary file (15.9 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/data_loader.cpython-312.pyc ADDED
Binary file (65.4 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/hooks.cpython-312.pyc ADDED
Binary file (35.4 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/inference.cpython-312.pyc ADDED
Binary file (7.55 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/launchers.cpython-312.pyc ADDED
Binary file (13 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/local_sgd.cpython-312.pyc ADDED
Binary file (5.18 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/logging.cpython-312.pyc ADDED
Binary file (6.06 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/memory_utils.cpython-312.pyc ADDED
Binary file (503 Bytes). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/optimizer.cpython-312.pyc ADDED
Binary file (11.7 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/parallelism_config.cpython-312.pyc ADDED
Binary file (20.3 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/scheduler.cpython-312.pyc ADDED
Binary file (4.68 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/state.cpython-312.pyc ADDED
Binary file (64.5 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/tracking.cpython-312.pyc ADDED
Binary file (63.6 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (211 Bytes). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/accelerate_cli.cpython-312.pyc ADDED
Binary file (1.84 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/env.cpython-312.pyc ADDED
Binary file (5.39 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/estimate.cpython-312.pyc ADDED
Binary file (14.1 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/launch.cpython-312.pyc ADDED
Binary file (56.4 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/merge.cpython-312.pyc ADDED
Binary file (2.43 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/test.cpython-312.pyc ADDED
Binary file (2.19 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/to_fsdp2.cpython-312.pyc ADDED
Binary file (6.24 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/tpu.cpython-312.pyc ADDED
Binary file (6.03 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/utils.cpython-312.pyc ADDED
Binary file (5.22 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/accelerate_cli.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from accelerate.commands.config import get_config_parser
18
+ from accelerate.commands.env import env_command_parser
19
+ from accelerate.commands.estimate import estimate_command_parser
20
+ from accelerate.commands.launch import launch_command_parser
21
+ from accelerate.commands.merge import merge_command_parser
22
+ from accelerate.commands.test import test_command_parser
23
+ from accelerate.commands.to_fsdp2 import to_fsdp2_command_parser
24
+ from accelerate.commands.tpu import tpu_command_parser
25
+ from accelerate.commands.utils import CustomArgumentParser
26
+
27
+
28
+ def main():
29
+ parser = CustomArgumentParser("Accelerate CLI tool", usage="accelerate <command> [<args>]", allow_abbrev=False)
30
+ subparsers = parser.add_subparsers(help="accelerate command helpers")
31
+
32
+ # Register commands
33
+ get_config_parser(subparsers=subparsers)
34
+ estimate_command_parser(subparsers=subparsers)
35
+ env_command_parser(subparsers=subparsers)
36
+ launch_command_parser(subparsers=subparsers)
37
+ merge_command_parser(subparsers=subparsers)
38
+ tpu_command_parser(subparsers=subparsers)
39
+ test_command_parser(subparsers=subparsers)
40
+ to_fsdp2_command_parser(subparsers=subparsers)
41
+
42
+ # Let's go
43
+ args = parser.parse_args()
44
+
45
+ if not hasattr(args, "func"):
46
+ parser.print_help()
47
+ exit(1)
48
+
49
+ # Run
50
+ args.func(args)
51
+
52
+
53
+ if __name__ == "__main__":
54
+ main()
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__init__.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import argparse
18
+
19
+ from .config import config_command_parser
20
+ from .config_args import default_config_file, load_config_from_file # noqa: F401
21
+ from .default import default_command_parser
22
+ from .update import update_command_parser
23
+
24
+
25
+ def get_config_parser(subparsers=None):
26
+ parent_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
27
+ # The main config parser
28
+ config_parser = config_command_parser(subparsers)
29
+ # The subparser to add commands to
30
+ subcommands = config_parser.add_subparsers(title="subcommands", dest="subcommand")
31
+
32
+ # Then add other parsers with the parent parser
33
+ default_command_parser(subcommands, parents=[parent_parser])
34
+ update_command_parser(subcommands, parents=[parent_parser])
35
+
36
+ return config_parser
37
+
38
+
39
+ def main():
40
+ config_parser = get_config_parser()
41
+ args = config_parser.parse_args()
42
+
43
+ if not hasattr(args, "func"):
44
+ config_parser.print_help()
45
+ exit(1)
46
+
47
+ # Run
48
+ args.func(args)
49
+
50
+
51
+ if __name__ == "__main__":
52
+ main()
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.49 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/cluster.cpython-312.pyc ADDED
Binary file (29.2 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config.cpython-312.pyc ADDED
Binary file (3.27 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config_args.cpython-312.pyc ADDED
Binary file (12 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config_utils.cpython-312.pyc ADDED
Binary file (3.97 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/default.cpython-312.pyc ADDED
Binary file (6.27 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/sagemaker.cpython-312.pyc ADDED
Binary file (9.5 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/update.cpython-312.pyc ADDED
Binary file (2.45 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/cluster.py ADDED
@@ -0,0 +1,939 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import os
18
+
19
+ from ...utils import (
20
+ ComputeEnvironment,
21
+ DistributedType,
22
+ is_deepspeed_available,
23
+ is_fp8_available,
24
+ is_hpu_available,
25
+ is_mlu_available,
26
+ is_mps_available,
27
+ is_msamp_available,
28
+ is_musa_available,
29
+ is_neuron_available,
30
+ is_npu_available,
31
+ is_sdaa_available,
32
+ is_torchao_available,
33
+ is_transformer_engine_available,
34
+ is_transformers_available,
35
+ is_xpu_available,
36
+ )
37
+ from ...utils.constants import (
38
+ DEEPSPEED_MULTINODE_LAUNCHERS,
39
+ FSDP2_STATE_DICT_TYPE,
40
+ FSDP_AUTO_WRAP_POLICY,
41
+ FSDP_BACKWARD_PREFETCH,
42
+ FSDP_SHARDING_STRATEGY,
43
+ FSDP_STATE_DICT_TYPE,
44
+ TORCH_DYNAMO_MODES,
45
+ )
46
+ from .config_args import ClusterConfig
47
+ from .config_utils import (
48
+ DYNAMO_BACKENDS,
49
+ _ask_field,
50
+ _ask_options,
51
+ _convert_distributed_mode,
52
+ _convert_dynamo_backend,
53
+ _convert_fp8_backend,
54
+ _convert_mixed_precision,
55
+ _convert_yes_no_to_bool,
56
+ )
57
+
58
+
59
+ def get_cluster_input():
60
+ distributed_type = _ask_options(
61
+ "Which type of machine are you using?",
62
+ [
63
+ "No distributed training",
64
+ "multi-CPU",
65
+ "multi-XPU",
66
+ "multi-HPU",
67
+ "multi-GPU",
68
+ "multi-NPU",
69
+ "multi-MLU",
70
+ "multi-SDAA",
71
+ "multi-MUSA",
72
+ "multi-NEURON",
73
+ "TPU",
74
+ ],
75
+ _convert_distributed_mode,
76
+ )
77
+
78
+ machine_rank = 0
79
+ num_machines = 1
80
+ num_processes = 1
81
+ gpu_ids = None
82
+ main_process_ip = None
83
+ main_process_port = None
84
+ rdzv_backend = "static"
85
+ same_network = True
86
+ debug = False
87
+
88
+ if distributed_type in [
89
+ DistributedType.MULTI_GPU,
90
+ DistributedType.MULTI_MLU,
91
+ DistributedType.MULTI_SDAA,
92
+ DistributedType.MULTI_MUSA,
93
+ DistributedType.MULTI_NPU,
94
+ DistributedType.MULTI_XPU,
95
+ DistributedType.MULTI_CPU,
96
+ DistributedType.MULTI_HPU,
97
+ DistributedType.MULTI_NEURON,
98
+ ]:
99
+ num_machines = _ask_field(
100
+ "How many different machines will you use (use more than 1 for multi-node training)? [1]: ",
101
+ int,
102
+ default=1,
103
+ )
104
+ if num_machines > 1:
105
+ machine_rank = _ask_options(
106
+ "What is the rank of this machine?",
107
+ list(range(num_machines)),
108
+ int,
109
+ )
110
+ main_process_ip = _ask_field(
111
+ "What is the IP address of the machine that will host the main process? ",
112
+ )
113
+ main_process_port = _ask_field(
114
+ "What is the port you will use to communicate with the main process? ",
115
+ int,
116
+ )
117
+ same_network = _ask_field(
118
+ "Are all the machines on the same local network? Answer `no` if nodes are on the cloud and/or on different network hosts [YES/no]: ",
119
+ _convert_yes_no_to_bool,
120
+ default=True,
121
+ error_message="Please enter yes or no.",
122
+ )
123
+ if not same_network:
124
+ rdzv_backend = _ask_field(
125
+ "What rendezvous backend will you use? ('static', 'c10d', ...): ", default="static"
126
+ )
127
+ debug = _ask_field(
128
+ "Should distributed operations be checked while running for errors? This can avoid timeout issues but will be slower. [yes/NO]: ",
129
+ _convert_yes_no_to_bool,
130
+ default=False,
131
+ error_message="Please enter yes or no.",
132
+ )
133
+
134
+ if distributed_type == DistributedType.NO:
135
+ use_cpu = _ask_field(
136
+ "Do you want to run your training on CPU only (even if a GPU / Apple Silicon / Ascend NPU device is available)? [yes/NO]:",
137
+ _convert_yes_no_to_bool,
138
+ default=False,
139
+ error_message="Please enter yes or no.",
140
+ )
141
+ elif distributed_type == DistributedType.MULTI_CPU:
142
+ use_cpu = True
143
+ else:
144
+ use_cpu = False
145
+
146
+ mpirun_config = {}
147
+
148
+ if use_cpu:
149
+ if distributed_type == DistributedType.MULTI_CPU:
150
+ use_mpirun = _ask_field(
151
+ "Do you want accelerate to launch mpirun? [yes/NO]: ",
152
+ _convert_yes_no_to_bool,
153
+ default=False,
154
+ error_message="Please enter yes or no.",
155
+ )
156
+ if use_mpirun:
157
+ mpirun_hostfile = _ask_field(
158
+ "Please enter the path to the hostfile to use with mpirun [~/hostfile]: ",
159
+ str,
160
+ default="~/hostfile",
161
+ )
162
+ mpirun_config["mpirun_hostfile"] = os.path.expanduser(mpirun_hostfile.strip())
163
+
164
+ dynamo_config = {}
165
+ use_dynamo = _ask_field(
166
+ "Do you wish to optimize your script with torch dynamo?[yes/NO]:",
167
+ _convert_yes_no_to_bool,
168
+ default=False,
169
+ error_message="Please enter yes or no.",
170
+ )
171
+ if use_dynamo:
172
+ prefix = "dynamo_"
173
+ dynamo_config[prefix + "backend"] = _ask_options(
174
+ "Which dynamo backend would you like to use?",
175
+ [x.lower() for x in DYNAMO_BACKENDS],
176
+ _convert_dynamo_backend,
177
+ default=2,
178
+ )
179
+ use_custom_options = _ask_field(
180
+ "Do you want to customize the defaults sent to torch.compile? [yes/NO]: ",
181
+ _convert_yes_no_to_bool,
182
+ default=False,
183
+ error_message="Please enter yes or no.",
184
+ )
185
+
186
+ if use_custom_options:
187
+ dynamo_config[prefix + "mode"] = _ask_options(
188
+ "Which mode do you want to use?",
189
+ TORCH_DYNAMO_MODES,
190
+ lambda x: TORCH_DYNAMO_MODES[int(x)],
191
+ default=0,
192
+ )
193
+ dynamo_config[prefix + "use_fullgraph"] = _ask_field(
194
+ "Do you want the fullgraph mode or it is ok to break model into several subgraphs? [yes/NO]: ",
195
+ _convert_yes_no_to_bool,
196
+ default=False,
197
+ error_message="Please enter yes or no.",
198
+ )
199
+ dynamo_config[prefix + "use_dynamic"] = _ask_field(
200
+ "Do you want to enable dynamic shape tracing? [yes/NO]: ",
201
+ _convert_yes_no_to_bool,
202
+ default=False,
203
+ error_message="Please enter yes or no.",
204
+ )
205
+ dynamo_config[prefix + "use_regional_compilation"] = _ask_field(
206
+ "Do you want to enable regional compilation? [yes/NO]: ",
207
+ _convert_yes_no_to_bool,
208
+ default=False,
209
+ error_message="Please enter yes or no.",
210
+ )
211
+
212
+ use_mps = not use_cpu and is_mps_available()
213
+ deepspeed_config = {}
214
+ if (
215
+ distributed_type
216
+ in [
217
+ DistributedType.MULTI_GPU,
218
+ DistributedType.MULTI_XPU,
219
+ DistributedType.MULTI_HPU,
220
+ DistributedType.MULTI_NPU,
221
+ DistributedType.MULTI_MLU,
222
+ DistributedType.MULTI_SDAA,
223
+ DistributedType.MULTI_MUSA,
224
+ DistributedType.MULTI_NEURON,
225
+ DistributedType.NO,
226
+ ]
227
+ and not use_mps
228
+ ):
229
+ use_deepspeed = _ask_field(
230
+ "Do you want to use DeepSpeed? [yes/NO]: ",
231
+ _convert_yes_no_to_bool,
232
+ default=False,
233
+ error_message="Please enter yes or no.",
234
+ )
235
+ if use_deepspeed:
236
+ if distributed_type is DistributedType.MULTI_NEURON:
237
+ raise RuntimeError("DeepSpeed is not supported on Neuron devices.")
238
+
239
+ distributed_type = DistributedType.DEEPSPEED
240
+ assert is_deepspeed_available(), (
241
+ "DeepSpeed is not installed => run `pip3 install deepspeed` or build it from source"
242
+ )
243
+
244
+ if distributed_type == DistributedType.DEEPSPEED:
245
+ use_deepspeed_config = _ask_field(
246
+ "Do you want to specify a json file to a DeepSpeed config? [yes/NO]: ",
247
+ _convert_yes_no_to_bool,
248
+ default=False,
249
+ error_message="Please enter yes or no.",
250
+ )
251
+ if use_deepspeed_config:
252
+ deepspeed_config["deepspeed_config_file"] = _ask_field(
253
+ "Please enter the path to the json DeepSpeed config file: ",
254
+ str,
255
+ default="none",
256
+ )
257
+ else:
258
+ deepspeed_config["zero_stage"] = _ask_options(
259
+ "What should be your DeepSpeed's ZeRO optimization stage?",
260
+ [0, 1, 2, 3],
261
+ int,
262
+ default=2,
263
+ )
264
+
265
+ deepspeed_devices = ["none", "cpu", "nvme"]
266
+ if deepspeed_config["zero_stage"] >= 2:
267
+ deepspeed_config["offload_optimizer_device"] = _ask_options(
268
+ "Where to offload optimizer states?", deepspeed_devices, lambda x: deepspeed_devices[int(x)]
269
+ )
270
+ deepspeed_config["offload_param_device"] = _ask_options(
271
+ "Where to offload parameters?", deepspeed_devices, lambda x: deepspeed_devices[int(x)]
272
+ )
273
+ if deepspeed_config["offload_param_device"] == "nvme":
274
+ deepspeed_config["offload_param_nvme_path"] = _ask_field(
275
+ "Nvme Path to offload parameters?",
276
+ str,
277
+ default="/nvme",
278
+ )
279
+ if deepspeed_config["offload_optimizer_device"] == "nvme":
280
+ deepspeed_config["offload_optimizer_nvme_path"] = _ask_field(
281
+ "Nvme Path to offload optimizer states?",
282
+ str,
283
+ default="/nvme",
284
+ )
285
+ deepspeed_config["gradient_accumulation_steps"] = _ask_field(
286
+ "How many gradient accumulation steps you're passing in your script? [1]: ",
287
+ int,
288
+ default=1,
289
+ )
290
+ use_gradient_clipping = _ask_field(
291
+ "Do you want to use gradient clipping? [yes/NO]: ",
292
+ _convert_yes_no_to_bool,
293
+ default=False,
294
+ error_message="Please enter yes or no.",
295
+ )
296
+ if use_gradient_clipping:
297
+ deepspeed_config["gradient_clipping"] = _ask_field(
298
+ "What is the gradient clipping value? [1.0]: ",
299
+ float,
300
+ default=1.0,
301
+ )
302
+ if deepspeed_config["zero_stage"] == 3:
303
+ deepspeed_config["zero3_save_16bit_model"] = _ask_field(
304
+ "Do you want to save 16-bit model weights when using ZeRO Stage-3? [yes/NO]: ",
305
+ _convert_yes_no_to_bool,
306
+ default=False,
307
+ error_message="Please enter yes or no.",
308
+ )
309
+ deepspeed_config["zero3_init_flag"] = _ask_field(
310
+ "Do you want to enable `deepspeed.zero.Init` when using ZeRO Stage-3 for constructing massive models? [yes/NO]: ",
311
+ _convert_yes_no_to_bool,
312
+ default=False,
313
+ error_message="Please enter yes or no.",
314
+ )
315
+ if deepspeed_config["zero3_init_flag"]:
316
+ if not is_transformers_available():
317
+ raise Exception(
318
+ "When `zero3_init_flag` is set, it requires Transformers to be installed. "
319
+ "Please run `pip3 install transformers`."
320
+ )
321
+ use_moe = _ask_field(
322
+ "Do you want to enable Mixture-of-Experts training (MoE)? [yes/NO]: ",
323
+ _convert_yes_no_to_bool,
324
+ default=False,
325
+ error_message="Please enter yes or no.",
326
+ )
327
+ if use_moe:
328
+ deepspeed_config["deepspeed_moe_layer_cls_names"] = _ask_field(
329
+ "Specify the comma-separated list of transformers MoE layer class names (case-sensitive), e.g : "
330
+ " `MixtralSparseMoeBlock`, `Qwen2MoeSparseMoeBlock`, `JetMoEAttention,JetMoEBlock` ... : ",
331
+ str,
332
+ )
333
+
334
+ if num_machines > 1:
335
+ launcher_query = "Which Type of launcher do you want to use?"
336
+ deepspeed_config["deepspeed_multinode_launcher"] = _ask_options(
337
+ launcher_query,
338
+ DEEPSPEED_MULTINODE_LAUNCHERS,
339
+ lambda x: DEEPSPEED_MULTINODE_LAUNCHERS[int(x)],
340
+ )
341
+
342
+ if deepspeed_config["deepspeed_multinode_launcher"] != DEEPSPEED_MULTINODE_LAUNCHERS[1]:
343
+ deepspeed_config["deepspeed_hostfile"] = _ask_field(
344
+ "DeepSpeed configures multi-node compute resources with hostfile. "
345
+ "Each row is of the format `hostname slots=[num_gpus]`, e.g., `localhost slots=2`; "
346
+ "for more information please refer official [documentation]"
347
+ "(https://www.deepspeed.ai/getting-started/#resource-configuration-multi-node). "
348
+ "Please specify the location of hostfile: ",
349
+ str,
350
+ )
351
+
352
+ is_exclusion_filter = _ask_field(
353
+ "Do you want to specify exclusion filter string? [yes/NO]: ",
354
+ _convert_yes_no_to_bool,
355
+ default=False,
356
+ error_message="Please enter yes or no.",
357
+ )
358
+ if is_exclusion_filter:
359
+ deepspeed_config["deepspeed_exclusion_filter"] = _ask_field(
360
+ "DeepSpeed exclusion filter string: ",
361
+ str,
362
+ )
363
+
364
+ is_inclusion_filter = _ask_field(
365
+ "Do you want to specify inclusion filter string? [yes/NO]: ",
366
+ _convert_yes_no_to_bool,
367
+ default=False,
368
+ error_message="Please enter yes or no.",
369
+ )
370
+ if is_inclusion_filter:
371
+ deepspeed_config["deepspeed_inclusion_filter"] = _ask_field(
372
+ "DeepSpeed inclusion filter string: ",
373
+ str,
374
+ )
375
+
376
+ fsdp_config = {}
377
+
378
+ if distributed_type in [
379
+ DistributedType.MULTI_GPU,
380
+ DistributedType.MULTI_NPU,
381
+ DistributedType.MULTI_MLU,
382
+ DistributedType.MULTI_SDAA,
383
+ DistributedType.MULTI_MUSA,
384
+ DistributedType.MULTI_XPU,
385
+ DistributedType.MULTI_HPU,
386
+ DistributedType.MULTI_NEURON,
387
+ ]:
388
+ use_fsdp = _ask_field(
389
+ "Do you want to use FullyShardedDataParallel? [yes/NO]: ",
390
+ _convert_yes_no_to_bool,
391
+ default=False,
392
+ error_message="Please enter yes or no.",
393
+ )
394
+ if use_fsdp:
395
+ if distributed_type is DistributedType.MULTI_NEURON:
396
+ raise NotImplementedError("FSDP is not currently supported on Neuron devices.")
397
+ distributed_type = DistributedType.FSDP
398
+
399
+ if distributed_type == DistributedType.FSDP:
400
+ fsdp_config["fsdp_version"] = _ask_options(
401
+ "What should be your FSDP version? [2]: ",
402
+ [1, 2],
403
+ lambda x: int(x) + 1,
404
+ default=1,
405
+ )
406
+ fsdp_version = fsdp_config["fsdp_version"] # extract to a variable to simplify usage later
407
+
408
+ if fsdp_version == 1:
409
+ sharding_strategy_query = "What should be your sharding strategy?"
410
+ fsdp_config["fsdp_reshard_after_forward"] = _ask_options(
411
+ sharding_strategy_query,
412
+ FSDP_SHARDING_STRATEGY,
413
+ lambda x: FSDP_SHARDING_STRATEGY[int(x)],
414
+ )
415
+ else:
416
+ fsdp_config["fsdp_reshard_after_forward"] = _ask_field(
417
+ "Do you want to enable resharding after forward? [YES/no]: ",
418
+ _convert_yes_no_to_bool,
419
+ default=True,
420
+ error_message="Please enter yes or no.",
421
+ )
422
+
423
+ fsdp_config["fsdp_offload_params"] = _ask_field(
424
+ "Do you want to offload parameters and gradients to CPU? [yes/NO]: ",
425
+ _convert_yes_no_to_bool,
426
+ default=False,
427
+ error_message="Please enter yes or no.",
428
+ )
429
+
430
+ fsdp_wrap_query = "What should be your auto wrap policy?"
431
+ fsdp_config["fsdp_auto_wrap_policy"] = _ask_options(
432
+ fsdp_wrap_query,
433
+ FSDP_AUTO_WRAP_POLICY,
434
+ lambda x: FSDP_AUTO_WRAP_POLICY[int(x)],
435
+ )
436
+ if fsdp_config["fsdp_auto_wrap_policy"] == FSDP_AUTO_WRAP_POLICY[0]:
437
+ use_no_split_modules = _ask_field(
438
+ "Do you want to use the model's `_no_split_modules` to wrap. Only applicable for 🤗 Transformers [yes/NO]: ",
439
+ _convert_yes_no_to_bool,
440
+ default=False,
441
+ error_message="Please enter yes or no.",
442
+ )
443
+ if not use_no_split_modules:
444
+ fsdp_config["fsdp_transformer_layer_cls_to_wrap"] = _ask_field(
445
+ "Specify the comma-separated list of transformer layer class names (case-sensitive) to wrap ,e.g, :"
446
+ "`BertLayer`, `GPTJBlock`, `T5Block`, `BertLayer,BertEmbeddings,BertSelfOutput` ...? : ",
447
+ str,
448
+ )
449
+ elif fsdp_config["fsdp_auto_wrap_policy"] == FSDP_AUTO_WRAP_POLICY[1]:
450
+ fsdp_config["fsdp_min_num_params"] = _ask_field(
451
+ "What should be your FSDP's minimum number of parameters for Default Auto Wrapping Policy? [1e8]: ",
452
+ int,
453
+ default=100000000,
454
+ )
455
+ # Removed in FSDP2, ask for user input for FSDP1
456
+ if fsdp_version == 1:
457
+ fsdp_backward_prefetch_query = "What should be your FSDP's backward prefetch policy?"
458
+ fsdp_config["fsdp_backward_prefetch"] = _ask_options(
459
+ fsdp_backward_prefetch_query,
460
+ FSDP_BACKWARD_PREFETCH,
461
+ lambda x: FSDP_BACKWARD_PREFETCH[int(x)],
462
+ )
463
+
464
+ fsdp_state_dict_type_query = "What should be your FSDP's state dict type?"
465
+ fsdp_config["fsdp_state_dict_type"] = _ask_options(
466
+ fsdp_state_dict_type_query,
467
+ FSDP_STATE_DICT_TYPE if fsdp_version == 1 else FSDP2_STATE_DICT_TYPE,
468
+ lambda x: FSDP_STATE_DICT_TYPE[int(x)] if fsdp_version == 1 else FSDP2_STATE_DICT_TYPE[int(x)],
469
+ default=0,
470
+ )
471
+ # Not implemented in FSDP2, ask for user input for FSDP1
472
+ if fsdp_version == 1:
473
+ fsdp_config["fsdp_forward_prefetch"] = _ask_field(
474
+ "Do you want to enable FSDP's forward prefetch policy? [yes/NO]: ",
475
+ _convert_yes_no_to_bool,
476
+ default=False,
477
+ error_message="Please enter yes or no.",
478
+ )
479
+ # Obsolete in FSDP2, ask for user input for FSDP1
480
+ if fsdp_version == 1:
481
+ fsdp_config["fsdp_use_orig_params"] = _ask_field(
482
+ "Do you want to enable FSDP's `use_orig_params` feature? [YES/no]: ",
483
+ _convert_yes_no_to_bool,
484
+ default=True,
485
+ error_message="Please enter yes or no.",
486
+ )
487
+ fsdp_config["fsdp_cpu_ram_efficient_loading"] = _ask_field(
488
+ "Do you want to enable CPU RAM efficient model loading? Only applicable for 🤗 Transformers models. [YES/no]: ",
489
+ _convert_yes_no_to_bool,
490
+ default=True,
491
+ error_message="Please enter yes or no.",
492
+ )
493
+ # Obsolete in FSDP2, ask for user input for FSDP1
494
+ if fsdp_version == 1:
495
+ if fsdp_config["fsdp_cpu_ram_efficient_loading"]:
496
+ fsdp_config["fsdp_sync_module_states"] = True
497
+ else:
498
+ fsdp_config["fsdp_sync_module_states"] = _ask_field(
499
+ "Do you want each individually wrapped FSDP unit to broadcast module parameters from rank 0 at the start? [YES/no]: ",
500
+ _convert_yes_no_to_bool,
501
+ default=True,
502
+ error_message="Please enter yes or no.",
503
+ )
504
+ fsdp_config["fsdp_activation_checkpointing"] = _ask_field(
505
+ "Do you want to enable FSDP activation checkpointing? [yes/NO]: ",
506
+ _convert_yes_no_to_bool,
507
+ default=False,
508
+ error_message="Please enter yes or no.",
509
+ )
510
+
511
+ parallelism_config = {}
512
+
513
+ if fsdp_config.get("fsdp_version", 1) == 2:
514
+ use_parallelism_config = _ask_field(
515
+ "Do you want to use the parallelism config? [yes/NO]: ",
516
+ _convert_yes_no_to_bool,
517
+ default=False,
518
+ error_message="Please enter yes or no.",
519
+ )
520
+
521
+ if use_parallelism_config:
522
+ prefix = "parallelism_config_"
523
+ parallelism_config[prefix + "dp_replicate_size"] = _ask_field(
524
+ "What is the data parallelism replicate size? [1]: ",
525
+ int,
526
+ default=1,
527
+ error_message="Please enter an integer.",
528
+ )
529
+
530
+ parallelism_config[prefix + "dp_shard_size"] = _ask_field(
531
+ "What is the FSDP shard size? [1]: ",
532
+ int,
533
+ default=1,
534
+ error_message="Please enter an integer.",
535
+ )
536
+
537
+ parallelism_config[prefix + "tp_size"] = _ask_field(
538
+ "What is the tensor parallelism size? [1]: ",
539
+ int,
540
+ default=1,
541
+ error_message="Please enter an integer.",
542
+ )
543
+
544
+ parallelism_config[prefix + "cp_size"] = _ask_field(
545
+ "What is the context parallelism size? [1]: ",
546
+ int,
547
+ default=1,
548
+ error_message="Please enter an integer.",
549
+ )
550
+ if parallelism_config[prefix + "cp_size"] > 1:
551
+ parallelism_config[prefix + "cp_comm_strategy"] = _ask_options(
552
+ "What is the compute parallelism communication strategy?",
553
+ ["allgather", "alltoall"],
554
+ lambda x: ["allgather", "alltoall"][int(x)],
555
+ default=0,
556
+ )
557
+
558
+ megatron_lm_config = {}
559
+ if distributed_type in [DistributedType.MULTI_GPU]:
560
+ use_megatron_lm = _ask_field(
561
+ "Do you want to use Megatron-LM ? [yes/NO]: ",
562
+ _convert_yes_no_to_bool,
563
+ default=False,
564
+ error_message="Please enter yes or no.",
565
+ )
566
+ if use_megatron_lm:
567
+ distributed_type = DistributedType.MEGATRON_LM
568
+ if distributed_type == DistributedType.MEGATRON_LM:
569
+ prefix = "megatron_lm_"
570
+ megatron_lm_config[prefix + "tp_degree"] = _ask_field(
571
+ "What is the Tensor Parallelism degree/size? [1]:",
572
+ int,
573
+ default=1,
574
+ error_message="Please enter an integer.",
575
+ )
576
+ if megatron_lm_config[prefix + "tp_degree"] > 1:
577
+ megatron_lm_config[prefix + "sequence_parallelism"] = _ask_field(
578
+ "Do you want to enable Sequence Parallelism? [YES/no]: ",
579
+ _convert_yes_no_to_bool,
580
+ default=True,
581
+ error_message="Please enter yes or no.",
582
+ )
583
+
584
+ megatron_lm_config[prefix + "pp_degree"] = _ask_field(
585
+ "What is the Pipeline Parallelism degree/size? [1]:",
586
+ int,
587
+ default=1,
588
+ error_message="Please enter an integer.",
589
+ )
590
+ if megatron_lm_config[prefix + "pp_degree"] > 1:
591
+ megatron_lm_config[prefix + "num_micro_batches"] = _ask_field(
592
+ "What is the number of micro-batches? [1]:",
593
+ int,
594
+ default=1,
595
+ error_message="Please enter an integer.",
596
+ )
597
+
598
+ megatron_lm_config[prefix + "recompute_activations"] = _ask_field(
599
+ "Do you want to enable selective activation recomputation? [YES/no]: ",
600
+ _convert_yes_no_to_bool,
601
+ default=True,
602
+ error_message="Please enter yes or no.",
603
+ )
604
+
605
+ megatron_lm_config[prefix + "use_distributed_optimizer"] = _ask_field(
606
+ "Do you want to use distributed optimizer "
607
+ "which shards optimizer state and gradients across data parallel ranks? [YES/no]: ",
608
+ _convert_yes_no_to_bool,
609
+ default=True,
610
+ error_message="Please enter yes or no.",
611
+ )
612
+
613
+ megatron_lm_config[prefix + "gradient_clipping"] = _ask_field(
614
+ "What is the gradient clipping value based on global L2 Norm (0 to disable)? [1.0]: ",
615
+ float,
616
+ default=1.0,
617
+ )
618
+ # TPU specific defaults
619
+ tpu_commands = None
620
+ tpu_command_file = None
621
+ tpu_downcast_bf16 = "no"
622
+ tpu_env = []
623
+ tpu_name = None
624
+ tpu_vm = None
625
+ tpu_zone = None
626
+ tpu_use_sudo = False
627
+ tpu_use_cluster = False
628
+
629
+ if distributed_type in [
630
+ DistributedType.MULTI_CPU,
631
+ DistributedType.MULTI_XPU,
632
+ DistributedType.MULTI_HPU,
633
+ DistributedType.MULTI_GPU,
634
+ DistributedType.MULTI_MLU,
635
+ DistributedType.MULTI_SDAA,
636
+ DistributedType.MULTI_MUSA,
637
+ DistributedType.MULTI_NPU,
638
+ DistributedType.MULTI_NEURON,
639
+ DistributedType.XLA,
640
+ ]:
641
+ machine_type = str(distributed_type).split(".")[1].replace("MULTI_", "")
642
+ if machine_type in ["TPU", "NEURON"]:
643
+ machine_type += " cores"
644
+ elif machine_type == "CPU":
645
+ machine_type = "processes"
646
+ else:
647
+ machine_type += "(s)"
648
+ num_processes = _ask_field(
649
+ f"How many {machine_type} should be used for distributed training? [1]:",
650
+ int,
651
+ default=1,
652
+ error_message="Please enter an integer.",
653
+ )
654
+ elif distributed_type in [DistributedType.FSDP, DistributedType.DEEPSPEED, DistributedType.MEGATRON_LM]:
655
+ num_processes = _ask_field(
656
+ "How many GPU(s) should be used for distributed training? [1]:",
657
+ int,
658
+ default=1,
659
+ error_message="Please enter an integer.",
660
+ )
661
+ else:
662
+ num_processes = 1
663
+
664
+ if (distributed_type == DistributedType.MULTI_GPU) and (num_machines == 1) and (num_processes == 1):
665
+ raise ValueError(
666
+ f"Specified distributed type {distributed_type} but only using 1 GPU on a single machine. Please select `No distributed training` for the type of machine you are using."
667
+ )
668
+
669
+ if (
670
+ distributed_type
671
+ in [
672
+ DistributedType.MULTI_GPU,
673
+ DistributedType.MULTI_MLU,
674
+ DistributedType.MULTI_SDAA,
675
+ DistributedType.MULTI_MUSA,
676
+ DistributedType.MULTI_NPU,
677
+ DistributedType.MULTI_XPU,
678
+ DistributedType.MULTI_HPU,
679
+ DistributedType.MULTI_NEURON,
680
+ DistributedType.NO,
681
+ ]
682
+ and not use_cpu
683
+ and not use_mps
684
+ ):
685
+ if is_npu_available():
686
+ machine_type = "NPU(s)"
687
+ elif is_mlu_available():
688
+ machine_type = "MLU(s)"
689
+ elif is_sdaa_available():
690
+ machine_type = "SDAA(s)"
691
+ elif is_musa_available():
692
+ machine_type = "MUSA(s)"
693
+ elif is_xpu_available():
694
+ machine_type = "XPU(s)"
695
+ elif is_hpu_available():
696
+ machine_type = "HPU(s)"
697
+ elif is_neuron_available():
698
+ machine_type = "Neuron cores"
699
+ else:
700
+ machine_type = "GPU(s)"
701
+ gpu_ids = _ask_field(
702
+ f"What {machine_type} (by id) should be used for training on this machine as a comma-separated list? [all]:",
703
+ default="all",
704
+ )
705
+
706
+ # CPU affinity is only supported on NVIDIA hardware for now
707
+ enable_cpu_affinity = False
708
+ if distributed_type in (DistributedType.NO, DistributedType.MULTI_GPU) and not use_cpu and not use_mps:
709
+ enable_cpu_affinity = _ask_field(
710
+ "Would you like to enable numa efficiency? (Currently only supported on NVIDIA hardware). [yes/NO]: ",
711
+ _convert_yes_no_to_bool,
712
+ default=False,
713
+ error_message="Please enter yes or no.",
714
+ )
715
+
716
+ fp8_config = None
717
+ if distributed_type == DistributedType.XLA:
718
+ mixed_precision = "no"
719
+ main_training_function = _ask_field(
720
+ "What is the name of the function in your script that should be launched in all parallel scripts? [main]: ",
721
+ default="main",
722
+ )
723
+ tpu_use_cluster = _ask_field(
724
+ "Are you using a TPU cluster? [yes/NO]: ",
725
+ _convert_yes_no_to_bool,
726
+ default=False,
727
+ error_message="Please enter yes or no.",
728
+ )
729
+ if tpu_use_cluster:
730
+ tpu_name = _ask_field(
731
+ "What is the name of your TPU cluster? ",
732
+ default=None,
733
+ error_message="Please enter the name of your TPU cluster.",
734
+ )
735
+ tpu_zone = _ask_field(
736
+ "What is the zone of your TPU cluster? ",
737
+ default=None,
738
+ error_message="Please enter the zone of your TPU cluster.",
739
+ )
740
+ tpu_use_sudo = _ask_field(
741
+ "To run a python script in a TPU pod, should `sudo` be used? [yes/NO]: ",
742
+ default=False,
743
+ error_message="Please enter yes or no.",
744
+ )
745
+ run_commands = _ask_field(
746
+ "Do you have code you wish to run on startup in each pod? [yes/NO]: ",
747
+ _convert_yes_no_to_bool,
748
+ default=False,
749
+ error_message="Please enter yes or no.",
750
+ )
751
+ if run_commands:
752
+ use_command_file = _ask_field(
753
+ "Is this code located in a bash script? [yes/NO]: ",
754
+ _convert_yes_no_to_bool,
755
+ default=False,
756
+ error_message="Please enter yes or no.",
757
+ )
758
+ if use_command_file:
759
+ tpu_command_file = _ask_field(
760
+ "What is the path to your bash script? ",
761
+ default=None,
762
+ error_message="Please enter the path to your bash script.",
763
+ )
764
+ tpu_command_file = os.path.abspath(tpu_command_file)
765
+ else:
766
+ print("Please enter each command separately you wish to run on startup in each pod.")
767
+ tpu_commands = []
768
+ another_command = True
769
+ while another_command:
770
+ tpu_commands.append(
771
+ _ask_field(
772
+ "Please enter a single command to be ran ",
773
+ default=None,
774
+ error_message="Please enter the commands you wish to run on startup in each pod as a single string.",
775
+ )
776
+ )
777
+ another_command = _ask_field(
778
+ "Do you wish to add another command? [yes/NO]: ",
779
+ _convert_yes_no_to_bool,
780
+ default=False,
781
+ error_message="Please enter yes or no.",
782
+ )
783
+ tpu_vm = _ask_field(
784
+ "If not using an instance group, what are the names of the Compute VM instances to be used, separated by a comma: ",
785
+ default="",
786
+ ).split(",")
787
+ tpu_env = _ask_field(
788
+ "What environment variables do you wish to set in each pod, separated by a comma: ",
789
+ default="",
790
+ ).split(",")
791
+
792
+ else:
793
+ main_training_function = "main"
794
+ if distributed_type == DistributedType.DEEPSPEED and use_deepspeed_config:
795
+ mixed_precision = None
796
+ else:
797
+ mixed_precision = _ask_options(
798
+ "Do you wish to use mixed precision?",
799
+ ["no", "fp16", "bf16", "fp8"],
800
+ _convert_mixed_precision,
801
+ )
802
+ if mixed_precision == "fp8":
803
+ if not is_fp8_available():
804
+ raise ValueError(
805
+ "FP8 (either torchao, Transformer Engine or MSAMP) is not installed on this machine."
806
+ )
807
+ fp8_config = {}
808
+ fp8_config["backend"] = _ask_options(
809
+ "Which FP8 backend do you want to use?",
810
+ ["ao", "te", "msamp"],
811
+ _convert_fp8_backend,
812
+ )
813
+ if fp8_config["backend"] == "TE":
814
+ if not is_transformer_engine_available():
815
+ raise ValueError("TransformersEngine was selected, but it is not installed on this machine.")
816
+ fp8_config["use_autocast_during_eval"] = _ask_field(
817
+ "Do you want to use FP8 autocast during eval mode? Generally better metrics are found when this is disabled [yes/NO]: ",
818
+ _convert_yes_no_to_bool,
819
+ default=False,
820
+ )
821
+ fp8_config["margin"] = _ask_field(
822
+ "What margin should be used for gradient scaling? [0]: ",
823
+ int,
824
+ default=0,
825
+ )
826
+ fp8_config["interval"] = _ask_field(
827
+ "What interval should be used for for how often the scaling factor is recomputed? [1]: ",
828
+ int,
829
+ default=1,
830
+ )
831
+ fp8_config["fp8_format"] = _ask_options(
832
+ "Which weight format should be used?",
833
+ ["HYBRID", "E4M3", "E5M2"],
834
+ lambda i: ["HYBRID", "E4M3", "E5M2"][i],
835
+ default=0,
836
+ )
837
+ fp8_config["amax_history_length"] = _ask_field(
838
+ "What length of history should be used for the amax scaling factor computation? [1024]: ",
839
+ int,
840
+ default=1024,
841
+ )
842
+ fp8_config["amax_compute_algorithm"] = _ask_options(
843
+ "Which algorithm should be used for the amax scaling factor computation?",
844
+ ["max", "most_recent"],
845
+ lambda x: "max" if x == 0 else "most_recent",
846
+ default=0,
847
+ )
848
+ fp8_config["override_linear_precision"] = _ask_field(
849
+ "Do you want to to execute `fprop`, `dgrad`, and `wgrad` GEMMS in higher precision? [yes/NO]: ",
850
+ _convert_yes_no_to_bool,
851
+ default=False,
852
+ )
853
+ if fp8_config["override_linear_precision"]:
854
+ fprop = _ask_field(
855
+ "Should `fprop` be executed in higher precision? [yes/NO]: ",
856
+ _convert_yes_no_to_bool,
857
+ default=False,
858
+ )
859
+ dgrad = _ask_field(
860
+ "Should `dgrad` be executed in higher precision? [yes/NO]: ",
861
+ _convert_yes_no_to_bool,
862
+ default=False,
863
+ )
864
+ wgrad = _ask_field(
865
+ "Should `wgrad` be executed in higher precision? [yes/NO]: ",
866
+ _convert_yes_no_to_bool,
867
+ default=False,
868
+ )
869
+ fp8_config["override_linear_precision"] = (fprop, dgrad, wgrad)
870
+ else:
871
+ fp8_config["override_linear_precision"] = (False, False, False)
872
+
873
+ elif fp8_config["backend"] == "MSAMP":
874
+ if not is_msamp_available():
875
+ raise ValueError("MSAMP was selected, but it is not installed on this machine.")
876
+ fp8_config["optimization_level"] = _ask_options(
877
+ "Which optimization level should be used?",
878
+ ["O1", "O2"],
879
+ lambda x: "O1" if x == 0 else "O2",
880
+ default=1,
881
+ )
882
+
883
+ elif fp8_config["backend"] == "AO":
884
+ if not is_torchao_available():
885
+ raise ValueError("torchao was selected, but it is not installed on this machine.")
886
+ fp8_config["enable_fsdp_float8_all_gather"] = _ask_field(
887
+ "Do you want to enable FSDP2 float8 all gather? This is recommended for better performance if using FSDP2. [YES/no]: ",
888
+ _convert_yes_no_to_bool,
889
+ default=True,
890
+ )
891
+ fp8_config["pad_inner_dim"] = _ask_field(
892
+ "Do you want to pad the inner dimension of weight matrices before float8 matmuls? This is required for _scaled_mm which has strict alignment requirements. Note: padding may cause memory spikes. [YES/no]: ",
893
+ _convert_yes_no_to_bool,
894
+ default=True,
895
+ )
896
+
897
+ if use_dynamo and mixed_precision == "no" and not use_cpu:
898
+ print(
899
+ "Torch dynamo used without mixed precision requires TF32 to be efficient. Accelerate will enable it by default when launching your scripts."
900
+ )
901
+
902
+ if distributed_type == DistributedType.XLA and mixed_precision == "bf16":
903
+ tpu_downcast_bf16 = _ask_field(
904
+ "Should `torch.float` be cast as `bfloat16` and `torch.double` remain `float32` on TPUs?", default="no"
905
+ )
906
+
907
+ return ClusterConfig(
908
+ compute_environment=ComputeEnvironment.LOCAL_MACHINE,
909
+ distributed_type=distributed_type,
910
+ num_processes=num_processes,
911
+ gpu_ids=gpu_ids,
912
+ mixed_precision=mixed_precision,
913
+ downcast_bf16=tpu_downcast_bf16,
914
+ machine_rank=machine_rank,
915
+ num_machines=num_machines,
916
+ main_process_ip=main_process_ip,
917
+ main_process_port=main_process_port,
918
+ main_training_function=main_training_function,
919
+ fp8_config=fp8_config,
920
+ deepspeed_config=deepspeed_config,
921
+ fsdp_config=fsdp_config,
922
+ parallelism_config=parallelism_config,
923
+ megatron_lm_config=megatron_lm_config,
924
+ mpirun_config=mpirun_config,
925
+ use_cpu=use_cpu,
926
+ rdzv_backend=rdzv_backend,
927
+ same_network=same_network,
928
+ commands=tpu_commands,
929
+ command_file=tpu_command_file,
930
+ tpu_env=tpu_env,
931
+ tpu_name=tpu_name,
932
+ tpu_vm=tpu_vm,
933
+ tpu_zone=tpu_zone,
934
+ tpu_use_sudo=tpu_use_sudo,
935
+ tpu_use_cluster=tpu_use_cluster,
936
+ dynamo_config=dynamo_config,
937
+ debug=debug,
938
+ enable_cpu_affinity=enable_cpu_affinity,
939
+ )
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/config.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import argparse
18
+ import os
19
+
20
+ from accelerate.utils import ComputeEnvironment
21
+
22
+ from .cluster import get_cluster_input
23
+ from .config_args import cache_dir, default_config_file, default_yaml_config_file, load_config_from_file # noqa: F401
24
+ from .config_utils import _ask_field, _ask_options, _convert_compute_environment # noqa: F401
25
+ from .sagemaker import get_sagemaker_input
26
+
27
+
28
# Help text shown for the `accelerate config` sub-command.
description = "Launches a series of prompts to create and save a `default_config.yaml` configuration file for your training system. Should always be ran first on your machine"
29
+
30
+
31
def get_user_input():
    """Ask which compute environment is in use and run the matching questionnaire.

    Returns the config object produced by either the SageMaker or the cluster
    (local machine) interactive prompt flow.
    """
    environment = _ask_options(
        "In which compute environment are you running?",
        ["This machine", "AWS (Amazon SageMaker)"],
        _convert_compute_environment,
    )
    if environment == ComputeEnvironment.AMAZON_SAGEMAKER:
        return get_sagemaker_input()
    return get_cluster_input()
42
+
43
+
44
def config_command_parser(subparsers=None):
    """Build the argument parser for `accelerate config`.

    When `subparsers` is given (invoked from the main CLI), registers a
    `config` sub-command on it and wires `config_command` as its handler;
    otherwise creates a standalone parser.
    """
    if subparsers is None:
        parser = argparse.ArgumentParser("Accelerate config command", description=description)
    else:
        parser = subparsers.add_parser("config", description=description)

    parser.add_argument(
        "--config_file",
        default=None,
        help=(
            "The path to use to store the config file. Will default to a file named default_config.yaml in the cache "
            "location, which is the content of the environment `HF_HOME` suffixed with 'accelerate', or if you don't have "
            "such an environment variable, your cache directory ('~/.cache' or the content of `XDG_CACHE_HOME`) suffixed "
            "with 'huggingface'."
        ),
    )

    # Only a sub-command parser can carry the dispatch default.
    if subparsers is not None:
        parser.set_defaults(func=config_command)
    return parser
64
+
65
+
66
def config_command(args):
    """Run the interactive questionnaire and save the resulting configuration.

    Writes JSON when the target path ends in `.json`, YAML otherwise. With no
    `--config_file`, ensures the cache directory exists and uses the default
    YAML location.
    """
    config = get_user_input()
    if args.config_file is not None:
        config_file = args.config_file
    else:
        # Equivalent to the isdir-check-then-makedirs dance: create on demand.
        os.makedirs(cache_dir, exist_ok=True)
        config_file = default_yaml_config_file

    writer = config.to_json_file if config_file.endswith(".json") else config.to_yaml_file
    writer(config_file)
    print(f"accelerate configuration saved at {config_file}")
80
+
81
+
82
def main():
    """CLI entry point: parse arguments and run the config command."""
    parser = config_command_parser()
    config_command(parser.parse_args())


if __name__ == "__main__":
    main()
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/config_args.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import json
18
+ import os
19
+ from dataclasses import dataclass
20
+ from enum import Enum
21
+ from typing import Optional, Union
22
+
23
+ import yaml
24
+
25
+ from ...utils import ComputeEnvironment, DistributedType, SageMakerDistributedType
26
+ from ...utils.constants import SAGEMAKER_PYTHON_VERSION, SAGEMAKER_PYTORCH_VERSION, SAGEMAKER_TRANSFORMERS_VERSION
27
+
28
+
29
# Resolve the Hugging Face cache root: HF_HOME wins; otherwise fall back to
# XDG_CACHE_HOME (default ~/.cache) suffixed with "huggingface", `~` expanded.
hf_cache_home = os.path.expanduser(
    os.environ.get("HF_HOME", os.path.join(os.environ.get("XDG_CACHE_HOME", "~/.cache"), "huggingface"))
)
cache_dir = os.path.join(hf_cache_home, "accelerate")
# NOTE(review): both the "json" and "yaml" defaults point at default_config.yaml —
# looks deliberate for backward compatibility, but confirm upstream intent.
default_json_config_file = os.path.join(cache_dir, "default_config.yaml")
default_yaml_config_file = os.path.join(cache_dir, "default_config.yaml")

# For backward compatibility: use the json config only when it is the sole existing file.
if not os.path.isfile(default_yaml_config_file) and os.path.isfile(default_json_config_file):
    default_config_file = default_json_config_file
else:
    default_config_file = default_yaml_config_file
41
+
42
+
43
def load_config_from_file(config_file):
    """Load an accelerate config file and return the matching config object.

    `None` selects the default config location. The file's `compute_environment`
    key decides between `ClusterConfig` (local machine, the default) and
    `SageMakerConfig`. Raises `FileNotFoundError` for an explicit path that
    does not exist.
    """
    if config_file is None:
        config_file = default_config_file
    elif not os.path.isfile(config_file):
        raise FileNotFoundError(
            f"The passed configuration file `{config_file}` does not exist. "
            "Please pass an existing file to `accelerate launch`, or use the default one "
            "created through `accelerate config` and run `accelerate launch` "
            "without the `--config_file` argument."
        )

    is_json = config_file.endswith(".json")
    with open(config_file, encoding="utf-8") as f:
        raw = json.load(f) if is_json else yaml.safe_load(f)

    # Missing key means a local-machine config.
    environment = raw.get("compute_environment", ComputeEnvironment.LOCAL_MACHINE)
    config_class = ClusterConfig if environment == ComputeEnvironment.LOCAL_MACHINE else SageMakerConfig

    if is_json:
        return config_class.from_json_file(json_file=config_file)
    return config_class.from_yaml_file(yaml_file=config_file)
73
+
74
+
75
@dataclass
class BaseConfig:
    """Shared fields and (de)serialization helpers for accelerate config classes.

    Subclasses (cluster / SageMaker) add their own fields; this base handles
    round-tripping to JSON/YAML, normalizing legacy keys, and coercing string
    values into the proper enums.
    """

    compute_environment: ComputeEnvironment
    distributed_type: Union[DistributedType, SageMakerDistributedType]
    mixed_precision: str
    use_cpu: bool
    debug: bool

    def to_dict(self):
        """Return a serializable dict view of this config.

        Enums are replaced by their underlying values, empty sub-config dicts
        become None, and None-valued top-level keys are dropped. Unlike the
        previous implementation (which aliased and rewrote ``self.__dict__``),
        this builds fresh dicts and never mutates the live instance.
        """

        def _convert_enums(value):
            if isinstance(value, Enum):
                return value.value
            if isinstance(value, dict):
                if not value:
                    # Empty sub-configs serialize as None (then get dropped at top level).
                    return None
                return {key: _convert_enums(sub_value) for key, sub_value in value.items()}
            return value

        # Fresh dict instead of `result = self.__dict__`: serializing must not
        # rewrite the instance's attributes in place.
        result = {key: _convert_enums(value) for key, value in self.__dict__.items()}
        return {k: v for k, v in result.items() if v is not None}

    @staticmethod
    def process_config(config_dict):
        """
        Processes `config_dict` and sets default values for any missing keys
        """
        if "compute_environment" not in config_dict:
            config_dict["compute_environment"] = ComputeEnvironment.LOCAL_MACHINE
        if "distributed_type" not in config_dict:
            raise ValueError("A `distributed_type` must be specified in the config file.")
        if "num_processes" not in config_dict and config_dict["distributed_type"] == DistributedType.NO:
            config_dict["num_processes"] = 1
        if "mixed_precision" not in config_dict:
            # Legacy configs carried a boolean `fp16` flag instead of `mixed_precision`.
            config_dict["mixed_precision"] = "fp16" if ("fp16" in config_dict and config_dict["fp16"]) else None
        if "fp16" in config_dict:  # Convert the config to the new format.
            del config_dict["fp16"]
        if "dynamo_backend" in config_dict:  # Convert the config to the new format.
            dynamo_backend = config_dict.pop("dynamo_backend")
            config_dict["dynamo_config"] = {} if dynamo_backend == "NO" else {"dynamo_backend": dynamo_backend}
        if "use_cpu" not in config_dict:
            config_dict["use_cpu"] = False
        if "debug" not in config_dict:
            config_dict["debug"] = False
        if "enable_cpu_affinity" not in config_dict:
            config_dict["enable_cpu_affinity"] = False
        return config_dict

    @classmethod
    def from_json_file(cls, json_file=None):
        """Instantiate from a JSON file (default location when `json_file` is None).

        Raises ValueError if the file contains keys unknown to this dataclass.
        """
        json_file = default_json_config_file if json_file is None else json_file
        with open(json_file, encoding="utf-8") as f:
            config_dict = json.load(f)
        config_dict = cls.process_config(config_dict)
        extra_keys = sorted(set(config_dict.keys()) - set(cls.__dataclass_fields__.keys()))
        if len(extra_keys) > 0:
            raise ValueError(
                f"The config file at {json_file} had unknown keys ({extra_keys}), please try upgrading your `accelerate`"
                " version or fix (and potentially remove) these keys from your config file."
            )

        return cls(**config_dict)

    def to_json_file(self, json_file):
        """Write the config to `json_file` as pretty-printed, key-sorted JSON."""
        with open(json_file, "w", encoding="utf-8") as f:
            content = json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
            f.write(content)

    @classmethod
    def from_yaml_file(cls, yaml_file=None):
        """Instantiate from a YAML file (default location when `yaml_file` is None).

        Raises ValueError if the file contains keys unknown to this dataclass.
        """
        yaml_file = default_yaml_config_file if yaml_file is None else yaml_file
        with open(yaml_file, encoding="utf-8") as f:
            config_dict = yaml.safe_load(f)
        config_dict = cls.process_config(config_dict)
        extra_keys = sorted(set(config_dict.keys()) - set(cls.__dataclass_fields__.keys()))
        if len(extra_keys) > 0:
            raise ValueError(
                f"The config file at {yaml_file} had unknown keys ({extra_keys}), please try upgrading your `accelerate`"
                " version or fix (and potentially remove) these keys from your config file."
            )
        return cls(**config_dict)

    def to_yaml_file(self, yaml_file):
        """Write the config to `yaml_file` as YAML."""
        with open(yaml_file, "w", encoding="utf-8") as f:
            yaml.safe_dump(self.to_dict(), f)

    def __post_init__(self):
        # Config files store enums as plain strings; coerce them back.
        if isinstance(self.compute_environment, str):
            self.compute_environment = ComputeEnvironment(self.compute_environment)
        if isinstance(self.distributed_type, str):
            if self.compute_environment == ComputeEnvironment.AMAZON_SAGEMAKER:
                self.distributed_type = SageMakerDistributedType(self.distributed_type)
            else:
                self.distributed_type = DistributedType(self.distributed_type)
        if getattr(self, "dynamo_config", None) is None:
            self.dynamo_config = {}
176
+
177
+
178
@dataclass
class ClusterConfig(BaseConfig):
    """Configuration for local / multi-node (non-SageMaker) training runs."""

    num_processes: int = -1  # For instance if we use SLURM and the user manually passes it in
    machine_rank: int = 0
    num_machines: int = 1
    gpu_ids: Optional[str] = None
    main_process_ip: Optional[str] = None
    main_process_port: Optional[int] = None
    rdzv_backend: Optional[str] = "static"
    same_network: Optional[bool] = False
    main_training_function: str = "main"
    enable_cpu_affinity: bool = False

    # args for FP8 training
    fp8_config: Optional[dict] = None
    # args for deepspeed_plugin
    deepspeed_config: Optional[dict] = None
    # args for fsdp
    fsdp_config: Optional[dict] = None
    # args for parallelism config
    parallelism_config: Optional[dict] = None
    # args for megatron_lm
    megatron_lm_config: Optional[dict] = None
    # args for mpirun
    mpirun_config: Optional[dict] = None
    # args for TPU
    downcast_bf16: bool = False

    # args for TPU pods
    tpu_name: Optional[str] = None
    tpu_zone: Optional[str] = None
    tpu_use_cluster: bool = False
    tpu_use_sudo: bool = False
    command_file: Optional[str] = None
    commands: list[str] = None
    tpu_vm: list[str] = None
    tpu_env: list[str] = None

    # args for dynamo
    dynamo_config: Optional[dict] = None

    def __post_init__(self):
        # Normalize every optional sub-config: None -> {} so downstream code
        # can index into them unconditionally.
        for attr in (
            "deepspeed_config",
            "fsdp_config",
            "megatron_lm_config",
            "mpirun_config",
            "fp8_config",
            "parallelism_config",
        ):
            if getattr(self, attr) is None:
                setattr(self, attr, {})
        return super().__post_init__()
233
+
234
+
235
@dataclass
class SageMakerConfig(BaseConfig):
    """Configuration for training jobs launched on AWS SageMaker."""

    # Required SageMaker job settings.
    ec2_instance_type: str
    iam_role_name: str
    # Optional custom container image; falls back to the framework image when None.
    image_uri: Optional[str] = None
    profile: Optional[str] = None
    region: str = "us-east-1"
    num_machines: int = 1
    gpu_ids: str = "all"
    # NOTE(review): this f-string is evaluated at class-definition time, so it always
    # bakes in the class default (1) rather than an instance's num_machines — confirm intended.
    base_job_name: str = f"accelerate-sagemaker-{num_machines}"
    # Pinned framework versions for the SageMaker estimator (from ...utils.constants).
    pytorch_version: str = SAGEMAKER_PYTORCH_VERSION
    transformers_version: str = SAGEMAKER_TRANSFORMERS_VERSION
    py_version: str = SAGEMAKER_PYTHON_VERSION
    sagemaker_inputs_file: Optional[str] = None
    sagemaker_metrics_file: Optional[str] = None
    additional_args: Optional[dict] = None
    dynamo_config: Optional[dict] = None
    enable_cpu_affinity: bool = False
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/config_utils.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import argparse
18
+
19
+ from ...utils.dataclasses import (
20
+ ComputeEnvironment,
21
+ DistributedType,
22
+ DynamoBackend,
23
+ FP8BackendType,
24
+ PrecisionType,
25
+ SageMakerDistributedType,
26
+ )
27
+ from ..menu import BulletMenu
28
+
29
+
30
# Names of the TorchDynamo backends offered in the interactive config menu.
# Each entry must be a valid `DynamoBackend` enum member name, since
# `_convert_dynamo_backend` constructs the enum from the selected string.
DYNAMO_BACKENDS = [
    "EAGER",
    "AOT_EAGER",
    "INDUCTOR",
    "AOT_TS_NVFUSER",
    "NVPRIMS_NVFUSER",
    "CUDAGRAPHS",
    "OFI",
    "FX2TRT",
    "ONNXRT",
    "TENSORRT",
    "AOT_TORCHXLA_TRACE_ONCE",
    "TORCHXLA_TRACE_ONCE",  # fixed typo: was "TORHCHXLA_TRACE_ONCE"
    "TVM",
]
45
+
46
+
47
def _ask_field(input_text, convert_value=None, default=None, error_message=None):
    """Prompt the user until the answer converts successfully.

    An empty answer returns `default` when one is given; a failed conversion
    prints `error_message` (if any) and re-prompts.
    """
    while True:
        raw = input(input_text)
        try:
            if default is not None and not raw:
                return default
            if convert_value is None:
                return raw
            return convert_value(raw)
        except Exception:
            # Conversion failed — explain and ask again.
            if error_message is not None:
                print(error_message)
58
+
59
+
60
def _ask_options(input_text, options=None, convert_value=None, default=0):
    """Render a bullet menu and return the (optionally converted) selection.

    Args:
        input_text: Prompt shown above the menu.
        options: Menu entries. Previously a mutable default argument (`[]`),
            which is shared across calls — replaced with the None sentinel.
        convert_value: Optional callable applied to the selected value.
        default: Index of the pre-selected entry.
    """
    if options is None:
        options = []
    menu = BulletMenu(input_text, options)
    result = menu.run(default_choice=default)
    return convert_value(result) if convert_value is not None else result
64
+
65
+
66
def _convert_compute_environment(value):
    """Map a menu index (0 or 1) to the corresponding `ComputeEnvironment` member."""
    environments = ("LOCAL_MACHINE", "AMAZON_SAGEMAKER")
    return ComputeEnvironment(environments[int(value)])
69
+
70
+
71
def _convert_distributed_mode(value):
    """Map a menu index to the corresponding `DistributedType` member."""
    # Order must match the option order presented in the config menu.
    modes = (
        "NO",
        "MULTI_CPU",
        "MULTI_XPU",
        "MULTI_HPU",
        "MULTI_GPU",
        "MULTI_NPU",
        "MULTI_MLU",
        "MULTI_SDAA",
        "MULTI_MUSA",
        "MULTI_NEURON",
        "XLA",
    )
    return DistributedType(modes[int(value)])
88
+
89
+
90
def _convert_dynamo_backend(value):
    """Map a menu index into DYNAMO_BACKENDS and return the enum member's value string."""
    backend_name = DYNAMO_BACKENDS[int(value)]
    return DynamoBackend(backend_name).value
93
+
94
+
95
def _convert_mixed_precision(value):
    """Map a menu index to the corresponding `PrecisionType` member."""
    precisions = ("no", "fp16", "bf16", "fp8")
    return PrecisionType(precisions[int(value)])
98
+
99
+
100
def _convert_sagemaker_distributed_mode(value):
    """Map a menu index to the corresponding `SageMakerDistributedType` member."""
    modes = ("NO", "DATA_PARALLEL", "MODEL_PARALLEL")
    return SageMakerDistributedType(modes[int(value)])
103
+
104
+
105
def _convert_fp8_backend(value):
    """Map a menu index to the corresponding `FP8BackendType` member."""
    backends = ("AO", "TE", "MSAMP")
    return FP8BackendType(backends[int(value)])
108
+
109
+
110
+ def _convert_yes_no_to_bool(value):
111
+ return {"yes": True, "no": False}[value.lower()]
112
+
113
+
114
class SubcommandHelpFormatter(argparse.RawDescriptionHelpFormatter):
    """
    A custom formatter that will remove the usage line from the help message for subcommands.
    """

    def _format_usage(self, usage, actions, groups, prefix):
        # Drop the "<command> [<args>] " placeholder argparse inserts for subparsers.
        formatted = super()._format_usage(usage, actions, groups, prefix)
        return formatted.replace("<command> [<args>] ", "")
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/default.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from pathlib import Path
18
+
19
+ import torch
20
+
21
+ from ...utils import (
22
+ is_hpu_available,
23
+ is_mlu_available,
24
+ is_musa_available,
25
+ is_neuron_available,
26
+ is_npu_available,
27
+ is_sdaa_available,
28
+ is_xpu_available,
29
+ )
30
+ from .config_args import ClusterConfig, default_json_config_file
31
+ from .config_utils import SubcommandHelpFormatter
32
+
33
+
34
+ description = "Create a default config file for Accelerate with only a few flags set."
35
+
36
+
37
def write_basic_config(mixed_precision="no", save_location: str = default_json_config_file):
    """
    Creates and saves a basic cluster config to be used on a local machine with potentially multiple GPUs. Will also
    set CPU if it is a CPU-only machine.

    Args:
        mixed_precision (`str`, *optional*, defaults to "no"):
            Mixed Precision to use. Should be one of "no", "fp16", or "bf16"
        save_location (`str`, *optional*, defaults to `default_json_config_file`):
            Optional custom save location. Should be passed to `--config_file` when using `accelerate launch`. Default
            location is inside the huggingface cache folder (`~/.cache/huggingface`) but can be overridden by setting
            the `HF_HOME` environmental variable, followed by `accelerate/default_config.yaml`.

    Returns:
        The `Path` of the written config file, or `False` if a config already exists at `save_location`.
    """
    path = Path(save_location)
    path.parent.mkdir(parents=True, exist_ok=True)
    if path.exists():
        print(
            f"Configuration already exists at {save_location}, will not override. Run `accelerate config` manually or pass a different `save_location`."
        )
        return False
    mixed_precision = mixed_precision.lower()
    if mixed_precision not in ["no", "fp16", "bf16", "fp8"]:
        raise ValueError(
            f"`mixed_precision` should be one of 'no', 'fp16', 'bf16', or 'fp8'. Received {mixed_precision}"
        )
    config = {
        "compute_environment": "LOCAL_MACHINE",
        "mixed_precision": mixed_precision,
    }
    # Accelerator backends, checked in priority order; the first available one
    # wins. The device-count getters are lambdas so a backend's torch submodule
    # is only touched when that backend is actually available.
    # BUGFIX: the original used `if is_mlu_available(): ... if is_sdaa_available():`
    # (two separate chains), so on an MLU-only machine the trailing `else` of the
    # second chain clobbered the MLU settings with CPU defaults.
    device_checks = [
        (is_mlu_available, lambda: torch.mlu.device_count(), "MULTI_MLU"),
        (is_sdaa_available, lambda: torch.sdaa.device_count(), "MULTI_SDAA"),
        (is_musa_available, lambda: torch.musa.device_count(), "MULTI_MUSA"),
        (is_hpu_available, lambda: torch.hpu.device_count(), "MULTI_HPU"),
        (torch.cuda.is_available, lambda: torch.cuda.device_count(), "MULTI_GPU"),
        (is_xpu_available, lambda: torch.xpu.device_count(), "MULTI_XPU"),
        (is_npu_available, lambda: torch.npu.device_count(), "MULTI_NPU"),
        (is_neuron_available, lambda: torch.neuron.device_count(), "MULTI_NEURON"),
    ]
    for is_available, count_devices, multi_type in device_checks:
        if is_available():
            num_devices = count_devices()
            config["num_processes"] = num_devices
            config["use_cpu"] = False
            # Only use a distributed type when more than one device is present.
            config["distributed_type"] = multi_type if num_devices > 1 else "NO"
            break
    else:
        # CPU-only machine.
        config["use_cpu"] = True
        config["num_processes"] = 1
        config["distributed_type"] = "NO"
    config["debug"] = False
    config["enable_cpu_affinity"] = False
    config = ClusterConfig(**config)
    config.to_json_file(path)
    return path
140
+
141
+
142
def default_command_parser(parser, parents):
    """Register the `default` subcommand, which writes a basic config file.

    Args:
        parser: The subparsers action to attach the new subcommand to.
        parents: Parent parsers whose arguments should be inherited.

    Returns:
        The configured subcommand parser.
    """
    parser = parser.add_parser("default", parents=parents, help=description, formatter_class=SubcommandHelpFormatter)
    parser.add_argument(
        "--config_file",
        default=default_json_config_file,
        help=(
            "The path to use to store the config file. Will default to a file named default_config.yaml in the cache "
            "location, which is the content of the environment `HF_HOME` suffixed with 'accelerate', or if you don't have "
            "such an environment variable, your cache directory ('~/.cache' or the content of `XDG_CACHE_HOME`) suffixed "
            "with 'huggingface'."
        ),
        dest="save_location",
    )

    parser.add_argument(
        "--mixed_precision",
        # "fp8" added for consistency: `write_basic_config` already validates
        # and accepts it, but the CLI previously rejected it.
        choices=["no", "fp16", "bf16", "fp8"],
        type=str,
        help="Whether or not to use mixed precision training. "
        "Choose between FP16 and BF16 (bfloat16) training. "
        "BF16 training is only supported on Nvidia Ampere GPUs and PyTorch 1.10 or later.",
        default="no",
    )
    parser.set_defaults(func=default_config_command)
    return parser
167
+
168
+
169
def default_config_command(args):
    """Write the basic default config and report where it was saved."""
    saved_path = write_basic_config(args.mixed_precision, args.save_location)
    # `write_basic_config` returns False when a config already exists.
    if saved_path:
        print(f"accelerate configuration saved at {saved_path}")
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/sagemaker.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ import json
17
+ import os
18
+
19
+ from ...utils.constants import SAGEMAKER_PARALLEL_EC2_INSTANCES, TORCH_DYNAMO_MODES
20
+ from ...utils.dataclasses import ComputeEnvironment, SageMakerDistributedType
21
+ from ...utils.imports import is_boto3_available
22
+ from .config_args import SageMakerConfig
23
+ from .config_utils import (
24
+ DYNAMO_BACKENDS,
25
+ _ask_field,
26
+ _ask_options,
27
+ _convert_dynamo_backend,
28
+ _convert_mixed_precision,
29
+ _convert_sagemaker_distributed_mode,
30
+ _convert_yes_no_to_bool,
31
+ )
32
+
33
+
34
+ if is_boto3_available():
35
+ import boto3 # noqa: F401
36
+
37
+
38
def _create_iam_role_for_sagemaker(role_name):
    """Create an IAM role that the SageMaker service can assume.

    Grants SageMaker control-plane access plus ECR image pulls, CloudWatch
    metrics/logs, and S3 data access. If a role named *role_name* already
    exists it is left untouched.
    """
    iam_client = boto3.client("iam")

    # Trust policy allowing the SageMaker service principal to assume the role.
    sagemaker_trust_policy = {
        "Version": "2012-10-17",
        "Statement": [
            {"Effect": "Allow", "Principal": {"Service": "sagemaker.amazonaws.com"}, "Action": "sts:AssumeRole"}
        ],
    }
    try:
        # create the role, associated with the chosen trust policy
        iam_client.create_role(
            RoleName=role_name, AssumeRolePolicyDocument=json.dumps(sagemaker_trust_policy, indent=2)
        )
        # Permissions a training job needs: SageMaker APIs, pulling images from
        # ECR, publishing/reading CloudWatch metrics and logs, and S3 access.
        policy_document = {
            "Version": "2012-10-17",
            "Statement": [
                {
                    "Effect": "Allow",
                    "Action": [
                        "sagemaker:*",
                        "ecr:GetDownloadUrlForLayer",
                        "ecr:BatchGetImage",
                        "ecr:BatchCheckLayerAvailability",
                        "ecr:GetAuthorizationToken",
                        "cloudwatch:PutMetricData",
                        "cloudwatch:GetMetricData",
                        "cloudwatch:GetMetricStatistics",
                        "cloudwatch:ListMetrics",
                        "logs:CreateLogGroup",
                        "logs:CreateLogStream",
                        "logs:DescribeLogStreams",
                        "logs:PutLogEvents",
                        "logs:GetLogEvents",
                        "s3:CreateBucket",
                        "s3:ListBucket",
                        "s3:GetBucketLocation",
                        "s3:GetObject",
                        "s3:PutObject",
                    ],
                    "Resource": "*",
                }
            ],
        }
        # attach policy to role
        iam_client.put_role_policy(
            RoleName=role_name,
            PolicyName=f"{role_name}_policy_permission",
            PolicyDocument=json.dumps(policy_document, indent=2),
        )
    except iam_client.exceptions.EntityAlreadyExistsException:
        print(f"role {role_name} already exists. Using existing one")
90
+
91
+
92
def _get_iam_role_arn(role_name):
    """Look up and return the ARN of an existing IAM role via boto3."""
    role = boto3.client("iam").get_role(RoleName=role_name)
    return role["Role"]["Arn"]
95
+
96
+
97
def get_sagemaker_input():
    """Interactively collect all settings needed to build a `SageMakerConfig`.

    Walks the user through: AWS credentials (profile or access keys, exported
    via environment variables), IAM role selection or creation, optional
    custom Docker image / input-channel and metrics TSV files, distributed
    mode, torch dynamo options, EC2 instance type, machine count, and mixed
    precision.

    Returns:
        A fully populated `SageMakerConfig`.
    """
    # --- AWS credentials ---
    credentials_configuration = _ask_options(
        "How do you want to authorize?",
        ["AWS Profile", "Credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) "],
        int,
    )
    aws_profile = None
    if credentials_configuration == 0:
        aws_profile = _ask_field("Enter your AWS Profile name: [default] ", default="default")
        os.environ["AWS_PROFILE"] = aws_profile
    else:
        print(
            "Note you will need to provide AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY when you launch you training script with,"
            "`accelerate launch --aws_access_key_id XXX --aws_secret_access_key YYY`"
        )
        aws_access_key_id = _ask_field("AWS Access Key ID: ")
        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id

        aws_secret_access_key = _ask_field("AWS Secret Access Key: ")
        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key

    aws_region = _ask_field("Enter your AWS Region: [us-east-1]", default="us-east-1")
    os.environ["AWS_DEFAULT_REGION"] = aws_region

    # --- IAM role: reuse an existing one or create a new one on the spot ---
    role_management = _ask_options(
        "Do you already have an IAM Role for executing Amazon SageMaker Training Jobs?",
        ["Provide IAM Role name", "Create new IAM role using credentials"],
        int,
    )
    if role_management == 0:
        iam_role_name = _ask_field("Enter your IAM role name: ")
    else:
        iam_role_name = "accelerate_sagemaker_execution_role"
        print(f'Accelerate will create an iam role "{iam_role_name}" using the provided credentials')
        _create_iam_role_for_sagemaker(iam_role_name)

    # --- optional Docker image, input channels, and metrics files ---
    is_custom_docker_image = _ask_field(
        "Do you want to use custom Docker image? [yes/NO]: ",
        _convert_yes_no_to_bool,
        default=False,
        error_message="Please enter yes or no.",
    )
    docker_image = None
    if is_custom_docker_image:
        docker_image = _ask_field("Enter your Docker image: ", lambda x: str(x).lower())

    is_sagemaker_inputs_enabled = _ask_field(
        "Do you want to provide SageMaker input channels with data locations? [yes/NO]: ",
        _convert_yes_no_to_bool,
        default=False,
        error_message="Please enter yes or no.",
    )
    sagemaker_inputs_file = None
    if is_sagemaker_inputs_enabled:
        sagemaker_inputs_file = _ask_field(
            "Enter the path to the SageMaker inputs TSV file with columns (channel_name, data_location): ",
            lambda x: str(x).lower(),
        )

    is_sagemaker_metrics_enabled = _ask_field(
        "Do you want to enable SageMaker metrics? [yes/NO]: ",
        _convert_yes_no_to_bool,
        default=False,
        error_message="Please enter yes or no.",
    )
    sagemaker_metrics_file = None
    if is_sagemaker_metrics_enabled:
        sagemaker_metrics_file = _ask_field(
            "Enter the path to the SageMaker metrics TSV file with columns (metric_name, metric_regex): ",
            lambda x: str(x).lower(),
        )

    # --- distributed mode and torch dynamo options ---
    distributed_type = _ask_options(
        "What is the distributed mode?",
        ["No distributed training", "Data parallelism"],
        _convert_sagemaker_distributed_mode,
    )
    dynamo_config = {}
    use_dynamo = _ask_field(
        "Do you wish to optimize your script with torch dynamo?[yes/NO]:",
        _convert_yes_no_to_bool,
        default=False,
        error_message="Please enter yes or no.",
    )
    if use_dynamo:
        prefix = "dynamo_"
        # default=2 pre-selects "inductor" in DYNAMO_BACKENDS.
        dynamo_config[prefix + "backend"] = _ask_options(
            "Which dynamo backend would you like to use?",
            [x.lower() for x in DYNAMO_BACKENDS],
            _convert_dynamo_backend,
            default=2,
        )
        use_custom_options = _ask_field(
            "Do you want to customize the defaults sent to torch.compile? [yes/NO]: ",
            _convert_yes_no_to_bool,
            default=False,
            error_message="Please enter yes or no.",
        )

        if use_custom_options:
            dynamo_config[prefix + "mode"] = _ask_options(
                "Which mode do you want to use?",
                TORCH_DYNAMO_MODES,
                lambda x: TORCH_DYNAMO_MODES[int(x)],
                default="default",
            )
            dynamo_config[prefix + "use_fullgraph"] = _ask_field(
                "Do you want the fullgraph mode or it is ok to break model into several subgraphs? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )
            dynamo_config[prefix + "use_dynamic"] = _ask_field(
                "Do you want to enable dynamic shape tracing? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )
            dynamo_config[prefix + "use_regional_compilation"] = _ask_field(
                "Do you want to enable regional compilation? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )

    # --- EC2 instance type: restricted list for distributed jobs, free-form otherwise ---
    ec2_instance_query = "Which EC2 instance type you want to use for your training?"
    if distributed_type != SageMakerDistributedType.NO:
        ec2_instance_type = _ask_options(
            ec2_instance_query, SAGEMAKER_PARALLEL_EC2_INSTANCES, lambda x: SAGEMAKER_PARALLEL_EC2_INSTANCES[int(x)]
        )
    else:
        ec2_instance_query += "? [ml.p3.2xlarge]:"
        ec2_instance_type = _ask_field(ec2_instance_query, lambda x: str(x).lower(), default="ml.p3.2xlarge")

    # Debug mode (checking distributed ops) only makes sense for distributed jobs.
    debug = False
    if distributed_type != SageMakerDistributedType.NO:
        debug = _ask_field(
            "Should distributed operations be checked while running for errors? This can avoid timeout issues but will be slower. [yes/NO]: ",
            _convert_yes_no_to_bool,
            default=False,
            error_message="Please enter yes or no.",
        )

    num_machines = 1
    if distributed_type in (SageMakerDistributedType.DATA_PARALLEL, SageMakerDistributedType.MODEL_PARALLEL):
        num_machines = _ask_field(
            "How many machines do you want use? [1]: ",
            int,
            default=1,
        )

    mixed_precision = _ask_options(
        "Do you wish to use FP16 or BF16 (mixed precision)?",
        ["no", "fp16", "bf16", "fp8"],
        _convert_mixed_precision,
    )

    if use_dynamo and mixed_precision == "no":
        print(
            "Torch dynamo used without mixed precision requires TF32 to be efficient. Accelerate will enable it by default when launching your scripts."
        )

    return SageMakerConfig(
        image_uri=docker_image,
        compute_environment=ComputeEnvironment.AMAZON_SAGEMAKER,
        distributed_type=distributed_type,
        use_cpu=False,
        dynamo_config=dynamo_config,
        ec2_instance_type=ec2_instance_type,
        profile=aws_profile,
        region=aws_region,
        iam_role_name=iam_role_name,
        mixed_precision=mixed_precision,
        num_machines=num_machines,
        sagemaker_inputs_file=sagemaker_inputs_file,
        sagemaker_metrics_file=sagemaker_metrics_file,
        debug=debug,
    )
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/update.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2022 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from pathlib import Path
18
+
19
+ from .config_args import default_config_file, load_config_from_file
20
+ from .config_utils import SubcommandHelpFormatter
21
+
22
+
23
+ description = "Update an existing config file with the latest defaults while maintaining the old configuration."
24
+
25
+
26
def update_config(args):
    """
    Update an existing config file with the latest defaults while maintaining the old configuration.

    Returns:
        The path of the config file that was updated.

    Raises:
        ValueError: If no config file was passed and the default one does not
            exist, or the passed config file does not exist.
    """
    config_file = args.config_file
    if config_file is None and Path(default_config_file).exists():
        config_file = default_config_file
    # BUGFIX: previously `Path(None)` raised a TypeError when no config file
    # was passed and the default one was missing; raise a clear error instead.
    elif config_file is None or not Path(config_file).exists():
        raise ValueError(f"The passed config file located at {config_file} doesn't exist.")
    config = load_config_from_file(config_file)

    # Preserve the on-disk format of the existing file.
    if config_file.endswith(".json"):
        config.to_json_file(config_file)
    else:
        config.to_yaml_file(config_file)
    return config_file
42
+
43
+
44
def update_command_parser(parser, parents):
    """Register the `update` subcommand on the given subparsers collection."""
    sub_parser = parser.add_parser(
        "update", parents=parents, help=description, formatter_class=SubcommandHelpFormatter
    )
    sub_parser.add_argument(
        "--config_file",
        default=None,
        help=(
            "The path to the config file to update. Will default to a file named default_config.yaml in the cache "
            "location, which is the content of the environment `HF_HOME` suffixed with 'accelerate', or if you don't have "
            "such an environment variable, your cache directory ('~/.cache' or the content of `XDG_CACHE_HOME`) suffixed "
            "with 'huggingface'."
        ),
    )
    sub_parser.set_defaults(func=update_config_command)
    return sub_parser
59
+
60
+
61
def update_config_command(args):
    """Run the config update and report which file was rewritten."""
    updated_file = update_config(args)
    print(f"Successfully updated the configuration file at {updated_file}.")
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/env.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2022 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import argparse
18
+ import os
19
+ import platform
20
+ import subprocess
21
+
22
+ import numpy as np
23
+ import psutil
24
+ import torch
25
+
26
+ from accelerate import __version__ as version
27
+ from accelerate.commands.config import default_config_file, load_config_from_file
28
+
29
+ from ..utils import (
30
+ is_mlu_available,
31
+ is_musa_available,
32
+ is_neuron_available,
33
+ is_npu_available,
34
+ is_sdaa_available,
35
+ is_xpu_available,
36
+ )
37
+
38
+
39
def env_command_parser(subparsers=None):
    """Build the argparse parser for `accelerate env`.

    Works both standalone (subparsers is None) and as a subcommand attached to
    an existing subparsers collection.
    """
    if subparsers is None:
        parser = argparse.ArgumentParser("Accelerate env command")
    else:
        parser = subparsers.add_parser("env")

    parser.add_argument(
        "--config_file", default=None, help="The config file to use for the default values in the launching script."
    )

    # Only wire the command function when running as a subcommand.
    if subparsers is not None:
        parser.set_defaults(func=env_command)
    return parser
52
+
53
+
54
def env_command(args):
    """Gather and print environment information for GitHub bug reports.

    Detects the active accelerator backend, locates the `accelerate` binary,
    loads the (default or passed) Accelerate config, and prints everything in
    a copy-pastable list. Returns the collected info dict.
    """
    pt_version = torch.__version__
    pt_cuda_available = torch.cuda.is_available()
    pt_xpu_available = is_xpu_available()
    pt_mlu_available = is_mlu_available()
    pt_sdaa_available = is_sdaa_available()
    pt_musa_available = is_musa_available()
    pt_npu_available = is_npu_available()
    pt_neuron_available = is_neuron_available()

    # First available backend wins, in this fixed priority order.
    accelerator = "N/A"
    if pt_cuda_available:
        accelerator = "CUDA"
    elif pt_xpu_available:
        accelerator = "XPU"
    elif pt_mlu_available:
        accelerator = "MLU"
    elif pt_sdaa_available:
        accelerator = "SDAA"
    elif pt_musa_available:
        accelerator = "MUSA"
    elif pt_npu_available:
        accelerator = "NPU"
    elif pt_neuron_available:
        accelerator = "NEURON"

    accelerate_config = "Not found"
    # Get the default from the config file.
    if args.config_file is not None or os.path.isfile(default_config_file):
        accelerate_config = load_config_from_file(args.config_file).to_dict()

    # if we can run which, get it
    command = None
    bash_location = "Not found"
    if os.name == "nt":
        command = ["where", "accelerate"]
    elif os.name == "posix":
        command = ["which", "accelerate"]
    if command is not None:
        bash_location = subprocess.check_output(command, text=True, stderr=subprocess.STDOUT).strip()
    info = {
        "`Accelerate` version": version,
        "Platform": platform.platform(),
        "`accelerate` bash location": bash_location,
        "Python version": platform.python_version(),
        "Numpy version": np.__version__,
        "PyTorch version": f"{pt_version}",
        "PyTorch accelerator": accelerator,
        "System RAM": f"{psutil.virtual_memory().total / 1024**3:.2f} GB",
    }
    # Device-specific detail for whichever backend was detected.
    # NOTE(review): this chain checks neuron before npu, unlike the accelerator
    # detection above — confirm the ordering difference is intended.
    if pt_cuda_available:
        info["GPU type"] = torch.cuda.get_device_name()
    elif pt_xpu_available:
        info["XPU type"] = torch.xpu.get_device_name()
    elif pt_mlu_available:
        info["MLU type"] = torch.mlu.get_device_name()
    elif pt_sdaa_available:
        info["SDAA type"] = torch.sdaa.get_device_name()
    elif pt_musa_available:
        info["MUSA type"] = torch.musa.get_device_name()
    elif pt_neuron_available:
        info["NEURON type"] = torch.neuron.get_device_name()
    elif pt_npu_available:
        info["CANN version"] = torch.version.cann

    print("\nCopy-and-paste the text below in your GitHub issue\n")
    print("\n".join([f"- {prop}: {val}" for prop, val in info.items()]))

    print("- `Accelerate` default config:" if args.config_file is None else "- `Accelerate` config passed:")
    accelerate_config_str = (
        "\n".join([f"\t- {prop}: {val}" for prop, val in accelerate_config.items()])
        if isinstance(accelerate_config, dict)
        else f"\t{accelerate_config}"
    )
    print(accelerate_config_str)

    info["`Accelerate` configs"] = accelerate_config

    return info
133
+
134
+
135
def main() -> int:
    """CLI entry point for `accelerate env`; returns the process exit code."""
    parsed_args = env_command_parser().parse_args()
    env_command(parsed_args)
    return 0
140
+
141
+
142
+ if __name__ == "__main__":
143
+ raise SystemExit(main())
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/estimate.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2023 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ from typing import Optional
17
+
18
+ import torch
19
+ from huggingface_hub import model_info
20
+ from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
21
+
22
+ from accelerate import init_empty_weights
23
+ from accelerate.commands.utils import CustomArgumentParser
24
+ from accelerate.utils import (
25
+ calculate_maximum_sizes,
26
+ convert_bytes,
27
+ is_timm_available,
28
+ is_transformers_available,
29
+ )
30
+
31
+
32
+ if is_transformers_available():
33
+ import transformers
34
+ from transformers import AutoConfig, AutoModel
35
+
36
+ if is_timm_available():
37
+ import timm
38
+
39
+
40
+ def verify_on_hub(repo: str, token: Optional[str] = None):
41
+ "Verifies that the model is on the hub and returns the model info."
42
+ try:
43
+ return model_info(repo, token=token)
44
+ except (OSError, GatedRepoError):
45
+ return "gated"
46
+ except RepositoryNotFoundError:
47
+ return "repo"
48
+
49
+
50
+ def check_has_model(error):
51
+ """
52
+ Checks what library spawned `error` when a model is not found
53
+ """
54
+ if is_timm_available() and isinstance(error, RuntimeError) and "Unknown model" in error.args[0]:
55
+ return "timm"
56
+ elif (
57
+ is_transformers_available()
58
+ and isinstance(error, OSError)
59
+ and "does not appear to have a file named" in error.args[0]
60
+ ):
61
+ return "transformers"
62
+ else:
63
+ return "unknown"
64
+
65
+
66
+ def create_empty_model(
67
+ model_name: str, library_name: str, trust_remote_code: bool = False, access_token: Optional[str] = None
68
+ ):
69
+ """
70
+ Creates an empty model in full precision from its parent library on the `Hub` to calculate the overall memory
71
+ consumption.
72
+
73
+ Args:
74
+ model_name (`str`):
75
+ The model name on the Hub
76
+ library_name (`str`):
77
+ The library the model has an integration with, such as `transformers`. Will be used if `model_name` has no
78
+ metadata on the Hub to determine the library.
79
+ trust_remote_code (`bool`, `optional`, defaults to `False`):
80
+ Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
81
+ should only be set to `True` for repositories you trust and in which you have read the code, as it will
82
+ execute code present on the Hub on your local machine.
83
+ access_token (`str`, `optional`, defaults to `None`):
84
+ The access token to use to access private or gated models on the Hub. (for use on the Gradio app)
85
+
86
+ Returns:
87
+ `torch.nn.Module`: The torch model that has been initialized on the `meta` device.
88
+
89
+ """
90
+ model_info = verify_on_hub(model_name, access_token)
91
+ # Simplified errors
92
+ if model_info == "gated":
93
+ raise OSError(
94
+ f"Repo for model `{model_name}` is gated. You must be authenticated to access it. Please run `huggingface-cli login`."
95
+ )
96
+ elif model_info == "repo":
97
+ raise OSError(
98
+ f"Repo for model `{model_name}` does not exist on the Hub. If you are trying to access a private repo,"
99
+ " make sure you are authenticated via `huggingface-cli login` and have access."
100
+ )
101
+ if library_name is None:
102
+ library_name = getattr(model_info, "library_name", False)
103
+ if not library_name:
104
+ raise ValueError(
105
+ f"Model `{model_name}` does not have any library metadata on the Hub, please manually pass in a `--library_name` to use (such as `transformers`)"
106
+ )
107
+ if library_name == "transformers":
108
+ if not is_transformers_available():
109
+ raise ImportError(
110
+ f"To check `{model_name}`, `transformers` must be installed. Please install it via `pip install transformers`"
111
+ )
112
+ print(f"Loading pretrained config for `{model_name}` from `transformers`...")
113
+ if model_info.config is None:
114
+ raise RuntimeError(f"Tried to load `{model_name}` with `transformers` but it does not have any metadata.")
115
+
116
+ auto_map = model_info.config.get("auto_map", False)
117
+ config = AutoConfig.from_pretrained(model_name, trust_remote_code=trust_remote_code, token=access_token)
118
+ with init_empty_weights():
119
+ # remote code could specify a specific `AutoModel` class in the `auto_map`
120
+ constructor = AutoModel
121
+ if isinstance(auto_map, dict):
122
+ value = None
123
+ for key in auto_map.keys():
124
+ if key.startswith("AutoModelFor"):
125
+ value = key
126
+ break
127
+ if value is not None:
128
+ constructor = getattr(transformers, value)
129
+ # we need to pass the dtype, otherwise it is going to use the torch_dtype that is saved in the config
130
+ model = constructor.from_config(config, torch_dtype=torch.float32, trust_remote_code=trust_remote_code)
131
+ elif library_name == "timm":
132
+ if not is_timm_available():
133
+ raise ImportError(
134
+ f"To check `{model_name}`, `timm` must be installed. Please install it via `pip install timm`"
135
+ )
136
+ print(f"Loading pretrained config for `{model_name}` from `timm`...")
137
+ with init_empty_weights():
138
+ model = timm.create_model(model_name, pretrained=False)
139
+ else:
140
+ raise ValueError(
141
+ f"Library `{library_name}` is not supported yet, please open an issue on GitHub for us to add support."
142
+ )
143
+ return model
144
+
145
+
146
+ def create_ascii_table(headers: list, rows: list, title: str):
147
+ "Creates a pretty table from a list of rows, minimal version of `tabulate`."
148
+ sep_char, in_between = "│", "─"
149
+ column_widths = []
150
+ for i in range(len(headers)):
151
+ column_values = [row[i] for row in rows] + [headers[i]]
152
+ max_column_width = max(len(value) for value in column_values)
153
+ column_widths.append(max_column_width)
154
+
155
+ formats = [f"%{column_widths[i]}s" for i in range(len(rows[0]))]
156
+
157
+ pattern = f"{sep_char}{sep_char.join(formats)}{sep_char}"
158
+ diff = 0
159
+
160
+ def make_row(left_char, middle_char, right_char):
161
+ return f"{left_char}{middle_char.join([in_between * n for n in column_widths])}{in_between * diff}{right_char}"
162
+
163
+ separator = make_row("├", "┼", "┤")
164
+ if len(title) > sum(column_widths):
165
+ diff = abs(len(title) - len(separator))
166
+ column_widths[-1] += diff
167
+
168
+ # Update with diff
169
+ separator = make_row("├", "┼", "┤")
170
+ initial_rows = [
171
+ make_row("┌", in_between, "┐"),
172
+ f"{sep_char}{title.center(len(separator) - 2)}{sep_char}",
173
+ make_row("├", "┬", "┤"),
174
+ ]
175
+ table = "\n".join(initial_rows) + "\n"
176
+ column_widths[-1] += diff
177
+ centered_line = [text.center(column_widths[i]) for i, text in enumerate(headers)]
178
+ table += f"{pattern % tuple(centered_line)}\n{separator}\n"
179
+ for i, line in enumerate(rows):
180
+ centered_line = [t.center(column_widths[i]) for i, t in enumerate(line)]
181
+ table += f"{pattern % tuple(centered_line)}\n"
182
+ table += f"└{'┴'.join([in_between * n for n in column_widths])}┘"
183
+
184
+ return table
185
+
186
+
187
+ def estimate_command_parser(subparsers=None):
188
+ if subparsers is not None:
189
+ parser = subparsers.add_parser("estimate-memory")
190
+ else:
191
+ parser = CustomArgumentParser(
192
+ description="Model size estimator for fitting a model onto device(e.g. cuda, xpu) memory."
193
+ )
194
+
195
+ parser.add_argument("model_name", type=str, help="The model name on the Hugging Face Hub.")
196
+ parser.add_argument(
197
+ "--library_name",
198
+ type=str,
199
+ help="The library the model has an integration with, such as `transformers`, needed only if this information is not stored on the Hub.",
200
+ choices=["timm", "transformers"],
201
+ )
202
+ parser.add_argument(
203
+ "--dtypes",
204
+ type=str,
205
+ nargs="+",
206
+ default=["float32", "float16", "int8", "int4"],
207
+ help="The dtypes to use for the model, must be one (or many) of `float32`, `float16`, `int8`, and `int4`",
208
+ choices=["float32", "float16", "int8", "int4"],
209
+ )
210
+ parser.add_argument(
211
+ "--trust_remote_code",
212
+ action="store_true",
213
+ help="""Whether or not to allow for custom models defined on the Hub in their own modeling files. This flag
214
+ should only be used for repositories you trust and in which you have read the code, as it will execute
215
+ code present on the Hub on your local machine.""",
216
+ default=False,
217
+ )
218
+
219
+ if subparsers is not None:
220
+ parser.set_defaults(func=estimate_command)
221
+ return parser
222
+
223
+
224
+ def estimate_training_usage(bytes: int, mixed_precision: str, msamp_config: Optional[str] = None) -> dict:
225
+ """
226
+ Given an amount of `bytes` and `mixed_precision`, calculates how much training memory is needed for a batch size of
227
+ 1.
228
+
229
+ Args:
230
+ bytes (`int`):
231
+ The size of the model being trained.
232
+ mixed_precision (`str`):
233
+ The mixed precision that would be ran.
234
+ msamp_config (`str`):
235
+ The msamp config to estimate the training memory for if `mixed_precision` is set to `"fp8"`.
236
+ """
237
+ memory_sizes = {"model": -1, "optimizer": -1, "gradients": -1, "step": -1}
238
+ fp32_size = bytes
239
+ fp16_size = bytes // 2
240
+
241
+ if mixed_precision == "float32":
242
+ memory_sizes["model"] = fp32_size
243
+ memory_sizes["gradients"] = fp32_size
244
+ memory_sizes["optimizer"] = fp32_size * 2
245
+ memory_sizes["step"] = fp32_size * 4
246
+ elif mixed_precision in ("float16", "bfloat16") or (mixed_precision == "fp8" and msamp_config is None):
247
+ # With native `TransformersEngine`, there is no memory savings with FP8
248
+ # With mixed precision training, the model has weights stored
249
+ # in FP16 and FP32
250
+ memory_sizes["model"] = fp32_size
251
+ # 1.5 from weight gradient + computation (GEMM)
252
+ memory_sizes["gradients"] = fp32_size + fp16_size
253
+ # 2x from optimizer states
254
+ memory_sizes["optimizer"] = fp32_size * 2 # Optimizer states
255
+ memory_sizes["step"] = memory_sizes["optimizer"]
256
+ return memory_sizes
257
+
258
+
259
+ def gather_data(args):
260
+ "Creates an empty model and gathers the data for the sizes"
261
+ try:
262
+ model = create_empty_model(
263
+ args.model_name, library_name=args.library_name, trust_remote_code=args.trust_remote_code
264
+ )
265
+ except (RuntimeError, OSError) as e:
266
+ library = check_has_model(e)
267
+ if library != "unknown":
268
+ raise RuntimeError(
269
+ f"Tried to load `{args.model_name}` with `{library}` but a possible model to load was not found inside the repo."
270
+ )
271
+ raise e
272
+
273
+ total_size, largest_layer = calculate_maximum_sizes(model)
274
+
275
+ data = []
276
+
277
+ for dtype in args.dtypes:
278
+ dtype_total_size = total_size
279
+ dtype_largest_layer = largest_layer[0]
280
+ dtype_training_size = estimate_training_usage(dtype_total_size, dtype)
281
+ if dtype == "float16":
282
+ dtype_total_size /= 2
283
+ dtype_largest_layer /= 2
284
+ elif dtype == "int8":
285
+ dtype_total_size /= 4
286
+ dtype_largest_layer /= 4
287
+ elif dtype == "int4":
288
+ dtype_total_size /= 8
289
+ dtype_largest_layer /= 8
290
+ data.append([dtype, dtype_largest_layer, dtype_total_size, dtype_training_size])
291
+ return data
292
+
293
+
294
+ def estimate_command(args):
295
+ data = gather_data(args)
296
+ for row in data:
297
+ for i, item in enumerate(row):
298
+ if isinstance(item, (int, float)):
299
+ row[i] = convert_bytes(item)
300
+ elif isinstance(item, dict):
301
+ training_usage = max(item.values())
302
+ row[i] = convert_bytes(training_usage) if training_usage != -1 else "N/A"
303
+
304
+ headers = ["dtype", "Largest Layer", "Total Size", "Training using Adam"]
305
+
306
+ title = f"Memory Usage for loading `{args.model_name}`"
307
+ table = create_ascii_table(headers, data, title)
308
+ print(table)
309
+
310
+
311
+ def main():
312
+ parser = estimate_command_parser()
313
+ args = parser.parse_args()
314
+ estimate_command(args)
315
+
316
+
317
+ if __name__ == "__main__":
318
+ main()
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/launch.py ADDED
@@ -0,0 +1,1415 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import argparse
18
+ import importlib
19
+ import logging
20
+ import os
21
+ import subprocess
22
+ import sys
23
+ from pathlib import Path
24
+
25
+ import torch
26
+
27
+ from accelerate.commands.config import default_config_file, load_config_from_file
28
+ from accelerate.commands.config.config_args import SageMakerConfig
29
+ from accelerate.commands.config.config_utils import DYNAMO_BACKENDS
30
+ from accelerate.commands.utils import CustomArgumentParser
31
+ from accelerate.state import get_int_from_env
32
+ from accelerate.utils import (
33
+ ComputeEnvironment,
34
+ DistributedType,
35
+ PrepareForLaunch,
36
+ _filter_args,
37
+ check_cuda_p2p_ib_support,
38
+ convert_dict_to_env_variables,
39
+ is_bf16_available,
40
+ is_deepspeed_available,
41
+ is_hpu_available,
42
+ is_mlu_available,
43
+ is_musa_available,
44
+ is_neuron_available,
45
+ is_npu_available,
46
+ is_rich_available,
47
+ is_sagemaker_available,
48
+ is_sdaa_available,
49
+ is_torch_xla_available,
50
+ is_xpu_available,
51
+ patch_environment,
52
+ prepare_deepspeed_cmd_env,
53
+ prepare_multi_gpu_env,
54
+ prepare_sagemager_args_inputs,
55
+ prepare_simple_launcher_cmd_env,
56
+ prepare_tpu,
57
+ str_to_bool,
58
+ )
59
+ from accelerate.utils.constants import DEEPSPEED_MULTINODE_LAUNCHERS, TORCH_DYNAMO_MODES
60
+
61
+
62
+ if is_rich_available():
63
+ from rich import get_console
64
+ from rich.logging import RichHandler
65
+
66
+ FORMAT = "%(message)s"
67
+ logging.basicConfig(format=FORMAT, datefmt="[%X]", handlers=[RichHandler()])
68
+
69
+
70
+ logger = logging.getLogger(__name__)
71
+
72
+
73
+ options_to_group = {
74
+ "multi_gpu": "Distributed GPUs",
75
+ "tpu": "TPU",
76
+ "use_deepspeed": "DeepSpeed Arguments",
77
+ "use_fsdp": "FSDP Arguments",
78
+ "use_megatron_lm": "Megatron-LM Arguments",
79
+ "fp8_backend": "FP8 Arguments",
80
+ }
81
+
82
+
83
+ def clean_option(option):
84
+ "Finds all cases of - after the first two characters and changes them to _"
85
+ if "fp8_backend" in option:
86
+ option = "--fp8_backend"
87
+ if option.startswith("--"):
88
+ return option[2:].replace("-", "_")
89
+
90
+
91
+ class CustomHelpFormatter(argparse.HelpFormatter):
92
+ """
93
+ This is a custom help formatter that will hide all arguments that are not used in the command line when the help is
94
+ called. This is useful for the case where the user is using a specific platform and only wants to see the arguments
95
+ for that platform.
96
+ """
97
+
98
+ def __init__(self, *args, **kwargs):
99
+ super().__init__(*args, **kwargs)
100
+ self.titles = [
101
+ "Hardware Selection Arguments",
102
+ "Resource Selection Arguments",
103
+ "Training Paradigm Arguments",
104
+ "positional arguments",
105
+ "optional arguments",
106
+ ]
107
+
108
+ def add_argument(self, action: argparse.Action):
109
+ if "accelerate" in sys.argv[0] and "launch" in sys.argv[1:]:
110
+ args = sys.argv[2:]
111
+ else:
112
+ args = sys.argv[1:]
113
+
114
+ if len(args) > 1:
115
+ args = list(map(clean_option, args))
116
+ used_platforms = [arg for arg in args if arg in options_to_group.keys()]
117
+ used_titles = [options_to_group[o] for o in used_platforms]
118
+ if action.container.title not in self.titles + used_titles:
119
+ action.help = argparse.SUPPRESS
120
+ elif action.container.title == "Hardware Selection Arguments":
121
+ if set(action.option_strings).isdisjoint(set(args)):
122
+ action.help = argparse.SUPPRESS
123
+ else:
124
+ action.help = action.help + " (currently selected)"
125
+ elif action.container.title == "Training Paradigm Arguments":
126
+ if set(action.option_strings).isdisjoint(set(args)):
127
+ action.help = argparse.SUPPRESS
128
+ else:
129
+ action.help = action.help + " (currently selected)"
130
+
131
+ action.option_strings = [s for s in action.option_strings if "-" not in s[2:]]
132
+ super().add_argument(action)
133
+
134
+ def end_section(self):
135
+ if len(self._current_section.items) < 2:
136
+ self._current_section.items = []
137
+ self._current_section.heading = ""
138
+ super().end_section()
139
+
140
+
141
+ def launch_command_parser(subparsers=None):
142
+ description = "Launch a python script in a distributed scenario. Arguments can be passed in with either hyphens (`--num-processes=2`) or underscores (`--num_processes=2`)"
143
+ if subparsers is not None:
144
+ parser = subparsers.add_parser(
145
+ "launch", description=description, add_help=False, allow_abbrev=False, formatter_class=CustomHelpFormatter
146
+ )
147
+ else:
148
+ parser = CustomArgumentParser(
149
+ "Accelerate launch command",
150
+ description=description,
151
+ add_help=False,
152
+ allow_abbrev=False,
153
+ formatter_class=CustomHelpFormatter,
154
+ )
155
+
156
+ parser.add_argument("-h", "--help", action="help", help="Show this help message and exit.")
157
+
158
+ parser.add_argument(
159
+ "--config_file",
160
+ default=None,
161
+ help="The config file to use for the default values in the launching script.",
162
+ )
163
+ parser.add_argument(
164
+ "--quiet",
165
+ "-q",
166
+ action="store_true",
167
+ help="Silence subprocess errors from the launch stack trace and only show the relevant tracebacks. (Only applicable to DeepSpeed and single-process configurations)",
168
+ )
169
+ # Hardware selection arguments
170
+ hardware_args = parser.add_argument_group(
171
+ "Hardware Selection Arguments", "Arguments for selecting the hardware to be used."
172
+ )
173
+ hardware_args.add_argument(
174
+ "--cpu", default=False, action="store_true", help="Whether or not to force the training on the CPU."
175
+ )
176
+ hardware_args.add_argument(
177
+ "--multi_gpu",
178
+ default=False,
179
+ action="store_true",
180
+ help="Whether or not this should launch a distributed GPU training.",
181
+ )
182
+ hardware_args.add_argument(
183
+ "--tpu", default=False, action="store_true", help="Whether or not this should launch a TPU training."
184
+ )
185
+ # Resource selection arguments
186
+ resource_args = parser.add_argument_group(
187
+ "Resource Selection Arguments", "Arguments for fine-tuning how available hardware should be used."
188
+ )
189
+ resource_args.add_argument(
190
+ "--mixed_precision",
191
+ type=str,
192
+ choices=["no", "fp16", "bf16", "fp8"],
193
+ help="Whether or not to use mixed precision training. "
194
+ "Choose between FP16 and BF16 (bfloat16) training. "
195
+ "BF16 training is only supported on Nvidia Ampere GPUs and PyTorch 1.10 or later.",
196
+ )
197
+ resource_args.add_argument(
198
+ "--num_processes", type=int, default=None, help="The total number of processes to be launched in parallel."
199
+ )
200
+ resource_args.add_argument(
201
+ "--num_machines", type=int, default=None, help="The total number of machines used in this training."
202
+ )
203
+ resource_args.add_argument(
204
+ "--num_cpu_threads_per_process",
205
+ type=int,
206
+ default=None,
207
+ help="The number of CPU threads per process. Can be tuned for optimal performance.",
208
+ )
209
+ resource_args.add_argument(
210
+ "--enable_cpu_affinity",
211
+ default=False,
212
+ action="store_true",
213
+ help="Whether or not CPU affinity and balancing should be enabled. Currently only supported on NVIDIA hardware.",
214
+ )
215
+ # Dynamo arguments
216
+ resource_args.add_argument(
217
+ "--dynamo_backend",
218
+ type=str,
219
+ choices=["no"] + [b.lower() for b in DYNAMO_BACKENDS],
220
+ help="Choose a backend to optimize your training with dynamo, see more at "
221
+ "https://github.com/pytorch/torchdynamo.",
222
+ )
223
+ resource_args.add_argument(
224
+ "--dynamo_mode",
225
+ type=str,
226
+ default="default",
227
+ choices=TORCH_DYNAMO_MODES,
228
+ help="Choose a mode to optimize your training with dynamo.",
229
+ )
230
+ resource_args.add_argument(
231
+ "--dynamo_use_fullgraph",
232
+ default=False,
233
+ action="store_true",
234
+ help="Whether to use full graph mode for dynamo or it is ok to break model into several subgraphs",
235
+ )
236
+ resource_args.add_argument(
237
+ "--dynamo_use_dynamic",
238
+ default=False,
239
+ action="store_true",
240
+ help="Whether to enable dynamic shape tracing.",
241
+ )
242
+ resource_args.add_argument(
243
+ "--dynamo_use_regional_compilation",
244
+ default=False,
245
+ action="store_true",
246
+ help="Whether to enable regional compilation.",
247
+ )
248
+
249
+ # Training Paradigm arguments
250
+ paradigm_args = parser.add_argument_group(
251
+ "Training Paradigm Arguments", "Arguments for selecting which training paradigm to be used."
252
+ )
253
+ paradigm_args.add_argument(
254
+ "--use_deepspeed",
255
+ default=False,
256
+ action="store_true",
257
+ help="Whether to use deepspeed.",
258
+ )
259
+ paradigm_args.add_argument(
260
+ "--use_fsdp",
261
+ default=False,
262
+ action="store_true",
263
+ help="Whether to use fsdp.",
264
+ )
265
+ paradigm_args.add_argument(
266
+ "--use_parallelism_config",
267
+ default=False,
268
+ action="store_true",
269
+ help="Whether to use the parallelism config to configure the N-d distributed training.",
270
+ )
271
+ paradigm_args.add_argument(
272
+ "--use_megatron_lm",
273
+ default=False,
274
+ action="store_true",
275
+ help="Whether to use Megatron-LM.",
276
+ )
277
+
278
+ # distributed GPU training arguments
279
+ distributed_args = parser.add_argument_group("Distributed GPUs", "Arguments related to distributed GPU training.")
280
+ distributed_args.add_argument(
281
+ "--gpu_ids",
282
+ default=None,
283
+ help="What GPUs (by id) should be used for training on this machine as a comma-separated list",
284
+ )
285
+ distributed_args.add_argument(
286
+ "--same_network",
287
+ default=False,
288
+ action="store_true",
289
+ help="Whether all machines used for multinode training exist on the same local network.",
290
+ )
291
+ distributed_args.add_argument(
292
+ "--machine_rank", type=int, default=None, help="The rank of the machine on which this script is launched."
293
+ )
294
+ distributed_args.add_argument(
295
+ "--main_process_ip", type=str, default=None, help="The IP address of the machine of rank 0."
296
+ )
297
+ distributed_args.add_argument(
298
+ "--main_process_port",
299
+ type=int,
300
+ default=None,
301
+ help="The port to use to communicate with the machine of rank 0.",
302
+ )
303
+ distributed_args.add_argument(
304
+ "-t",
305
+ "--tee",
306
+ default="0",
307
+ type=str,
308
+ help="Tee std streams into a log file and also to console.",
309
+ )
310
+ distributed_args.add_argument(
311
+ "--log_dir",
312
+ type=str,
313
+ default=None,
314
+ help=(
315
+ "Base directory to use for log files when using torchrun/torch.distributed.run as launcher. "
316
+ "Use with --tee to redirect std streams info log files."
317
+ ),
318
+ )
319
+ distributed_args.add_argument(
320
+ "--role",
321
+ type=str,
322
+ default="default",
323
+ help="User-defined role for the workers.",
324
+ )
325
+ # Rendezvous related arguments
326
+ distributed_args.add_argument(
327
+ "--rdzv_backend",
328
+ type=str,
329
+ default="static",
330
+ help="The rendezvous method to use, such as 'static' (the default) or 'c10d'",
331
+ )
332
+ distributed_args.add_argument(
333
+ "--rdzv_conf",
334
+ type=str,
335
+ default="",
336
+ help="Additional rendezvous configuration (<key1>=<value1>,<key2>=<value2>,...).",
337
+ )
338
+ distributed_args.add_argument(
339
+ "--max_restarts",
340
+ type=int,
341
+ default=0,
342
+ help="Maximum number of worker group restarts before failing.",
343
+ )
344
+ distributed_args.add_argument(
345
+ "--monitor_interval",
346
+ type=float,
347
+ default=0.1,
348
+ help="Interval, in seconds, to monitor the state of workers.",
349
+ )
350
+ parser.add_argument(
351
+ "-m",
352
+ "--module",
353
+ action="store_true",
354
+ help="Change each process to interpret the launch script as a Python module, executing with the same behavior as 'python -m'.",
355
+ )
356
+ parser.add_argument(
357
+ "--no_python",
358
+ action="store_true",
359
+ help="Skip prepending the training script with 'python' - just execute it directly. Useful when the script is not a Python script.",
360
+ )
361
+
362
+ # TPU arguments
363
+ tpu_args = parser.add_argument_group("TPU", "Arguments related to TPU.")
364
+ tpu_args.add_argument(
365
+ "--tpu_cluster",
366
+ action="store_true",
367
+ dest="tpu_use_cluster",
368
+ help="Whether to use a GCP TPU pod for training.",
369
+ )
370
+ tpu_args.add_argument(
371
+ "--no_tpu_cluster",
372
+ action="store_false",
373
+ dest="tpu_use_cluster",
374
+ help="Should not be passed explicitly, this is for internal use only.",
375
+ )
376
+ tpu_args.add_argument(
377
+ "--tpu_use_sudo",
378
+ action="store_true",
379
+ help="Whether to use `sudo` when running the TPU training script in each pod.",
380
+ )
381
+ tpu_args.add_argument(
382
+ "--vm",
383
+ type=str,
384
+ action="append",
385
+ help=(
386
+ "List of single Compute VM instance names. "
387
+ "If not provided we assume usage of instance groups. For TPU pods."
388
+ ),
389
+ )
390
+ tpu_args.add_argument(
391
+ "--env",
392
+ type=str,
393
+ action="append",
394
+ help="List of environment variables to set on the Compute VM instances. For TPU pods.",
395
+ )
396
+ tpu_args.add_argument(
397
+ "--main_training_function",
398
+ type=str,
399
+ default=None,
400
+ help="The name of the main function to be executed in your script (only for TPU training).",
401
+ )
402
+ tpu_args.add_argument(
403
+ "--downcast_bf16",
404
+ action="store_true",
405
+ help="Whether when using bf16 precision on TPUs if both float and double tensors are cast to bfloat16 or if double tensors remain as float32.",
406
+ )
407
+
408
+ # DeepSpeed arguments
409
+ deepspeed_args = parser.add_argument_group("DeepSpeed Arguments", "Arguments related to DeepSpeed.")
410
+ deepspeed_args.add_argument(
411
+ "--deepspeed_config_file",
412
+ default=None,
413
+ type=str,
414
+ help="DeepSpeed config file.",
415
+ )
416
+ deepspeed_args.add_argument(
417
+ "--zero_stage",
418
+ default=None,
419
+ type=int,
420
+ help="DeepSpeed's ZeRO optimization stage (useful only when `use_deepspeed` flag is passed). "
421
+ "If unspecified, will default to `2`.",
422
+ )
423
+ deepspeed_args.add_argument(
424
+ "--offload_optimizer_device",
425
+ default=None,
426
+ type=str,
427
+ help="Decides where (none|cpu|nvme) to offload optimizer states (useful only when `use_deepspeed` flag is passed). "
428
+ "If unspecified, will default to 'none'.",
429
+ )
430
+ deepspeed_args.add_argument(
431
+ "--offload_param_device",
432
+ default=None,
433
+ type=str,
434
+ help="Decides where (none|cpu|nvme) to offload parameters (useful only when `use_deepspeed` flag is passed). "
435
+ "If unspecified, will default to 'none'.",
436
+ )
437
+ deepspeed_args.add_argument(
438
+ "--offload_optimizer_nvme_path",
439
+ default=None,
440
+ type=str,
441
+ help="Decides Nvme Path to offload optimizer states (useful only when `use_deepspeed` flag is passed). "
442
+ "If unspecified, will default to 'none'.",
443
+ )
444
+ deepspeed_args.add_argument(
445
+ "--offload_param_nvme_path",
446
+ default=None,
447
+ type=str,
448
+ help="Decides Nvme Path to offload parameters (useful only when `use_deepspeed` flag is passed). "
449
+ "If unspecified, will default to 'none'.",
450
+ )
451
+ deepspeed_args.add_argument(
452
+ "--gradient_accumulation_steps",
453
+ default=None,
454
+ type=int,
455
+ help="No of gradient_accumulation_steps used in your training script (useful only when `use_deepspeed` flag is passed). "
456
+ "If unspecified, will default to `1`.",
457
+ )
458
+ deepspeed_args.add_argument(
459
+ "--gradient_clipping",
460
+ default=None,
461
+ type=float,
462
+ help="gradient clipping value used in your training script (useful only when `use_deepspeed` flag is passed). "
463
+ "If unspecified, will default to `1.0`.",
464
+ )
465
+ deepspeed_args.add_argument(
466
+ "--zero3_init_flag",
467
+ default=None,
468
+ type=str,
469
+ help="Decides Whether (true|false) to enable `deepspeed.zero.Init` for constructing massive models. "
470
+ "Only applicable with DeepSpeed ZeRO Stage-3. If unspecified, will default to `true`.",
471
+ )
472
+ deepspeed_args.add_argument(
473
+ "--zero3_save_16bit_model",
474
+ default=None,
475
+ type=str,
476
+ help="Decides Whether (true|false) to save 16-bit model weights when using ZeRO Stage-3. "
477
+ "Only applicable with DeepSpeed ZeRO Stage-3. If unspecified, will default to `false`.",
478
+ )
479
+ deepspeed_args.add_argument(
480
+ "--deepspeed_hostfile",
481
+ default=None,
482
+ type=str,
483
+ help="DeepSpeed hostfile for configuring multi-node compute resources.",
484
+ )
485
+ deepspeed_args.add_argument(
486
+ "--deepspeed_exclusion_filter",
487
+ default=None,
488
+ type=str,
489
+ help="DeepSpeed exclusion filter string when using multi-node setup.",
490
+ )
491
+ deepspeed_args.add_argument(
492
+ "--deepspeed_inclusion_filter",
493
+ default=None,
494
+ type=str,
495
+ help="DeepSpeed inclusion filter string when using multi-node setup.",
496
+ )
497
+ deepspeed_args.add_argument(
498
+ "--deepspeed_multinode_launcher",
499
+ default=None,
500
+ type=str,
501
+ help="DeepSpeed multi-node launcher to use, e.g. `pdsh`, `standard`, `openmpi`, `mvapich`, `mpich`, `slurm`, `nossh` (requires DeepSpeed >= 0.14.5). If unspecified, will default to `pdsh`.",
502
+ )
503
+ deepspeed_args.add_argument(
504
+ "--deepspeed_moe_layer_cls_names",
505
+ default=None,
506
+ type=str,
507
+ help="comma-separated list of transformer MoE layer class names (case-sensitive) to wrap ,e.g, `MixtralSparseMoeBlock`, `Qwen2MoeSparseMoeBlock`, `JetMoEAttention,JetMoEBlock` ..."
508
+ " (useful only when `use_deepspeed` flag is passed).",
509
+ )
510
+
511
+ # fsdp arguments
512
+ fsdp_args = parser.add_argument_group("FSDP Arguments", "Arguments related to Fully Shared Data Parallelism.")
513
+ fsdp_args.add_argument(
514
+ "--fsdp_version",
515
+ type=str,
516
+ default="1",
517
+ choices=["1", "2"],
518
+ help="FSDP version to use. (useful only when `use_fsdp` flag is passed).",
519
+ )
520
+ fsdp_args.add_argument(
521
+ "--fsdp_offload_params",
522
+ default="false",
523
+ type=str,
524
+ help="Decides Whether (true|false) to offload parameters and gradients to CPU. (useful only when `use_fsdp` flag is passed).",
525
+ )
526
+ fsdp_args.add_argument(
527
+ "--fsdp_min_num_params",
528
+ type=int,
529
+ default=int(1e8),
530
+ help="FSDP's minimum number of parameters for Default Auto Wrapping. (useful only when `use_fsdp` flag is passed).",
531
+ )
532
+ # We enable this for backwards compatibility, throw a warning if this is set in `FullyShardedDataParallelPlugin`
533
+ fsdp_args.add_argument(
534
+ "--fsdp_sharding_strategy",
535
+ type=str,
536
+ default="FULL_SHARD",
537
+ help="FSDP's sharding strategy. (useful only when `use_fsdp` flag is passed and `fsdp_version=1`).",
538
+ )
539
+ fsdp_args.add_argument(
540
+ "--fsdp_reshard_after_forward",
541
+ type=str,
542
+ default="true",
543
+ help="FSDP's Reshard After Forward Strategy. (useful only when `use_fsdp` flag is passed). Supports either boolean (FSDP2) or `FULL_SHARD | SHARD_GRAD_OP | NO_RESHARD` (FSDP1).",
544
+ )
545
+ fsdp_args.add_argument(
546
+ "--fsdp_auto_wrap_policy",
547
+ type=str,
548
+ default=None,
549
+ help="FSDP's auto wrap policy. (useful only when `use_fsdp` flag is passed).",
550
+ )
551
+ fsdp_args.add_argument(
552
+ "--fsdp_transformer_layer_cls_to_wrap",
553
+ default=None,
554
+ type=str,
555
+ help="Transformer layer class name (case-sensitive) to wrap ,e.g, `BertLayer`, `GPTJBlock`, `T5Block` .... "
556
+ "(useful only when `use_fsdp` flag is passed).",
557
+ )
558
+ fsdp_args.add_argument(
559
+ "--fsdp_backward_prefetch",
560
+ default=None,
561
+ type=str,
562
+ help="FSDP's backward prefetch policy. (useful only when `use_fsdp` flag is passed).",
563
+ )
564
+ fsdp_args.add_argument(
565
+ "--fsdp_state_dict_type",
566
+ default=None,
567
+ type=str,
568
+ help="FSDP's state dict type. (useful only when `use_fsdp` flag is passed).",
569
+ )
570
+ fsdp_args.add_argument(
571
+ "--fsdp_forward_prefetch",
572
+ default="false",
573
+ type=str,
574
+ help="If True, then FSDP explicitly prefetches the next upcoming "
575
+ "all-gather while executing in the forward pass (useful only when `use_fsdp` flag is passed).",
576
+ )
577
+ fsdp_args.add_argument(
578
+ "--fsdp_use_orig_params",
579
+ default="true",
580
+ type=str,
581
+ help="If True, allows non-uniform `requires_grad` during init, which means support for interspersed frozen and trainable parameters."
582
+ " (useful only when `use_fsdp` flag is passed).",
583
+ )
584
+ fsdp_args.add_argument(
585
+ "--fsdp_cpu_ram_efficient_loading",
586
+ default="true",
587
+ type=str,
588
+ help="If True, only the first process loads the pretrained model checkoint while all other processes have empty weights. "
589
+ "Only applicable for 🤗 Transformers. When using this, `--fsdp_sync_module_states` needs to True. "
590
+ "(useful only when `use_fsdp` flag is passed).",
591
+ )
592
+ fsdp_args.add_argument(
593
+ "--fsdp_sync_module_states",
594
+ default="true",
595
+ type=str,
596
+ help="If True, each individually wrapped FSDP unit will broadcast module parameters from rank 0."
597
+ " (useful only when `use_fsdp` flag is passed).",
598
+ )
599
+ fsdp_args.add_argument(
600
+ "--fsdp_activation_checkpointing",
601
+ default="false",
602
+ type=str,
603
+ help="Decides Whether (true|false) intermediate activations are freed during the forward pass, and a checkpoint is left as a placeholder. (useful only when `use_fsdp` flag is passed).",
604
+ )
605
+
606
+ # megatron_lm args
607
+ megatron_lm_args = parser.add_argument_group("Megatron-LM Arguments", "Arguments related to Megatron-LM.")
608
+ megatron_lm_args.add_argument(
609
+ "--megatron_lm_tp_degree",
610
+ type=int,
611
+ default=1,
612
+ help="Megatron-LM's Tensor Parallelism (TP) degree. (useful only when `use_megatron_lm` flag is passed).",
613
+ )
614
+ megatron_lm_args.add_argument(
615
+ "--megatron_lm_use_custom_fsdp",
616
+ type=bool,
617
+ default=False,
618
+ help="Whether to use custom FSDP. (useful only when `use_megatron_lm` flag is passed).",
619
+ )
620
+ megatron_lm_args.add_argument(
621
+ "--megatron_lm_no_load_optim",
622
+ type=bool,
623
+ default=False,
624
+ help="Whether to not load optimizer. (useful only when `use_megatron_lm` flag is passed).",
625
+ )
626
+ megatron_lm_args.add_argument(
627
+ "--megatron_lm_eod_mask_loss",
628
+ type=bool,
629
+ default=False,
630
+ help="Whether to use eod mask loss. (useful only when `use_megatron_lm` flag is passed).",
631
+ )
632
+ megatron_lm_args.add_argument(
633
+ "--megatron_lm_overlap_cpu_optimizer_d2h_h2d",
634
+ type=bool,
635
+ default=False,
636
+ help="Whether to overlap CPU optimizer step, gradients D2H and updated parameters H2D. (useful only when `use_megatron_lm` flag is passed).",
637
+ )
638
+ megatron_lm_args.add_argument(
639
+ "--megatron_lm_no_save_optim",
640
+ type=bool,
641
+ default=False,
642
+ help="Whether to not save optimizer. (useful only when `use_megatron_lm` flag is passed).",
643
+ )
644
+ megatron_lm_args.add_argument(
645
+ "--megatron_lm_optimizer_cpu_offload",
646
+ type=bool,
647
+ default=False,
648
+ help="Whether to use CPU offload for optimizer. (useful only when `use_megatron_lm` flag is passed).",
649
+ )
650
+ megatron_lm_args.add_argument(
651
+ "--megatron_lm_use_precision_aware_optimizer",
652
+ type=bool,
653
+ default=False,
654
+ help="Whether to use precision aware optimizer. (useful only when `use_megatron_lm` flag is passed).",
655
+ )
656
+ megatron_lm_args.add_argument(
657
+ "--megatron_lm_decoder_last_pipeline_num_layers",
658
+ type=int,
659
+ default=None,
660
+ help="Megatron-LM's decoder last pipeline number of layers, default None is even split of transformer layers across all pipeline stages.",
661
+ )
662
+ megatron_lm_args.add_argument(
663
+ "--megatron_lm_pp_degree",
664
+ type=int,
665
+ default=1,
666
+ help="Megatron-LM's Pipeline Parallelism (PP) degree. (useful only when `use_megatron_lm` flag is passed).",
667
+ )
668
+ megatron_lm_args.add_argument(
669
+ "--megatron_lm_num_micro_batches",
670
+ type=int,
671
+ default=None,
672
+ help="Megatron-LM's number of micro batches when PP degree > 1. (useful only when `use_megatron_lm` flag is passed).",
673
+ )
674
+ megatron_lm_args.add_argument(
675
+ "--megatron_lm_sequence_parallelism",
676
+ default=None,
677
+ type=str,
678
+ help="Decides Whether (true|false) to enable Sequence Parallelism when TP degree > 1. "
679
+ "(useful only when `use_megatron_lm` flag is passed).",
680
+ )
681
+ megatron_lm_args.add_argument(
682
+ "--megatron_lm_recompute_activations",
683
+ default=None,
684
+ type=str,
685
+ help="Decides Whether (true|false) to enable Selective Activation Recomputation. "
686
+ "(useful only when `use_megatron_lm` flag is passed).",
687
+ )
688
+ megatron_lm_args.add_argument(
689
+ "--megatron_lm_use_distributed_optimizer",
690
+ default=None,
691
+ type=str,
692
+ help="Decides Whether (true|false) to use distributed optimizer "
693
+ "which shards optimizer state and gradients across Data Pralellel (DP) ranks. "
694
+ "(useful only when `use_megatron_lm` flag is passed).",
695
+ )
696
+ megatron_lm_args.add_argument(
697
+ "--megatron_lm_gradient_clipping",
698
+ default=1.0,
699
+ type=float,
700
+ help="Megatron-LM's gradient clipping value based on global L2 Norm (0 to disable). "
701
+ "(useful only when `use_megatron_lm` flag is passed).",
702
+ )
703
+ megatron_lm_args.add_argument(
704
+ "--megatron_lm_recompute_granularity",
705
+ default=None,
706
+ type=str,
707
+ help="Megatron-LM's recompute granularity (full, selective). "
708
+ "(useful only when `use_megatron_lm` flag is passed).",
709
+ )
710
+ megatron_lm_args.add_argument(
711
+ "--megatron_lm_recompute_method",
712
+ default=None,
713
+ type=str,
714
+ help="Megatron-LM's recompute method (uniform, block). (useful only when `use_megatron_lm` flag is passed).",
715
+ )
716
+ megatron_lm_args.add_argument(
717
+ "--megatron_lm_recompute_num_layers",
718
+ default=None,
719
+ type=int,
720
+ help="Megatron-LM's number of layers to recompute. (useful only when `use_megatron_lm` flag is passed).",
721
+ )
722
+ megatron_lm_args.add_argument(
723
+ "--megatron_lm_attention_backend",
724
+ default=None,
725
+ type=str,
726
+ help="Decides Whether (true|false) to enable attention backend. "
727
+ "(useful only when `use_megatron_lm` flag is passed).",
728
+ )
729
+ megatron_lm_args.add_argument(
730
+ "--megatron_lm_expert_model_parallel_size",
731
+ default=None,
732
+ type=int,
733
+ help="Megatron-LM's expert model parallel size. (useful only when `use_megatron_lm` flag is passed).",
734
+ )
735
+ megatron_lm_args.add_argument(
736
+ "--megatron_lm_context_parallel_size",
737
+ default=None,
738
+ type=int,
739
+ help="Megatron-LM's context parallel size. (useful only when `use_megatron_lm` flag is passed).",
740
+ )
741
+ megatron_lm_args.add_argument(
742
+ "--megatron_lm_attention_dropout",
743
+ default=None,
744
+ type=float,
745
+ help="Megatron-LM's attention dropout rate. (useful only when `use_megatron_lm` flag is passed).",
746
+ )
747
+ megatron_lm_args.add_argument(
748
+ "--megatron_lm_hidden_dropout",
749
+ default=None,
750
+ type=float,
751
+ help="Megatron-LM's hidden dropout rate. (useful only when `use_megatron_lm` flag is passed).",
752
+ )
753
+ megatron_lm_args.add_argument(
754
+ "--megatron_lm_attention_softmax_in_fp32",
755
+ default=None,
756
+ type=str,
757
+ help="Decides Whether (true|false) to use fp32 for attention softmax. "
758
+ "(useful only when `use_megatron_lm` flag is passed).",
759
+ )
760
+ megatron_lm_args.add_argument(
761
+ "--megatron_lm_expert_tensor_parallel_size",
762
+ default=None,
763
+ type=int,
764
+ help="Megatron-LM's expert tensor parallel size. (useful only when `use_megatron_lm` flag is passed).",
765
+ )
766
+ megatron_lm_args.add_argument(
767
+ "--megatron_lm_calculate_per_token_loss",
768
+ default=None,
769
+ type=str,
770
+ help="Decides Whether (true|false) to calculate per token loss. "
771
+ "(useful only when `use_megatron_lm` flag is passed).",
772
+ )
773
+ megatron_lm_args.add_argument(
774
+ "--megatron_lm_use_rotary_position_embeddings",
775
+ default=None,
776
+ type=str,
777
+ help="Decides Whether (true|false) to use rotary position embeddings. "
778
+ "(useful only when `use_megatron_lm` flag is passed).",
779
+ )
780
+
781
+ # FP8 arguments
782
+ fp8_args = parser.add_argument_group(
783
+ "FP8 Arguments", "Arguments related to FP8 training (requires `--mixed_precision=fp8`)"
784
+ )
785
+ fp8_args.add_argument(
786
+ "--fp8_backend",
787
+ type=str,
788
+ choices=["ao", "te", "msamp"],
789
+ help="Choose a backend to train with FP8 (ao: torchao, te: TransformerEngine, msamp: MS-AMP)",
790
+ )
791
+ fp8_args.add_argument(
792
+ "--fp8_use_autocast_during_eval",
793
+ default=False,
794
+ action="store_true",
795
+ help="Whether to use FP8 autocast during eval mode (useful only when `--fp8_backend=te` is passed). Generally better metrics are found when this is not passed.",
796
+ )
797
+ fp8_args.add_argument(
798
+ "--fp8_margin",
799
+ type=int,
800
+ default=0,
801
+ help="The margin to use for the gradient scaling (useful only when `--fp8_backend=te` is passed).",
802
+ )
803
+ fp8_args.add_argument(
804
+ "--fp8_interval",
805
+ type=int,
806
+ default=1,
807
+ help="The interval to use for how often the scaling factor is recomputed (useful only when `--fp8_backend=te` is passed).",
808
+ )
809
+ fp8_args.add_argument(
810
+ "--fp8_format",
811
+ type=str,
812
+ default="HYBRID",
813
+ choices=["HYBRID", "E4M3", "E5M2"],
814
+ help="The format to use for the FP8 recipe (useful only when `--fp8_backend=te` is passed).",
815
+ )
816
+ fp8_args.add_argument(
817
+ "--fp8_amax_history_len",
818
+ type=int,
819
+ default=1024,
820
+ help="The length of the history to use for the scaling factor computation (useful only when `--fp8_backend=te` is passed).",
821
+ )
822
+ fp8_args.add_argument(
823
+ "--fp8_amax_compute_algo",
824
+ type=str,
825
+ default="most_recent",
826
+ choices=["max", "most_recent"],
827
+ help="The algorithm to use for the scaling factor computation. (useful only when `--fp8_backend=te` is passed).",
828
+ )
829
+ fp8_args.add_argument(
830
+ "--fp8_override_linear_precision",
831
+ type=lambda x: tuple(map(str_to_bool, x.split(","))),
832
+ default=(False, False, False),
833
+ help="Whether or not to execute `fprop`, `dgrad`, and `wgrad` GEMMS in higher precision. Should be passed in a comma-separated string of booleans (useful only when `--fp8_backend=te` is passed).",
834
+ )
835
+ fp8_args.add_argument(
836
+ "--fp8_opt_level",
837
+ type=str,
838
+ default="O2",
839
+ choices=["O1", "O2"],
840
+ help="What level of 8-bit collective communication should be used with MS-AMP (useful only when `--fp8_backend=msamp` is passed).",
841
+ )
842
+ fp8_args.add_argument(
843
+ "--fp8_enable_fsdp_float8_all_gather",
844
+ default="true",
845
+ type=str_to_bool,
846
+ help="Whether to enable FSDP2 float8 all gather (useful only when `--fp8_backend=ao` is passed).",
847
+ )
848
+ fp8_args.add_argument(
849
+ "--fp8_pad_inner_dim",
850
+ default="true",
851
+ type=str_to_bool,
852
+ help="Whether to pad the inner dimension for FP8 GEMMs (useful only when `--fp8_backend=ao` is passed).",
853
+ )
854
+
855
+ # AWS arguments
856
+ aws_args = parser.add_argument_group("AWS Arguments", "Arguments related to AWS.")
857
+ aws_args.add_argument(
858
+ "--aws_access_key_id",
859
+ type=str,
860
+ default=None,
861
+ help="The AWS_ACCESS_KEY_ID used to launch the Amazon SageMaker training job",
862
+ )
863
+ aws_args.add_argument(
864
+ "--aws_secret_access_key",
865
+ type=str,
866
+ default=None,
867
+ help="The AWS_SECRET_ACCESS_KEY used to launch the Amazon SageMaker training job.",
868
+ )
869
+ parser.add_argument(
870
+ "--debug",
871
+ action="store_true",
872
+ help="Whether to print out the torch.distributed stack trace when something fails.",
873
+ )
874
+ parser.add_argument(
875
+ "training_script",
876
+ type=str,
877
+ help=(
878
+ "The full path to the script to be launched in parallel, followed by all the arguments for the training "
879
+ "script."
880
+ ),
881
+ )
882
+
883
+ # MPI arguments
884
+ mpirun_args = parser.add_argument_group("MPI Arguments", "Arguments related to mpirun for Multi-CPU")
885
+ mpirun_args.add_argument(
886
+ "--mpirun_hostfile",
887
+ type=str,
888
+ default=None,
889
+ help="Location for a hostfile for using Accelerate to launch a multi-CPU training job with mpirun. This will "
890
+ "get passed to the MPI --hostfile or -f parameter, depending on which MPI program is installed.",
891
+ )
892
+
893
+ # ParallelismConfig arguments
894
+ parallelism_config_args = parser.add_argument_group(
895
+ "ParallelismConfig Arguments",
896
+ "Arguments related to the ParallelismConfig used for distributed training.",
897
+ )
898
+
899
+ parallelism_config_args.add_argument(
900
+ "--parallelism_config_dp_replicate_size",
901
+ type=int,
902
+ default=1,
903
+ help="The number of processes for data parallel training. Defaults to 1 (no data parallelism).",
904
+ )
905
+
906
+ parallelism_config_args.add_argument(
907
+ "--parallelism_config_dp_shard_size",
908
+ type=int,
909
+ default=1,
910
+ help="The number of processes for FSDP sharding. Defaults to 1 (No FSDP sharding).",
911
+ )
912
+
913
+ parallelism_config_args.add_argument(
914
+ "--parallelism_config_tp_size",
915
+ type=int,
916
+ default=1,
917
+ help="The number of processes for tensor parallel training. Defaults to 1 (no tensor parallelism).",
918
+ )
919
+
920
+ parallelism_config_args.add_argument(
921
+ "--parallelism_config_cp_size",
922
+ type=int,
923
+ default=1,
924
+ help="The number of processese for context parallel training. Defaults to 1 (no context parallelism).",
925
+ )
926
+
927
+ parallelism_config_args.add_argument(
928
+ "--parallelism_config_cp_backend",
929
+ type=str,
930
+ choices=["torch"],
931
+ default="torch",
932
+ help="Context Parallelism backend: torch (FSDP2) or deepspeed (ALST/Ulysses)",
933
+ )
934
+
935
+ parallelism_config_args.add_argument(
936
+ "--parallelism_config_cp_comm_strategy",
937
+ type=str,
938
+ default="allgather",
939
+ help="The communication strategy for context parallel training. Defaults to 'allgather'. Other option is alltoall",
940
+ )
941
+
942
+ parallelism_config_args.add_argument(
943
+ "--parallelism_config_sp_size",
944
+ type=int,
945
+ default=1,
946
+ help="The number of processese for context parallel training. Defaults to 1 (no context parallelism).",
947
+ )
948
+
949
+ parallelism_config_args.add_argument(
950
+ "--parallelism_config_sp_backend",
951
+ type=str,
952
+ choices=["deepspeed"],
953
+ default="deepspeed",
954
+ help="Sequence Parallelism backend: deepspeed (ALST/Ulysses)",
955
+ )
956
+
957
+ parallelism_config_args.add_argument(
958
+ "--parallelism_config_sp_seq_length",
959
+ type=str,
960
+ default=None,
961
+ help="Sequence length for when batches are all of the same length. For variable sequence lengths across batches set `parallelism_config_sp_seq_length_is_variable=True`",
962
+ )
963
+
964
+ parallelism_config_args.add_argument(
965
+ "--parallelism_config_sp_seq_length_is_variable",
966
+ type=bool,
967
+ default=True,
968
+ help="If `True` will work with a sequence length that may change between batches, in which case `parallelism_config_sp_seq_length` value can be set to anything divisible by sp size or remain unset. If `False` then `parallelism_config_sp_seq_length` needs to match the batch's sequence length dimension. The default is `True`.",
969
+ )
970
+
971
+ parallelism_config_args.add_argument(
972
+ "--parallelism_config_sp_attn_implementation",
973
+ type=str,
974
+ default="sdpa",
975
+ help="Attention implementation to use. Can be one of 'flash_attention_2', 'flash_attention_3' or 'sdpa'. Defaults to `sdpa`.",
976
+ )
977
+
978
+ # Other arguments of the training scripts
979
+ parser.add_argument("training_script_args", nargs=argparse.REMAINDER, help="Arguments of the training script.")
980
+
981
+ if subparsers is not None:
982
+ parser.set_defaults(func=launch_command)
983
+ return parser
984
+
985
+
986
def simple_launcher(args):
    """Run the training script as a single plain subprocess (no distributed launcher)."""
    cmd, current_env = prepare_simple_launcher_cmd_env(args)

    proc = subprocess.Popen(cmd, env=current_env)
    proc.wait()
    if proc.returncode == 0:
        return
    # Non-zero exit: either exit quietly or surface the failure as an exception.
    if args.quiet:
        sys.exit(1)
    raise subprocess.CalledProcessError(returncode=proc.returncode, cmd=cmd)
996
+
997
+
998
def multi_gpu_launcher(args):
    """Launch a multi-GPU job through `torch.distributed.run` with an accelerate-prepared environment."""
    import torch.distributed.run as distrib_run

    current_env = prepare_multi_gpu_env(args)
    if not check_cuda_p2p_ib_support():
        # Force-disable P2P and IB communications unless the user has already
        # configured them explicitly, warning only if we changed something.
        changed = False
        for var in ("NCCL_P2P_DISABLE", "NCCL_IB_DISABLE"):
            if var not in current_env:
                current_env[var] = "1"
                changed = True
        if changed:
            logger.warning(
                "Using RTX 4000 series which doesn't support faster communication speedups. Ensuring P2P and IB communications are disabled."
            )

    debug = getattr(args, "debug", False)
    # Keep only the arguments torch.distributed.run understands, forwarding the
    # training script and its arguments.
    args = _filter_args(
        args,
        distrib_run.get_args_parser(),
        ["--training_script", args.training_script, "--training_script_args", args.training_script_args],
    )

    with patch_environment(**current_env):
        try:
            distrib_run.run(args)
        except Exception:
            if debug and is_rich_available():
                console = get_console()
                console.print("\n[bold red]Using --debug, `torch.distributed` Stack Trace:[/bold red]")
                console.print_exception(suppress=[__file__], show_locals=False)
            else:
                raise
1031
+
1032
+
1033
def deepspeed_launcher(args):
    """Launch a training job with DeepSpeed.

    Multi-node runs using a DeepSpeed-native launcher are executed through the
    prepared DeepSpeed command as a subprocess; otherwise the job is delegated
    to `torch.distributed.run`.
    """
    import torch.distributed.run as distrib_run

    if not is_deepspeed_available():
        raise ImportError("DeepSpeed is not installed => run `pip3 install deepspeed` or build it from source.")
    else:
        from deepspeed.launcher.runner import DEEPSPEED_ENVIRONMENT_NAME

    cmd, current_env = prepare_deepspeed_cmd_env(args)
    if not check_cuda_p2p_ib_support():
        # Force-disable P2P/IB communications unless already set by the user,
        # and warn only if we actually changed something.
        message = "Using RTX 4000 series which doesn't support faster communication speedups. Ensuring P2P and IB communications are disabled."
        warn = False
        if "NCCL_P2P_DISABLE" not in current_env:
            current_env["NCCL_P2P_DISABLE"] = "1"
            warn = True
        if "NCCL_IB_DISABLE" not in current_env:
            current_env["NCCL_IB_DISABLE"] = "1"
            warn = True
        if warn:
            logger.warning(message)

    if args.num_machines > 1 and args.deepspeed_multinode_launcher != DEEPSPEED_MULTINODE_LAUNCHERS[1]:
        # Multi-node with a DeepSpeed-native launcher (e.g. pdsh): append the
        # environment to DeepSpeed's env file so remote nodes pick it up, then
        # run the prepared command as a subprocess.
        with open(DEEPSPEED_ENVIRONMENT_NAME, "a") as f:
            valid_env_items = convert_dict_to_env_variables(current_env)
            # NOTE(review): `> 1` skips the write when exactly one env item is
            # valid — possibly intended to be `>= 1`; confirm upstream intent.
            if len(valid_env_items) > 1:
                f.writelines(valid_env_items)

        process = subprocess.Popen(cmd, env=current_env)
        process.wait()
        if process.returncode != 0:
            if not args.quiet:
                raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
            else:
                sys.exit(1)
    else:
        # Single node (or the `standard` multinode launcher): delegate to
        # torch.distributed.run with only the arguments it understands.
        debug = getattr(args, "debug", False)
        args = _filter_args(
            args,
            distrib_run.get_args_parser(),
            ["--training_script", args.training_script, "--training_script_args", args.training_script_args],
        )
        with patch_environment(**current_env):
            try:
                distrib_run.run(args)
            except Exception:
                if is_rich_available() and debug:
                    console = get_console()
                    console.print("\n[bold red]Using --debug, `torch.distributed` Stack Trace:[/bold red]")
                    console.print_exception(suppress=[__file__], show_locals=False)
                else:
                    raise
1084
+
1085
+
1086
def tpu_launcher(args):
    """Spawn TPU training processes via torch_xla's multiprocessing helper.

    The training script is imported as a module and its main function (named by
    `--main_training_function`) is run under `xmp.spawn`.
    """
    import torch_xla.distributed.xla_multiprocessing as xmp

    if args.no_python:
        raise ValueError("--no_python cannot be used with TPU launcher")

    args, current_env = prepare_tpu(args, {})

    if args.module:
        module_name = args.training_script
    else:
        # Treat the script path as an importable module: put its directory on
        # sys.path and import it by stem.
        script = Path(args.training_script)
        sys.path.append(str(script.parent.resolve()))
        module_name = script.stem

    module = importlib.import_module(module_name)
    if not hasattr(module, args.main_training_function):
        raise ValueError(
            f"Your training script should have a function named {args.main_training_function}, or you should pass a "
            "different value to `--main_training_function`."
        )

    # Make the spawned processes see the training-script arguments as argv.
    sys.argv = [module.__file__] + args.training_script_args

    entrypoint = getattr(module, args.main_training_function)
    with patch_environment(**current_env):
        xmp.spawn(PrepareForLaunch(entrypoint), args=())
1115
+
1116
+
1117
def tpu_pod_launcher(args):
    """Launch training on a TPU pod by delegating to `torch_xla.distributed.xla_dist`,
    re-invoking `accelerate-launch` as a single-machine job on each TPU VM worker.
    """
    from torch_xla.distributed import xla_dist

    current_env = {}
    args, current_env = prepare_tpu(args, current_env, True)
    debug = getattr(args, "debug", False)

    training_script = args.training_script
    training_script_args = args.training_script_args
    # Keep only the arguments xla_dist understands; everything else travels
    # inside the re-invoked accelerate-launch command below.
    new_args = _filter_args(
        args, xla_dist.get_args_parser(), ["--tpu", args.tpu_name, "--positional", "", "--restart-tpuvm-pod-server"]
    )

    if args.tpu_use_sudo:
        new_cmd = ["sudo"]
    else:
        new_cmd = []

    # Per-worker command: each TPU VM runs a single-machine accelerate launch.
    new_cmd += [
        "accelerate-launch",
        "--tpu",
        "--no_tpu_cluster",
        "--num_machines",
        "1",
        "--mixed_precision",
        "no",
        "--dynamo_backend",
        "no",
        "--num_processes",
        str(args.num_processes),
        "--main_training_function",
        str(args.main_training_function),
        training_script,
    ] + training_script_args

    new_args.positional = new_cmd
    # Docker-related flags are not supported here; collect any that were set
    # and fail with a message listing them all.
    bad_flags = ""
    for arg in vars(new_args):
        if arg.startswith("docker_"):
            value = getattr(new_args, arg)
            if value != "" and value is not None:
                bad_flags += f'{arg}="{value}"\n'
    if bad_flags != "":
        raise ValueError(
            f"Docker containers are not supported for TPU pod launcher currently, please remove the following flags:\n{bad_flags}"
        )
    # Forward the prepared environment to the workers, plus a marker so the
    # inner launch knows it is running inside a TPU pod.
    new_args.env = [f"{k}={v}" for k, v in current_env.items()]
    new_args.env.append("ACCELERATE_IN_TPU_POD=1")
    try:
        xla_dist.resolve_and_execute(new_args)
    except Exception:
        if is_rich_available() and debug:
            console = get_console()
            console.print("\n[bold red]Using --debug, `torch_xla.xla_dist` Stack Trace:[/bold red]")
            console.print_exception(suppress=[__file__], show_locals=False)
        else:
            raise
1174
+
1175
+
1176
def sagemaker_launcher(sagemaker_config: SageMakerConfig, args):
    """Submit the training job to Amazon SageMaker through a HuggingFace estimator."""
    if not is_sagemaker_available():
        raise ImportError(
            "Please install sagemaker to be able to launch training on Amazon SageMaker with `pip install accelerate[sagemaker]`"
        )
    if args.module or args.no_python:
        raise ValueError(
            "SageMaker requires a python training script file and cannot be used with --module or --no_python"
        )

    from sagemaker.huggingface import HuggingFace

    args, sagemaker_inputs = prepare_sagemager_args_inputs(sagemaker_config, args)

    estimator = HuggingFace(**args)
    estimator.fit(inputs=sagemaker_inputs)
    print(f"You can find your model data at: {estimator.model_data}")
1194
+
1195
+
1196
+ def _validate_launch_command(args):
1197
+ # Sanity checks
1198
+ if sum([args.multi_gpu, args.cpu, args.tpu, args.use_deepspeed, args.use_fsdp]) > 1:
1199
+ raise ValueError(
1200
+ "You can only use one of `--cpu`, `--multi_gpu`, `--tpu`, `--use_deepspeed`, `--use_fsdp` at a time."
1201
+ )
1202
+ if args.multi_gpu and (args.num_processes is not None) and (args.num_processes < 2):
1203
+ raise ValueError("You need to use at least 2 processes to use `--multi_gpu`.")
1204
+
1205
+ if (not args.use_fsdp or args.fsdp_version == 1) and args.use_parallelism_config:
1206
+ raise ValueError("You cannot use `--use_parallelism_config` without `--use_fsdp` and `--fsdp_version=2`. ")
1207
+
1208
+ defaults = None
1209
+ warned = []
1210
+ mp_from_config_flag = False
1211
+ # Get the default from the config file.
1212
+ if args.config_file is not None or os.path.isfile(default_config_file) and not args.cpu:
1213
+ defaults = load_config_from_file(args.config_file)
1214
+ if (
1215
+ not args.multi_gpu
1216
+ and not args.tpu
1217
+ and not args.tpu_use_cluster
1218
+ and not args.use_deepspeed
1219
+ and not args.use_fsdp
1220
+ and not args.use_megatron_lm
1221
+ ):
1222
+ args.use_deepspeed = defaults.distributed_type == DistributedType.DEEPSPEED
1223
+ args.multi_gpu = (
1224
+ True
1225
+ if defaults.distributed_type
1226
+ in (
1227
+ DistributedType.MULTI_GPU,
1228
+ DistributedType.MULTI_NPU,
1229
+ DistributedType.MULTI_MLU,
1230
+ DistributedType.MULTI_SDAA,
1231
+ DistributedType.MULTI_MUSA,
1232
+ DistributedType.MULTI_XPU,
1233
+ DistributedType.MULTI_HPU,
1234
+ DistributedType.MULTI_NEURON,
1235
+ )
1236
+ else False
1237
+ )
1238
+ args.tpu = defaults.distributed_type == DistributedType.XLA
1239
+ args.use_fsdp = defaults.distributed_type == DistributedType.FSDP
1240
+ args.use_megatron_lm = defaults.distributed_type == DistributedType.MEGATRON_LM
1241
+ args.tpu_use_cluster = defaults.tpu_use_cluster if args.tpu else False
1242
+ args.use_parallelism_config = defaults.parallelism_config != {}
1243
+ if args.gpu_ids is None:
1244
+ if defaults.gpu_ids is not None:
1245
+ args.gpu_ids = defaults.gpu_ids
1246
+ else:
1247
+ args.gpu_ids = "all"
1248
+
1249
+ if args.multi_gpu and args.num_machines is None:
1250
+ args.num_machines = defaults.num_machines
1251
+
1252
+ if len(args.gpu_ids.split(",")) < 2 and (args.gpu_ids != "all") and args.multi_gpu and args.num_machines <= 1:
1253
+ raise ValueError(
1254
+ "Less than two GPU ids were configured and tried to run on on multiple GPUs. "
1255
+ "Please ensure at least two are specified for `--gpu_ids`, or use `--gpu_ids='all'`."
1256
+ )
1257
+ if defaults.compute_environment == ComputeEnvironment.LOCAL_MACHINE:
1258
+ # Update args with the defaults
1259
+ for name, attr in defaults.__dict__.items():
1260
+ if isinstance(attr, dict):
1261
+ # Copy defaults.somedict.somearg to args.somearg and
1262
+ # defaults.fsdp_config.x to args.fsdp_x
1263
+ for key, value in attr.items():
1264
+ if name == "fsdp_config" and not key.startswith("fsdp"):
1265
+ key = "fsdp_" + key
1266
+ elif name == "fp8_config" and not key.startswith("fp8"):
1267
+ key = "fp8_" + key
1268
+ if hasattr(args, "nondefault") and key not in args.nondefault:
1269
+ setattr(args, key, value)
1270
+ elif (
1271
+ name not in ["compute_environment", "mixed_precision", "distributed_type"]
1272
+ and getattr(args, name, None) is None
1273
+ ):
1274
+ # Those args are handled separately
1275
+ setattr(args, name, attr)
1276
+ if not args.debug:
1277
+ args.debug = defaults.debug
1278
+
1279
+ if not args.mixed_precision:
1280
+ if defaults.mixed_precision is None:
1281
+ args.mixed_precision = "no"
1282
+ else:
1283
+ args.mixed_precision = defaults.mixed_precision
1284
+ mp_from_config_flag = True
1285
+ else:
1286
+ native_amp = is_bf16_available(True)
1287
+ if (
1288
+ args.mixed_precision == "bf16"
1289
+ and not native_amp
1290
+ and not (args.tpu and is_torch_xla_available(check_is_tpu=True))
1291
+ ):
1292
+ raise ValueError("bf16 mixed precision requires PyTorch >= 1.10 and a supported device.")
1293
+
1294
+ # Silently set the default here
1295
+ if args.dynamo_backend is None:
1296
+ args.dynamo_backend = "no"
1297
+ if args.num_processes == -1:
1298
+ raise ValueError("You need to manually pass in `--num_processes` using this config yaml.")
1299
+ else:
1300
+ if args.num_processes is None:
1301
+ if is_xpu_available():
1302
+ args.num_processes = torch.xpu.device_count()
1303
+ elif is_mlu_available():
1304
+ args.num_processes = torch.mlu.device_count()
1305
+ elif is_sdaa_available():
1306
+ args.num_processes = torch.sdaa.device_count()
1307
+ elif is_musa_available():
1308
+ args.num_processes = torch.musa.device_count()
1309
+ elif is_npu_available():
1310
+ args.num_processes = torch.npu.device_count()
1311
+ elif is_hpu_available():
1312
+ args.num_processes = torch.hpu.device_count()
1313
+ elif is_neuron_available():
1314
+ args.num_processes = torch.neuron.device_count()
1315
+ else:
1316
+ args.num_processes = torch.cuda.device_count()
1317
+ warned.append(f"\t`--num_processes` was set to a value of `{args.num_processes}`")
1318
+ if args.debug is None:
1319
+ args.debug = False
1320
+ if (
1321
+ not args.multi_gpu
1322
+ and args.num_processes > 1
1323
+ and (
1324
+ (is_xpu_available() and torch.xpu.device_count() > 1)
1325
+ or (is_npu_available() and torch.npu.device_count() > 1)
1326
+ or (is_hpu_available() and torch.hpu.device_count() > 1)
1327
+ or (is_mlu_available() and torch.mlu.device_count() > 1)
1328
+ or (is_sdaa_available() and torch.sdaa.device_count() > 1)
1329
+ or (is_musa_available() and torch.musa.device_count() > 1)
1330
+ or (is_neuron_available() and torch.neuron.device_count() > 1)
1331
+ or (torch.cuda.is_available() and torch.cuda.device_count() > 1)
1332
+ )
1333
+ ):
1334
+ warned.append(
1335
+ "\t\tMore than one GPU was found, enabling multi-GPU training.\n"
1336
+ "\t\tIf this was unintended please pass in `--num_processes=1`."
1337
+ )
1338
+ args.multi_gpu = True
1339
+ if args.num_machines is None:
1340
+ warned.append("\t`--num_machines` was set to a value of `1`")
1341
+ args.num_machines = 1
1342
+ if args.mixed_precision is None:
1343
+ warned.append("\t`--mixed_precision` was set to a value of `'no'`")
1344
+ args.mixed_precision = "no"
1345
+ if not hasattr(args, "use_cpu"):
1346
+ args.use_cpu = args.cpu
1347
+ if args.dynamo_backend is None:
1348
+ warned.append("\t`--dynamo_backend` was set to a value of `'no'`")
1349
+ args.dynamo_backend = "no"
1350
+ if args.debug:
1351
+ logger.debug("Running script in debug mode, expect distributed operations to be slightly slower.")
1352
+
1353
+ is_aws_env_disabled = defaults is None or (
1354
+ defaults is not None and defaults.compute_environment != ComputeEnvironment.AMAZON_SAGEMAKER
1355
+ )
1356
+ if is_aws_env_disabled and args.num_cpu_threads_per_process is None:
1357
+ args.num_cpu_threads_per_process = get_int_from_env(["OMP_NUM_THREADS"], 1)
1358
+ if args.use_cpu and args.num_processes >= 1 and get_int_from_env(["OMP_NUM_THREADS"], 0) == 0:
1359
+ local_size = get_int_from_env(
1360
+ ["MPI_LOCALNRANKS", "OMPI_COMM_WORLD_LOCAL_SIZE", "MV2_COMM_WORLD_LOCAL_SIZE"],
1361
+ max(int(args.num_processes / args.num_machines), 1),
1362
+ )
1363
+ import psutil
1364
+
1365
+ threads_per_process = int(psutil.cpu_count(logical=False) / local_size)
1366
+ if threads_per_process > 1:
1367
+ args.num_cpu_threads_per_process = threads_per_process
1368
+ warned.append(
1369
+ f"\t`--num_cpu_threads_per_process` was set to `{args.num_cpu_threads_per_process}` to improve out-of-box performance when training on CPUs"
1370
+ )
1371
+
1372
+ if any(warned):
1373
+ message = "The following values were not passed to `accelerate launch` and had defaults used instead:\n"
1374
+ message += "\n".join(warned)
1375
+ message += (
1376
+ "\nTo avoid this warning pass in values for each of the problematic parameters or run `accelerate config`."
1377
+ )
1378
+ logger.warning(message)
1379
+ return args, defaults, mp_from_config_flag
1380
+
1381
+
1382
+ def launch_command(args):
1383
+ args, defaults, mp_from_config_flag = _validate_launch_command(args)
1384
+ # Use the proper launcher
1385
+ if args.use_deepspeed and not args.cpu:
1386
+ args.deepspeed_fields_from_accelerate_config = list(defaults.deepspeed_config.keys()) if defaults else []
1387
+ if mp_from_config_flag:
1388
+ args.deepspeed_fields_from_accelerate_config.append("mixed_precision")
1389
+ args.deepspeed_fields_from_accelerate_config = ",".join(args.deepspeed_fields_from_accelerate_config)
1390
+ deepspeed_launcher(args)
1391
+ elif args.use_fsdp and not args.cpu:
1392
+ multi_gpu_launcher(args)
1393
+ elif args.use_megatron_lm and not args.cpu:
1394
+ multi_gpu_launcher(args)
1395
+ elif args.multi_gpu and not args.cpu:
1396
+ multi_gpu_launcher(args)
1397
+ elif args.tpu and not args.cpu:
1398
+ if args.tpu_use_cluster:
1399
+ tpu_pod_launcher(args)
1400
+ else:
1401
+ tpu_launcher(args)
1402
+ elif defaults is not None and defaults.compute_environment == ComputeEnvironment.AMAZON_SAGEMAKER:
1403
+ sagemaker_launcher(defaults, args)
1404
+ else:
1405
+ simple_launcher(args)
1406
+
1407
+
1408
+ def main():
1409
+ parser = launch_command_parser()
1410
+ args = parser.parse_args()
1411
+ launch_command(args)
1412
+
1413
+
1414
+ if __name__ == "__main__":
1415
+ main()
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2022 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from .selection_menu import BulletMenu
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (270 Bytes). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/cursor.cpython-312.pyc ADDED
Binary file (3.05 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/helpers.cpython-312.pyc ADDED
Binary file (2.2 kB). View file