BryanW committed on
Commit
27d7c98
·
verified ·
1 Parent(s): 03665a7

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/__init__.cpython-312.pyc +0 -0
  2. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/big_modeling.cpython-312.pyc +0 -0
  3. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/checkpointing.cpython-312.pyc +0 -0
  4. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/data_loader.cpython-312.pyc +0 -0
  5. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/hooks.cpython-312.pyc +0 -0
  6. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/inference.cpython-312.pyc +0 -0
  7. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/launchers.cpython-312.pyc +0 -0
  8. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/local_sgd.cpython-312.pyc +0 -0
  9. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/logging.cpython-312.pyc +0 -0
  10. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/memory_utils.cpython-312.pyc +0 -0
  11. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/optimizer.cpython-312.pyc +0 -0
  12. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/parallelism_config.cpython-312.pyc +0 -0
  13. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/scheduler.cpython-312.pyc +0 -0
  14. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/state.cpython-312.pyc +0 -0
  15. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/tracking.cpython-312.pyc +0 -0
  16. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__init__.py +13 -0
  17. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/__init__.cpython-312.pyc +0 -0
  18. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/accelerate_cli.cpython-312.pyc +0 -0
  19. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/env.cpython-312.pyc +0 -0
  20. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/estimate.cpython-312.pyc +0 -0
  21. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/launch.cpython-312.pyc +0 -0
  22. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/merge.cpython-312.pyc +0 -0
  23. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/test.cpython-312.pyc +0 -0
  24. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/to_fsdp2.cpython-312.pyc +0 -0
  25. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/tpu.cpython-312.pyc +0 -0
  26. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/utils.cpython-312.pyc +0 -0
  27. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/accelerate_cli.py +54 -0
  28. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__init__.py +52 -0
  29. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/__init__.cpython-312.pyc +0 -0
  30. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/cluster.cpython-312.pyc +0 -0
  31. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config.cpython-312.pyc +0 -0
  32. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config_args.cpython-312.pyc +0 -0
  33. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config_utils.cpython-312.pyc +0 -0
  34. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/default.cpython-312.pyc +0 -0
  35. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/sagemaker.cpython-312.pyc +0 -0
  36. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/update.cpython-312.pyc +0 -0
  37. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/cluster.py +939 -0
  38. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/config.py +89 -0
  39. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/config_args.py +252 -0
  40. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/config_utils.py +122 -0
  41. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/default.py +172 -0
  42. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/sagemaker.py +274 -0
  43. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/update.py +63 -0
  44. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/env.py +143 -0
  45. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/estimate.py +318 -0
  46. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/launch.py +1415 -0
  47. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__init__.py +14 -0
  48. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/__init__.cpython-312.pyc +0 -0
  49. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/cursor.cpython-312.pyc +0 -0
  50. URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/helpers.cpython-312.pyc +0 -0
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.38 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/big_modeling.cpython-312.pyc ADDED
Binary file (37.4 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/checkpointing.cpython-312.pyc ADDED
Binary file (15.9 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/data_loader.cpython-312.pyc ADDED
Binary file (65.4 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/hooks.cpython-312.pyc ADDED
Binary file (35.4 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/inference.cpython-312.pyc ADDED
Binary file (7.55 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/launchers.cpython-312.pyc ADDED
Binary file (13 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/local_sgd.cpython-312.pyc ADDED
Binary file (5.18 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/logging.cpython-312.pyc ADDED
Binary file (6.06 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/memory_utils.cpython-312.pyc ADDED
Binary file (503 Bytes). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/optimizer.cpython-312.pyc ADDED
Binary file (11.7 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/parallelism_config.cpython-312.pyc ADDED
Binary file (20.3 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/scheduler.cpython-312.pyc ADDED
Binary file (4.68 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/state.cpython-312.pyc ADDED
Binary file (64.5 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__pycache__/tracking.cpython-312.pyc ADDED
Binary file (63.6 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (211 Bytes). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/accelerate_cli.cpython-312.pyc ADDED
Binary file (1.84 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/env.cpython-312.pyc ADDED
Binary file (5.39 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/estimate.cpython-312.pyc ADDED
Binary file (14.1 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/launch.cpython-312.pyc ADDED
Binary file (56.4 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/merge.cpython-312.pyc ADDED
Binary file (2.43 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/test.cpython-312.pyc ADDED
Binary file (2.19 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/to_fsdp2.cpython-312.pyc ADDED
Binary file (6.24 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/tpu.cpython-312.pyc ADDED
Binary file (6.03 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/__pycache__/utils.cpython-312.pyc ADDED
Binary file (5.22 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/accelerate_cli.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from accelerate.commands.config import get_config_parser
18
+ from accelerate.commands.env import env_command_parser
19
+ from accelerate.commands.estimate import estimate_command_parser
20
+ from accelerate.commands.launch import launch_command_parser
21
+ from accelerate.commands.merge import merge_command_parser
22
+ from accelerate.commands.test import test_command_parser
23
+ from accelerate.commands.to_fsdp2 import to_fsdp2_command_parser
24
+ from accelerate.commands.tpu import tpu_command_parser
25
+ from accelerate.commands.utils import CustomArgumentParser
26
+
27
+
28
+ def main():
29
+ parser = CustomArgumentParser("Accelerate CLI tool", usage="accelerate <command> [<args>]", allow_abbrev=False)
30
+ subparsers = parser.add_subparsers(help="accelerate command helpers")
31
+
32
+ # Register commands
33
+ get_config_parser(subparsers=subparsers)
34
+ estimate_command_parser(subparsers=subparsers)
35
+ env_command_parser(subparsers=subparsers)
36
+ launch_command_parser(subparsers=subparsers)
37
+ merge_command_parser(subparsers=subparsers)
38
+ tpu_command_parser(subparsers=subparsers)
39
+ test_command_parser(subparsers=subparsers)
40
+ to_fsdp2_command_parser(subparsers=subparsers)
41
+
42
+ # Let's go
43
+ args = parser.parse_args()
44
+
45
+ if not hasattr(args, "func"):
46
+ parser.print_help()
47
+ exit(1)
48
+
49
+ # Run
50
+ args.func(args)
51
+
52
+
53
+ if __name__ == "__main__":
54
+ main()
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__init__.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import argparse
18
+
19
+ from .config import config_command_parser
20
+ from .config_args import default_config_file, load_config_from_file # noqa: F401
21
+ from .default import default_command_parser
22
+ from .update import update_command_parser
23
+
24
+
25
+ def get_config_parser(subparsers=None):
26
+ parent_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
27
+ # The main config parser
28
+ config_parser = config_command_parser(subparsers)
29
+ # The subparser to add commands to
30
+ subcommands = config_parser.add_subparsers(title="subcommands", dest="subcommand")
31
+
32
+ # Then add other parsers with the parent parser
33
+ default_command_parser(subcommands, parents=[parent_parser])
34
+ update_command_parser(subcommands, parents=[parent_parser])
35
+
36
+ return config_parser
37
+
38
+
39
+ def main():
40
+ config_parser = get_config_parser()
41
+ args = config_parser.parse_args()
42
+
43
+ if not hasattr(args, "func"):
44
+ config_parser.print_help()
45
+ exit(1)
46
+
47
+ # Run
48
+ args.func(args)
49
+
50
+
51
+ if __name__ == "__main__":
52
+ main()
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.49 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/cluster.cpython-312.pyc ADDED
Binary file (29.2 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config.cpython-312.pyc ADDED
Binary file (3.27 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config_args.cpython-312.pyc ADDED
Binary file (12 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config_utils.cpython-312.pyc ADDED
Binary file (3.97 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/default.cpython-312.pyc ADDED
Binary file (6.27 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/sagemaker.cpython-312.pyc ADDED
Binary file (9.5 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/update.cpython-312.pyc ADDED
Binary file (2.45 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/cluster.py ADDED
@@ -0,0 +1,939 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import os
18
+
19
+ from ...utils import (
20
+ ComputeEnvironment,
21
+ DistributedType,
22
+ is_deepspeed_available,
23
+ is_fp8_available,
24
+ is_hpu_available,
25
+ is_mlu_available,
26
+ is_mps_available,
27
+ is_msamp_available,
28
+ is_musa_available,
29
+ is_neuron_available,
30
+ is_npu_available,
31
+ is_sdaa_available,
32
+ is_torchao_available,
33
+ is_transformer_engine_available,
34
+ is_transformers_available,
35
+ is_xpu_available,
36
+ )
37
+ from ...utils.constants import (
38
+ DEEPSPEED_MULTINODE_LAUNCHERS,
39
+ FSDP2_STATE_DICT_TYPE,
40
+ FSDP_AUTO_WRAP_POLICY,
41
+ FSDP_BACKWARD_PREFETCH,
42
+ FSDP_SHARDING_STRATEGY,
43
+ FSDP_STATE_DICT_TYPE,
44
+ TORCH_DYNAMO_MODES,
45
+ )
46
+ from .config_args import ClusterConfig
47
+ from .config_utils import (
48
+ DYNAMO_BACKENDS,
49
+ _ask_field,
50
+ _ask_options,
51
+ _convert_distributed_mode,
52
+ _convert_dynamo_backend,
53
+ _convert_fp8_backend,
54
+ _convert_mixed_precision,
55
+ _convert_yes_no_to_bool,
56
+ )
57
+
58
+
59
+ def get_cluster_input():
60
+ distributed_type = _ask_options(
61
+ "Which type of machine are you using?",
62
+ [
63
+ "No distributed training",
64
+ "multi-CPU",
65
+ "multi-XPU",
66
+ "multi-HPU",
67
+ "multi-GPU",
68
+ "multi-NPU",
69
+ "multi-MLU",
70
+ "multi-SDAA",
71
+ "multi-MUSA",
72
+ "multi-NEURON",
73
+ "TPU",
74
+ ],
75
+ _convert_distributed_mode,
76
+ )
77
+
78
+ machine_rank = 0
79
+ num_machines = 1
80
+ num_processes = 1
81
+ gpu_ids = None
82
+ main_process_ip = None
83
+ main_process_port = None
84
+ rdzv_backend = "static"
85
+ same_network = True
86
+ debug = False
87
+
88
+ if distributed_type in [
89
+ DistributedType.MULTI_GPU,
90
+ DistributedType.MULTI_MLU,
91
+ DistributedType.MULTI_SDAA,
92
+ DistributedType.MULTI_MUSA,
93
+ DistributedType.MULTI_NPU,
94
+ DistributedType.MULTI_XPU,
95
+ DistributedType.MULTI_CPU,
96
+ DistributedType.MULTI_HPU,
97
+ DistributedType.MULTI_NEURON,
98
+ ]:
99
+ num_machines = _ask_field(
100
+ "How many different machines will you use (use more than 1 for multi-node training)? [1]: ",
101
+ int,
102
+ default=1,
103
+ )
104
+ if num_machines > 1:
105
+ machine_rank = _ask_options(
106
+ "What is the rank of this machine?",
107
+ list(range(num_machines)),
108
+ int,
109
+ )
110
+ main_process_ip = _ask_field(
111
+ "What is the IP address of the machine that will host the main process? ",
112
+ )
113
+ main_process_port = _ask_field(
114
+ "What is the port you will use to communicate with the main process? ",
115
+ int,
116
+ )
117
+ same_network = _ask_field(
118
+ "Are all the machines on the same local network? Answer `no` if nodes are on the cloud and/or on different network hosts [YES/no]: ",
119
+ _convert_yes_no_to_bool,
120
+ default=True,
121
+ error_message="Please enter yes or no.",
122
+ )
123
+ if not same_network:
124
+ rdzv_backend = _ask_field(
125
+ "What rendezvous backend will you use? ('static', 'c10d', ...): ", default="static"
126
+ )
127
+ debug = _ask_field(
128
+ "Should distributed operations be checked while running for errors? This can avoid timeout issues but will be slower. [yes/NO]: ",
129
+ _convert_yes_no_to_bool,
130
+ default=False,
131
+ error_message="Please enter yes or no.",
132
+ )
133
+
134
+ if distributed_type == DistributedType.NO:
135
+ use_cpu = _ask_field(
136
+ "Do you want to run your training on CPU only (even if a GPU / Apple Silicon / Ascend NPU device is available)? [yes/NO]:",
137
+ _convert_yes_no_to_bool,
138
+ default=False,
139
+ error_message="Please enter yes or no.",
140
+ )
141
+ elif distributed_type == DistributedType.MULTI_CPU:
142
+ use_cpu = True
143
+ else:
144
+ use_cpu = False
145
+
146
+ mpirun_config = {}
147
+
148
+ if use_cpu:
149
+ if distributed_type == DistributedType.MULTI_CPU:
150
+ use_mpirun = _ask_field(
151
+ "Do you want accelerate to launch mpirun? [yes/NO]: ",
152
+ _convert_yes_no_to_bool,
153
+ default=False,
154
+ error_message="Please enter yes or no.",
155
+ )
156
+ if use_mpirun:
157
+ mpirun_hostfile = _ask_field(
158
+ "Please enter the path to the hostfile to use with mpirun [~/hostfile]: ",
159
+ str,
160
+ default="~/hostfile",
161
+ )
162
+ mpirun_config["mpirun_hostfile"] = os.path.expanduser(mpirun_hostfile.strip())
163
+
164
+ dynamo_config = {}
165
+ use_dynamo = _ask_field(
166
+ "Do you wish to optimize your script with torch dynamo?[yes/NO]:",
167
+ _convert_yes_no_to_bool,
168
+ default=False,
169
+ error_message="Please enter yes or no.",
170
+ )
171
+ if use_dynamo:
172
+ prefix = "dynamo_"
173
+ dynamo_config[prefix + "backend"] = _ask_options(
174
+ "Which dynamo backend would you like to use?",
175
+ [x.lower() for x in DYNAMO_BACKENDS],
176
+ _convert_dynamo_backend,
177
+ default=2,
178
+ )
179
+ use_custom_options = _ask_field(
180
+ "Do you want to customize the defaults sent to torch.compile? [yes/NO]: ",
181
+ _convert_yes_no_to_bool,
182
+ default=False,
183
+ error_message="Please enter yes or no.",
184
+ )
185
+
186
+ if use_custom_options:
187
+ dynamo_config[prefix + "mode"] = _ask_options(
188
+ "Which mode do you want to use?",
189
+ TORCH_DYNAMO_MODES,
190
+ lambda x: TORCH_DYNAMO_MODES[int(x)],
191
+ default=0,
192
+ )
193
+ dynamo_config[prefix + "use_fullgraph"] = _ask_field(
194
+ "Do you want the fullgraph mode or it is ok to break model into several subgraphs? [yes/NO]: ",
195
+ _convert_yes_no_to_bool,
196
+ default=False,
197
+ error_message="Please enter yes or no.",
198
+ )
199
+ dynamo_config[prefix + "use_dynamic"] = _ask_field(
200
+ "Do you want to enable dynamic shape tracing? [yes/NO]: ",
201
+ _convert_yes_no_to_bool,
202
+ default=False,
203
+ error_message="Please enter yes or no.",
204
+ )
205
+ dynamo_config[prefix + "use_regional_compilation"] = _ask_field(
206
+ "Do you want to enable regional compilation? [yes/NO]: ",
207
+ _convert_yes_no_to_bool,
208
+ default=False,
209
+ error_message="Please enter yes or no.",
210
+ )
211
+
212
+ use_mps = not use_cpu and is_mps_available()
213
+ deepspeed_config = {}
214
+ if (
215
+ distributed_type
216
+ in [
217
+ DistributedType.MULTI_GPU,
218
+ DistributedType.MULTI_XPU,
219
+ DistributedType.MULTI_HPU,
220
+ DistributedType.MULTI_NPU,
221
+ DistributedType.MULTI_MLU,
222
+ DistributedType.MULTI_SDAA,
223
+ DistributedType.MULTI_MUSA,
224
+ DistributedType.MULTI_NEURON,
225
+ DistributedType.NO,
226
+ ]
227
+ and not use_mps
228
+ ):
229
+ use_deepspeed = _ask_field(
230
+ "Do you want to use DeepSpeed? [yes/NO]: ",
231
+ _convert_yes_no_to_bool,
232
+ default=False,
233
+ error_message="Please enter yes or no.",
234
+ )
235
+ if use_deepspeed:
236
+ if distributed_type is DistributedType.MULTI_NEURON:
237
+ raise RuntimeError("DeepSpeed is not supported on Neuron devices.")
238
+
239
+ distributed_type = DistributedType.DEEPSPEED
240
+ assert is_deepspeed_available(), (
241
+ "DeepSpeed is not installed => run `pip3 install deepspeed` or build it from source"
242
+ )
243
+
244
+ if distributed_type == DistributedType.DEEPSPEED:
245
+ use_deepspeed_config = _ask_field(
246
+ "Do you want to specify a json file to a DeepSpeed config? [yes/NO]: ",
247
+ _convert_yes_no_to_bool,
248
+ default=False,
249
+ error_message="Please enter yes or no.",
250
+ )
251
+ if use_deepspeed_config:
252
+ deepspeed_config["deepspeed_config_file"] = _ask_field(
253
+ "Please enter the path to the json DeepSpeed config file: ",
254
+ str,
255
+ default="none",
256
+ )
257
+ else:
258
+ deepspeed_config["zero_stage"] = _ask_options(
259
+ "What should be your DeepSpeed's ZeRO optimization stage?",
260
+ [0, 1, 2, 3],
261
+ int,
262
+ default=2,
263
+ )
264
+
265
+ deepspeed_devices = ["none", "cpu", "nvme"]
266
+ if deepspeed_config["zero_stage"] >= 2:
267
+ deepspeed_config["offload_optimizer_device"] = _ask_options(
268
+ "Where to offload optimizer states?", deepspeed_devices, lambda x: deepspeed_devices[int(x)]
269
+ )
270
+ deepspeed_config["offload_param_device"] = _ask_options(
271
+ "Where to offload parameters?", deepspeed_devices, lambda x: deepspeed_devices[int(x)]
272
+ )
273
+ if deepspeed_config["offload_param_device"] == "nvme":
274
+ deepspeed_config["offload_param_nvme_path"] = _ask_field(
275
+ "Nvme Path to offload parameters?",
276
+ str,
277
+ default="/nvme",
278
+ )
279
+ if deepspeed_config["offload_optimizer_device"] == "nvme":
280
+ deepspeed_config["offload_optimizer_nvme_path"] = _ask_field(
281
+ "Nvme Path to offload optimizer states?",
282
+ str,
283
+ default="/nvme",
284
+ )
285
+ deepspeed_config["gradient_accumulation_steps"] = _ask_field(
286
+ "How many gradient accumulation steps you're passing in your script? [1]: ",
287
+ int,
288
+ default=1,
289
+ )
290
+ use_gradient_clipping = _ask_field(
291
+ "Do you want to use gradient clipping? [yes/NO]: ",
292
+ _convert_yes_no_to_bool,
293
+ default=False,
294
+ error_message="Please enter yes or no.",
295
+ )
296
+ if use_gradient_clipping:
297
+ deepspeed_config["gradient_clipping"] = _ask_field(
298
+ "What is the gradient clipping value? [1.0]: ",
299
+ float,
300
+ default=1.0,
301
+ )
302
+ if deepspeed_config["zero_stage"] == 3:
303
+ deepspeed_config["zero3_save_16bit_model"] = _ask_field(
304
+ "Do you want to save 16-bit model weights when using ZeRO Stage-3? [yes/NO]: ",
305
+ _convert_yes_no_to_bool,
306
+ default=False,
307
+ error_message="Please enter yes or no.",
308
+ )
309
+ deepspeed_config["zero3_init_flag"] = _ask_field(
310
+ "Do you want to enable `deepspeed.zero.Init` when using ZeRO Stage-3 for constructing massive models? [yes/NO]: ",
311
+ _convert_yes_no_to_bool,
312
+ default=False,
313
+ error_message="Please enter yes or no.",
314
+ )
315
+ if deepspeed_config["zero3_init_flag"]:
316
+ if not is_transformers_available():
317
+ raise Exception(
318
+ "When `zero3_init_flag` is set, it requires Transformers to be installed. "
319
+ "Please run `pip3 install transformers`."
320
+ )
321
+ use_moe = _ask_field(
322
+ "Do you want to enable Mixture-of-Experts training (MoE)? [yes/NO]: ",
323
+ _convert_yes_no_to_bool,
324
+ default=False,
325
+ error_message="Please enter yes or no.",
326
+ )
327
+ if use_moe:
328
+ deepspeed_config["deepspeed_moe_layer_cls_names"] = _ask_field(
329
+ "Specify the comma-separated list of transformers MoE layer class names (case-sensitive), e.g : "
330
+ " `MixtralSparseMoeBlock`, `Qwen2MoeSparseMoeBlock`, `JetMoEAttention,JetMoEBlock` ... : ",
331
+ str,
332
+ )
333
+
334
+ if num_machines > 1:
335
+ launcher_query = "Which Type of launcher do you want to use?"
336
+ deepspeed_config["deepspeed_multinode_launcher"] = _ask_options(
337
+ launcher_query,
338
+ DEEPSPEED_MULTINODE_LAUNCHERS,
339
+ lambda x: DEEPSPEED_MULTINODE_LAUNCHERS[int(x)],
340
+ )
341
+
342
+ if deepspeed_config["deepspeed_multinode_launcher"] != DEEPSPEED_MULTINODE_LAUNCHERS[1]:
343
+ deepspeed_config["deepspeed_hostfile"] = _ask_field(
344
+ "DeepSpeed configures multi-node compute resources with hostfile. "
345
+ "Each row is of the format `hostname slots=[num_gpus]`, e.g., `localhost slots=2`; "
346
+ "for more information please refer official [documentation]"
347
+ "(https://www.deepspeed.ai/getting-started/#resource-configuration-multi-node). "
348
+ "Please specify the location of hostfile: ",
349
+ str,
350
+ )
351
+
352
+ is_exclusion_filter = _ask_field(
353
+ "Do you want to specify exclusion filter string? [yes/NO]: ",
354
+ _convert_yes_no_to_bool,
355
+ default=False,
356
+ error_message="Please enter yes or no.",
357
+ )
358
+ if is_exclusion_filter:
359
+ deepspeed_config["deepspeed_exclusion_filter"] = _ask_field(
360
+ "DeepSpeed exclusion filter string: ",
361
+ str,
362
+ )
363
+
364
+ is_inclusion_filter = _ask_field(
365
+ "Do you want to specify inclusion filter string? [yes/NO]: ",
366
+ _convert_yes_no_to_bool,
367
+ default=False,
368
+ error_message="Please enter yes or no.",
369
+ )
370
+ if is_inclusion_filter:
371
+ deepspeed_config["deepspeed_inclusion_filter"] = _ask_field(
372
+ "DeepSpeed inclusion filter string: ",
373
+ str,
374
+ )
375
+
376
+ fsdp_config = {}
377
+
378
+ if distributed_type in [
379
+ DistributedType.MULTI_GPU,
380
+ DistributedType.MULTI_NPU,
381
+ DistributedType.MULTI_MLU,
382
+ DistributedType.MULTI_SDAA,
383
+ DistributedType.MULTI_MUSA,
384
+ DistributedType.MULTI_XPU,
385
+ DistributedType.MULTI_HPU,
386
+ DistributedType.MULTI_NEURON,
387
+ ]:
388
+ use_fsdp = _ask_field(
389
+ "Do you want to use FullyShardedDataParallel? [yes/NO]: ",
390
+ _convert_yes_no_to_bool,
391
+ default=False,
392
+ error_message="Please enter yes or no.",
393
+ )
394
+ if use_fsdp:
395
+ if distributed_type is DistributedType.MULTI_NEURON:
396
+ raise NotImplementedError("FSDP is not currently supported on Neuron devices.")
397
+ distributed_type = DistributedType.FSDP
398
+
399
+ if distributed_type == DistributedType.FSDP:
400
+ fsdp_config["fsdp_version"] = _ask_options(
401
+ "What should be your FSDP version? [2]: ",
402
+ [1, 2],
403
+ lambda x: int(x) + 1,
404
+ default=1,
405
+ )
406
+ fsdp_version = fsdp_config["fsdp_version"] # extract to a variable to simplify usage later
407
+
408
+ if fsdp_version == 1:
409
+ sharding_strategy_query = "What should be your sharding strategy?"
410
+ fsdp_config["fsdp_reshard_after_forward"] = _ask_options(
411
+ sharding_strategy_query,
412
+ FSDP_SHARDING_STRATEGY,
413
+ lambda x: FSDP_SHARDING_STRATEGY[int(x)],
414
+ )
415
+ else:
416
+ fsdp_config["fsdp_reshard_after_forward"] = _ask_field(
417
+ "Do you want to enable resharding after forward? [YES/no]: ",
418
+ _convert_yes_no_to_bool,
419
+ default=True,
420
+ error_message="Please enter yes or no.",
421
+ )
422
+
423
+ fsdp_config["fsdp_offload_params"] = _ask_field(
424
+ "Do you want to offload parameters and gradients to CPU? [yes/NO]: ",
425
+ _convert_yes_no_to_bool,
426
+ default=False,
427
+ error_message="Please enter yes or no.",
428
+ )
429
+
430
+ fsdp_wrap_query = "What should be your auto wrap policy?"
431
+ fsdp_config["fsdp_auto_wrap_policy"] = _ask_options(
432
+ fsdp_wrap_query,
433
+ FSDP_AUTO_WRAP_POLICY,
434
+ lambda x: FSDP_AUTO_WRAP_POLICY[int(x)],
435
+ )
436
+ if fsdp_config["fsdp_auto_wrap_policy"] == FSDP_AUTO_WRAP_POLICY[0]:
437
+ use_no_split_modules = _ask_field(
438
+ "Do you want to use the model's `_no_split_modules` to wrap. Only applicable for 🤗 Transformers [yes/NO]: ",
439
+ _convert_yes_no_to_bool,
440
+ default=False,
441
+ error_message="Please enter yes or no.",
442
+ )
443
+ if not use_no_split_modules:
444
+ fsdp_config["fsdp_transformer_layer_cls_to_wrap"] = _ask_field(
445
+ "Specify the comma-separated list of transformer layer class names (case-sensitive) to wrap ,e.g, :"
446
+ "`BertLayer`, `GPTJBlock`, `T5Block`, `BertLayer,BertEmbeddings,BertSelfOutput` ...? : ",
447
+ str,
448
+ )
449
+ elif fsdp_config["fsdp_auto_wrap_policy"] == FSDP_AUTO_WRAP_POLICY[1]:
450
+ fsdp_config["fsdp_min_num_params"] = _ask_field(
451
+ "What should be your FSDP's minimum number of parameters for Default Auto Wrapping Policy? [1e8]: ",
452
+ int,
453
+ default=100000000,
454
+ )
455
+ # Removed in FSDP2, ask for user input for FSDP1
456
+ if fsdp_version == 1:
457
+ fsdp_backward_prefetch_query = "What should be your FSDP's backward prefetch policy?"
458
+ fsdp_config["fsdp_backward_prefetch"] = _ask_options(
459
+ fsdp_backward_prefetch_query,
460
+ FSDP_BACKWARD_PREFETCH,
461
+ lambda x: FSDP_BACKWARD_PREFETCH[int(x)],
462
+ )
463
+
464
+ fsdp_state_dict_type_query = "What should be your FSDP's state dict type?"
465
+ fsdp_config["fsdp_state_dict_type"] = _ask_options(
466
+ fsdp_state_dict_type_query,
467
+ FSDP_STATE_DICT_TYPE if fsdp_version == 1 else FSDP2_STATE_DICT_TYPE,
468
+ lambda x: FSDP_STATE_DICT_TYPE[int(x)] if fsdp_version == 1 else FSDP2_STATE_DICT_TYPE[int(x)],
469
+ default=0,
470
+ )
471
+ # Not implemented in FSDP2, ask for user input for FSDP1
472
+ if fsdp_version == 1:
473
+ fsdp_config["fsdp_forward_prefetch"] = _ask_field(
474
+ "Do you want to enable FSDP's forward prefetch policy? [yes/NO]: ",
475
+ _convert_yes_no_to_bool,
476
+ default=False,
477
+ error_message="Please enter yes or no.",
478
+ )
479
+ # Obsolete in FSDP2, ask for user input for FSDP1
480
+ if fsdp_version == 1:
481
+ fsdp_config["fsdp_use_orig_params"] = _ask_field(
482
+ "Do you want to enable FSDP's `use_orig_params` feature? [YES/no]: ",
483
+ _convert_yes_no_to_bool,
484
+ default=True,
485
+ error_message="Please enter yes or no.",
486
+ )
487
+ fsdp_config["fsdp_cpu_ram_efficient_loading"] = _ask_field(
488
+ "Do you want to enable CPU RAM efficient model loading? Only applicable for 🤗 Transformers models. [YES/no]: ",
489
+ _convert_yes_no_to_bool,
490
+ default=True,
491
+ error_message="Please enter yes or no.",
492
+ )
493
+ # Obsolete in FSDP2, ask for user input for FSDP1
494
+ if fsdp_version == 1:
495
+ if fsdp_config["fsdp_cpu_ram_efficient_loading"]:
496
+ fsdp_config["fsdp_sync_module_states"] = True
497
+ else:
498
+ fsdp_config["fsdp_sync_module_states"] = _ask_field(
499
+ "Do you want each individually wrapped FSDP unit to broadcast module parameters from rank 0 at the start? [YES/no]: ",
500
+ _convert_yes_no_to_bool,
501
+ default=True,
502
+ error_message="Please enter yes or no.",
503
+ )
504
+ fsdp_config["fsdp_activation_checkpointing"] = _ask_field(
505
+ "Do you want to enable FSDP activation checkpointing? [yes/NO]: ",
506
+ _convert_yes_no_to_bool,
507
+ default=False,
508
+ error_message="Please enter yes or no.",
509
+ )
510
+
511
+ parallelism_config = {}
512
+
513
+ if fsdp_config.get("fsdp_version", 1) == 2:
514
+ use_parallelism_config = _ask_field(
515
+ "Do you want to use the parallelism config? [yes/NO]: ",
516
+ _convert_yes_no_to_bool,
517
+ default=False,
518
+ error_message="Please enter yes or no.",
519
+ )
520
+
521
+ if use_parallelism_config:
522
+ prefix = "parallelism_config_"
523
+ parallelism_config[prefix + "dp_replicate_size"] = _ask_field(
524
+ "What is the data parallelism replicate size? [1]: ",
525
+ int,
526
+ default=1,
527
+ error_message="Please enter an integer.",
528
+ )
529
+
530
+ parallelism_config[prefix + "dp_shard_size"] = _ask_field(
531
+ "What is the FSDP shard size? [1]: ",
532
+ int,
533
+ default=1,
534
+ error_message="Please enter an integer.",
535
+ )
536
+
537
+ parallelism_config[prefix + "tp_size"] = _ask_field(
538
+ "What is the tensor parallelism size? [1]: ",
539
+ int,
540
+ default=1,
541
+ error_message="Please enter an integer.",
542
+ )
543
+
544
+ parallelism_config[prefix + "cp_size"] = _ask_field(
545
+ "What is the context parallelism size? [1]: ",
546
+ int,
547
+ default=1,
548
+ error_message="Please enter an integer.",
549
+ )
550
+ if parallelism_config[prefix + "cp_size"] > 1:
551
+ parallelism_config[prefix + "cp_comm_strategy"] = _ask_options(
552
+ "What is the compute parallelism communication strategy?",
553
+ ["allgather", "alltoall"],
554
+ lambda x: ["allgather", "alltoall"][int(x)],
555
+ default=0,
556
+ )
557
+
558
+ megatron_lm_config = {}
559
+ if distributed_type in [DistributedType.MULTI_GPU]:
560
+ use_megatron_lm = _ask_field(
561
+ "Do you want to use Megatron-LM ? [yes/NO]: ",
562
+ _convert_yes_no_to_bool,
563
+ default=False,
564
+ error_message="Please enter yes or no.",
565
+ )
566
+ if use_megatron_lm:
567
+ distributed_type = DistributedType.MEGATRON_LM
568
+ if distributed_type == DistributedType.MEGATRON_LM:
569
+ prefix = "megatron_lm_"
570
+ megatron_lm_config[prefix + "tp_degree"] = _ask_field(
571
+ "What is the Tensor Parallelism degree/size? [1]:",
572
+ int,
573
+ default=1,
574
+ error_message="Please enter an integer.",
575
+ )
576
+ if megatron_lm_config[prefix + "tp_degree"] > 1:
577
+ megatron_lm_config[prefix + "sequence_parallelism"] = _ask_field(
578
+ "Do you want to enable Sequence Parallelism? [YES/no]: ",
579
+ _convert_yes_no_to_bool,
580
+ default=True,
581
+ error_message="Please enter yes or no.",
582
+ )
583
+
584
+ megatron_lm_config[prefix + "pp_degree"] = _ask_field(
585
+ "What is the Pipeline Parallelism degree/size? [1]:",
586
+ int,
587
+ default=1,
588
+ error_message="Please enter an integer.",
589
+ )
590
+ if megatron_lm_config[prefix + "pp_degree"] > 1:
591
+ megatron_lm_config[prefix + "num_micro_batches"] = _ask_field(
592
+ "What is the number of micro-batches? [1]:",
593
+ int,
594
+ default=1,
595
+ error_message="Please enter an integer.",
596
+ )
597
+
598
+ megatron_lm_config[prefix + "recompute_activations"] = _ask_field(
599
+ "Do you want to enable selective activation recomputation? [YES/no]: ",
600
+ _convert_yes_no_to_bool,
601
+ default=True,
602
+ error_message="Please enter yes or no.",
603
+ )
604
+
605
+ megatron_lm_config[prefix + "use_distributed_optimizer"] = _ask_field(
606
+ "Do you want to use distributed optimizer "
607
+ "which shards optimizer state and gradients across data parallel ranks? [YES/no]: ",
608
+ _convert_yes_no_to_bool,
609
+ default=True,
610
+ error_message="Please enter yes or no.",
611
+ )
612
+
613
+ megatron_lm_config[prefix + "gradient_clipping"] = _ask_field(
614
+ "What is the gradient clipping value based on global L2 Norm (0 to disable)? [1.0]: ",
615
+ float,
616
+ default=1.0,
617
+ )
618
+ # TPU specific defaults
619
+ tpu_commands = None
620
+ tpu_command_file = None
621
+ tpu_downcast_bf16 = "no"
622
+ tpu_env = []
623
+ tpu_name = None
624
+ tpu_vm = None
625
+ tpu_zone = None
626
+ tpu_use_sudo = False
627
+ tpu_use_cluster = False
628
+
629
+ if distributed_type in [
630
+ DistributedType.MULTI_CPU,
631
+ DistributedType.MULTI_XPU,
632
+ DistributedType.MULTI_HPU,
633
+ DistributedType.MULTI_GPU,
634
+ DistributedType.MULTI_MLU,
635
+ DistributedType.MULTI_SDAA,
636
+ DistributedType.MULTI_MUSA,
637
+ DistributedType.MULTI_NPU,
638
+ DistributedType.MULTI_NEURON,
639
+ DistributedType.XLA,
640
+ ]:
641
+ machine_type = str(distributed_type).split(".")[1].replace("MULTI_", "")
642
+ if machine_type in ["TPU", "NEURON"]:
643
+ machine_type += " cores"
644
+ elif machine_type == "CPU":
645
+ machine_type = "processes"
646
+ else:
647
+ machine_type += "(s)"
648
+ num_processes = _ask_field(
649
+ f"How many {machine_type} should be used for distributed training? [1]:",
650
+ int,
651
+ default=1,
652
+ error_message="Please enter an integer.",
653
+ )
654
+ elif distributed_type in [DistributedType.FSDP, DistributedType.DEEPSPEED, DistributedType.MEGATRON_LM]:
655
+ num_processes = _ask_field(
656
+ "How many GPU(s) should be used for distributed training? [1]:",
657
+ int,
658
+ default=1,
659
+ error_message="Please enter an integer.",
660
+ )
661
+ else:
662
+ num_processes = 1
663
+
664
+ if (distributed_type == DistributedType.MULTI_GPU) and (num_machines == 1) and (num_processes == 1):
665
+ raise ValueError(
666
+ f"Specified distributed type {distributed_type} but only using 1 GPU on a single machine. Please select `No distributed training` for the type of machine you are using."
667
+ )
668
+
669
+ if (
670
+ distributed_type
671
+ in [
672
+ DistributedType.MULTI_GPU,
673
+ DistributedType.MULTI_MLU,
674
+ DistributedType.MULTI_SDAA,
675
+ DistributedType.MULTI_MUSA,
676
+ DistributedType.MULTI_NPU,
677
+ DistributedType.MULTI_XPU,
678
+ DistributedType.MULTI_HPU,
679
+ DistributedType.MULTI_NEURON,
680
+ DistributedType.NO,
681
+ ]
682
+ and not use_cpu
683
+ and not use_mps
684
+ ):
685
+ if is_npu_available():
686
+ machine_type = "NPU(s)"
687
+ elif is_mlu_available():
688
+ machine_type = "MLU(s)"
689
+ elif is_sdaa_available():
690
+ machine_type = "SDAA(s)"
691
+ elif is_musa_available():
692
+ machine_type = "MUSA(s)"
693
+ elif is_xpu_available():
694
+ machine_type = "XPU(s)"
695
+ elif is_hpu_available():
696
+ machine_type = "HPU(s)"
697
+ elif is_neuron_available():
698
+ machine_type = "Neuron cores"
699
+ else:
700
+ machine_type = "GPU(s)"
701
+ gpu_ids = _ask_field(
702
+ f"What {machine_type} (by id) should be used for training on this machine as a comma-separated list? [all]:",
703
+ default="all",
704
+ )
705
+
706
+ # CPU affinity is only supported on NVIDIA hardware for now
707
+ enable_cpu_affinity = False
708
+ if distributed_type in (DistributedType.NO, DistributedType.MULTI_GPU) and not use_cpu and not use_mps:
709
+ enable_cpu_affinity = _ask_field(
710
+ "Would you like to enable numa efficiency? (Currently only supported on NVIDIA hardware). [yes/NO]: ",
711
+ _convert_yes_no_to_bool,
712
+ default=False,
713
+ error_message="Please enter yes or no.",
714
+ )
715
+
716
+ fp8_config = None
717
+ if distributed_type == DistributedType.XLA:
718
+ mixed_precision = "no"
719
+ main_training_function = _ask_field(
720
+ "What is the name of the function in your script that should be launched in all parallel scripts? [main]: ",
721
+ default="main",
722
+ )
723
+ tpu_use_cluster = _ask_field(
724
+ "Are you using a TPU cluster? [yes/NO]: ",
725
+ _convert_yes_no_to_bool,
726
+ default=False,
727
+ error_message="Please enter yes or no.",
728
+ )
729
+ if tpu_use_cluster:
730
+ tpu_name = _ask_field(
731
+ "What is the name of your TPU cluster? ",
732
+ default=None,
733
+ error_message="Please enter the name of your TPU cluster.",
734
+ )
735
+ tpu_zone = _ask_field(
736
+ "What is the zone of your TPU cluster? ",
737
+ default=None,
738
+ error_message="Please enter the zone of your TPU cluster.",
739
+ )
740
+ tpu_use_sudo = _ask_field(
741
+ "To run a python script in a TPU pod, should `sudo` be used? [yes/NO]: ",
742
+ default=False,
743
+ error_message="Please enter yes or no.",
744
+ )
745
+ run_commands = _ask_field(
746
+ "Do you have code you wish to run on startup in each pod? [yes/NO]: ",
747
+ _convert_yes_no_to_bool,
748
+ default=False,
749
+ error_message="Please enter yes or no.",
750
+ )
751
+ if run_commands:
752
+ use_command_file = _ask_field(
753
+ "Is this code located in a bash script? [yes/NO]: ",
754
+ _convert_yes_no_to_bool,
755
+ default=False,
756
+ error_message="Please enter yes or no.",
757
+ )
758
+ if use_command_file:
759
+ tpu_command_file = _ask_field(
760
+ "What is the path to your bash script? ",
761
+ default=None,
762
+ error_message="Please enter the path to your bash script.",
763
+ )
764
+ tpu_command_file = os.path.abspath(tpu_command_file)
765
+ else:
766
+ print("Please enter each command separately you wish to run on startup in each pod.")
767
+ tpu_commands = []
768
+ another_command = True
769
+ while another_command:
770
+ tpu_commands.append(
771
+ _ask_field(
772
+ "Please enter a single command to be ran ",
773
+ default=None,
774
+ error_message="Please enter the commands you wish to run on startup in each pod as a single string.",
775
+ )
776
+ )
777
+ another_command = _ask_field(
778
+ "Do you wish to add another command? [yes/NO]: ",
779
+ _convert_yes_no_to_bool,
780
+ default=False,
781
+ error_message="Please enter yes or no.",
782
+ )
783
+ tpu_vm = _ask_field(
784
+ "If not using an instance group, what are the names of the Compute VM instances to be used, separated by a comma: ",
785
+ default="",
786
+ ).split(",")
787
+ tpu_env = _ask_field(
788
+ "What environment variables do you wish to set in each pod, separated by a comma: ",
789
+ default="",
790
+ ).split(",")
791
+
792
+ else:
793
+ main_training_function = "main"
794
+ if distributed_type == DistributedType.DEEPSPEED and use_deepspeed_config:
795
+ mixed_precision = None
796
+ else:
797
+ mixed_precision = _ask_options(
798
+ "Do you wish to use mixed precision?",
799
+ ["no", "fp16", "bf16", "fp8"],
800
+ _convert_mixed_precision,
801
+ )
802
+ if mixed_precision == "fp8":
803
+ if not is_fp8_available():
804
+ raise ValueError(
805
+ "FP8 (either torchao, Transformer Engine or MSAMP) is not installed on this machine."
806
+ )
807
+ fp8_config = {}
808
+ fp8_config["backend"] = _ask_options(
809
+ "Which FP8 backend do you want to use?",
810
+ ["ao", "te", "msamp"],
811
+ _convert_fp8_backend,
812
+ )
813
+ if fp8_config["backend"] == "TE":
814
+ if not is_transformer_engine_available():
815
+ raise ValueError("TransformersEngine was selected, but it is not installed on this machine.")
816
+ fp8_config["use_autocast_during_eval"] = _ask_field(
817
+ "Do you want to use FP8 autocast during eval mode? Generally better metrics are found when this is disabled [yes/NO]: ",
818
+ _convert_yes_no_to_bool,
819
+ default=False,
820
+ )
821
+ fp8_config["margin"] = _ask_field(
822
+ "What margin should be used for gradient scaling? [0]: ",
823
+ int,
824
+ default=0,
825
+ )
826
+ fp8_config["interval"] = _ask_field(
827
+ "What interval should be used for for how often the scaling factor is recomputed? [1]: ",
828
+ int,
829
+ default=1,
830
+ )
831
+ fp8_config["fp8_format"] = _ask_options(
832
+ "Which weight format should be used?",
833
+ ["HYBRID", "E4M3", "E5M2"],
834
+ lambda i: ["HYBRID", "E4M3", "E5M2"][i],
835
+ default=0,
836
+ )
837
+ fp8_config["amax_history_length"] = _ask_field(
838
+ "What length of history should be used for the amax scaling factor computation? [1024]: ",
839
+ int,
840
+ default=1024,
841
+ )
842
+ fp8_config["amax_compute_algorithm"] = _ask_options(
843
+ "Which algorithm should be used for the amax scaling factor computation?",
844
+ ["max", "most_recent"],
845
+ lambda x: "max" if x == 0 else "most_recent",
846
+ default=0,
847
+ )
848
+ fp8_config["override_linear_precision"] = _ask_field(
849
+ "Do you want to to execute `fprop`, `dgrad`, and `wgrad` GEMMS in higher precision? [yes/NO]: ",
850
+ _convert_yes_no_to_bool,
851
+ default=False,
852
+ )
853
+ if fp8_config["override_linear_precision"]:
854
+ fprop = _ask_field(
855
+ "Should `fprop` be executed in higher precision? [yes/NO]: ",
856
+ _convert_yes_no_to_bool,
857
+ default=False,
858
+ )
859
+ dgrad = _ask_field(
860
+ "Should `dgrad` be executed in higher precision? [yes/NO]: ",
861
+ _convert_yes_no_to_bool,
862
+ default=False,
863
+ )
864
+ wgrad = _ask_field(
865
+ "Should `wgrad` be executed in higher precision? [yes/NO]: ",
866
+ _convert_yes_no_to_bool,
867
+ default=False,
868
+ )
869
+ fp8_config["override_linear_precision"] = (fprop, dgrad, wgrad)
870
+ else:
871
+ fp8_config["override_linear_precision"] = (False, False, False)
872
+
873
+ elif fp8_config["backend"] == "MSAMP":
874
+ if not is_msamp_available():
875
+ raise ValueError("MSAMP was selected, but it is not installed on this machine.")
876
+ fp8_config["optimization_level"] = _ask_options(
877
+ "Which optimization level should be used?",
878
+ ["O1", "O2"],
879
+ lambda x: "O1" if x == 0 else "O2",
880
+ default=1,
881
+ )
882
+
883
+ elif fp8_config["backend"] == "AO":
884
+ if not is_torchao_available():
885
+ raise ValueError("torchao was selected, but it is not installed on this machine.")
886
+ fp8_config["enable_fsdp_float8_all_gather"] = _ask_field(
887
+ "Do you want to enable FSDP2 float8 all gather? This is recommended for better performance if using FSDP2. [YES/no]: ",
888
+ _convert_yes_no_to_bool,
889
+ default=True,
890
+ )
891
+ fp8_config["pad_inner_dim"] = _ask_field(
892
+ "Do you want to pad the inner dimension of weight matrices before float8 matmuls? This is required for _scaled_mm which has strict alignment requirements. Note: padding may cause memory spikes. [YES/no]: ",
893
+ _convert_yes_no_to_bool,
894
+ default=True,
895
+ )
896
+
897
+ if use_dynamo and mixed_precision == "no" and not use_cpu:
898
+ print(
899
+ "Torch dynamo used without mixed precision requires TF32 to be efficient. Accelerate will enable it by default when launching your scripts."
900
+ )
901
+
902
+ if distributed_type == DistributedType.XLA and mixed_precision == "bf16":
903
+ tpu_downcast_bf16 = _ask_field(
904
+ "Should `torch.float` be cast as `bfloat16` and `torch.double` remain `float32` on TPUs?", default="no"
905
+ )
906
+
907
+ return ClusterConfig(
908
+ compute_environment=ComputeEnvironment.LOCAL_MACHINE,
909
+ distributed_type=distributed_type,
910
+ num_processes=num_processes,
911
+ gpu_ids=gpu_ids,
912
+ mixed_precision=mixed_precision,
913
+ downcast_bf16=tpu_downcast_bf16,
914
+ machine_rank=machine_rank,
915
+ num_machines=num_machines,
916
+ main_process_ip=main_process_ip,
917
+ main_process_port=main_process_port,
918
+ main_training_function=main_training_function,
919
+ fp8_config=fp8_config,
920
+ deepspeed_config=deepspeed_config,
921
+ fsdp_config=fsdp_config,
922
+ parallelism_config=parallelism_config,
923
+ megatron_lm_config=megatron_lm_config,
924
+ mpirun_config=mpirun_config,
925
+ use_cpu=use_cpu,
926
+ rdzv_backend=rdzv_backend,
927
+ same_network=same_network,
928
+ commands=tpu_commands,
929
+ command_file=tpu_command_file,
930
+ tpu_env=tpu_env,
931
+ tpu_name=tpu_name,
932
+ tpu_vm=tpu_vm,
933
+ tpu_zone=tpu_zone,
934
+ tpu_use_sudo=tpu_use_sudo,
935
+ tpu_use_cluster=tpu_use_cluster,
936
+ dynamo_config=dynamo_config,
937
+ debug=debug,
938
+ enable_cpu_affinity=enable_cpu_affinity,
939
+ )
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/config.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import argparse
18
+ import os
19
+
20
+ from accelerate.utils import ComputeEnvironment
21
+
22
+ from .cluster import get_cluster_input
23
+ from .config_args import cache_dir, default_config_file, default_yaml_config_file, load_config_from_file # noqa: F401
24
+ from .config_utils import _ask_field, _ask_options, _convert_compute_environment # noqa: F401
25
+ from .sagemaker import get_sagemaker_input
26
+
27
+
28
# Help text shown for the `accelerate config` sub-command.
description = "Launches a series of prompts to create and save a `default_config.yaml` configuration file for your training system. Should always be ran first on your machine"
29
+
30
+
31
def get_user_input():
    """Ask which compute environment is in use and run the matching questionnaire.

    Returns the config object produced by either the SageMaker or the cluster
    (local machine) interactive prompt flow.
    """
    environment = _ask_options(
        "In which compute environment are you running?",
        ["This machine", "AWS (Amazon SageMaker)"],
        _convert_compute_environment,
    )
    if environment == ComputeEnvironment.AMAZON_SAGEMAKER:
        return get_sagemaker_input()
    return get_cluster_input()
42
+
43
+
44
def config_command_parser(subparsers=None):
    """Build the argument parser for `accelerate config`.

    When `subparsers` is given (invoked from the main CLI), registers a
    `config` sub-command on it and wires `config_command` as its handler;
    otherwise creates a standalone parser.
    """
    if subparsers is None:
        parser = argparse.ArgumentParser("Accelerate config command", description=description)
    else:
        parser = subparsers.add_parser("config", description=description)

    parser.add_argument(
        "--config_file",
        default=None,
        help=(
            "The path to use to store the config file. Will default to a file named default_config.yaml in the cache "
            "location, which is the content of the environment `HF_HOME` suffixed with 'accelerate', or if you don't have "
            "such an environment variable, your cache directory ('~/.cache' or the content of `XDG_CACHE_HOME`) suffixed "
            "with 'huggingface'."
        ),
    )

    # Only a sub-command parser can carry the dispatch default.
    if subparsers is not None:
        parser.set_defaults(func=config_command)
    return parser
64
+
65
+
66
def config_command(args):
    """Run the interactive questionnaire and save the resulting configuration.

    Writes JSON when the target path ends in `.json`, YAML otherwise. With no
    `--config_file`, ensures the cache directory exists and uses the default
    YAML location.
    """
    config = get_user_input()
    if args.config_file is not None:
        config_file = args.config_file
    else:
        # Equivalent to the isdir-check-then-makedirs dance: create on demand.
        os.makedirs(cache_dir, exist_ok=True)
        config_file = default_yaml_config_file

    writer = config.to_json_file if config_file.endswith(".json") else config.to_yaml_file
    writer(config_file)
    print(f"accelerate configuration saved at {config_file}")
80
+
81
+
82
def main():
    """CLI entry point: parse arguments and run the config command."""
    parser = config_command_parser()
    config_command(parser.parse_args())


if __name__ == "__main__":
    main()
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/config_args.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import json
18
+ import os
19
+ from dataclasses import dataclass
20
+ from enum import Enum
21
+ from typing import Optional, Union
22
+
23
+ import yaml
24
+
25
+ from ...utils import ComputeEnvironment, DistributedType, SageMakerDistributedType
26
+ from ...utils.constants import SAGEMAKER_PYTHON_VERSION, SAGEMAKER_PYTORCH_VERSION, SAGEMAKER_TRANSFORMERS_VERSION
27
+
28
+
29
# Resolve the Hugging Face cache root: HF_HOME wins; otherwise fall back to
# XDG_CACHE_HOME (default ~/.cache) suffixed with "huggingface", `~` expanded.
hf_cache_home = os.path.expanduser(
    os.environ.get("HF_HOME", os.path.join(os.environ.get("XDG_CACHE_HOME", "~/.cache"), "huggingface"))
)
cache_dir = os.path.join(hf_cache_home, "accelerate")
# NOTE(review): both the "json" and "yaml" defaults point at default_config.yaml —
# looks deliberate for backward compatibility, but confirm upstream intent.
default_json_config_file = os.path.join(cache_dir, "default_config.yaml")
default_yaml_config_file = os.path.join(cache_dir, "default_config.yaml")

# For backward compatibility: use the json config only when it is the sole existing file.
if not os.path.isfile(default_yaml_config_file) and os.path.isfile(default_json_config_file):
    default_config_file = default_json_config_file
else:
    default_config_file = default_yaml_config_file
41
+
42
+
43
def load_config_from_file(config_file):
    """Load an accelerate config file and return the matching config object.

    `None` selects the default config location. The file's `compute_environment`
    key decides between `ClusterConfig` (local machine, the default) and
    `SageMakerConfig`. Raises `FileNotFoundError` for an explicit path that
    does not exist.
    """
    if config_file is None:
        config_file = default_config_file
    elif not os.path.isfile(config_file):
        raise FileNotFoundError(
            f"The passed configuration file `{config_file}` does not exist. "
            "Please pass an existing file to `accelerate launch`, or use the default one "
            "created through `accelerate config` and run `accelerate launch` "
            "without the `--config_file` argument."
        )

    is_json = config_file.endswith(".json")
    with open(config_file, encoding="utf-8") as f:
        raw = json.load(f) if is_json else yaml.safe_load(f)

    # Missing key means a local-machine config.
    environment = raw.get("compute_environment", ComputeEnvironment.LOCAL_MACHINE)
    config_class = ClusterConfig if environment == ComputeEnvironment.LOCAL_MACHINE else SageMakerConfig

    if is_json:
        return config_class.from_json_file(json_file=config_file)
    return config_class.from_yaml_file(yaml_file=config_file)
73
+
74
+
75
@dataclass
class BaseConfig:
    """Shared fields and (de)serialization helpers for accelerate config classes.

    Subclasses (cluster / SageMaker) add their own fields; this base handles
    round-tripping to JSON/YAML, normalizing legacy keys, and coercing string
    values into the proper enums.
    """

    compute_environment: ComputeEnvironment
    distributed_type: Union[DistributedType, SageMakerDistributedType]
    mixed_precision: str
    use_cpu: bool
    debug: bool

    def to_dict(self):
        """Return a serializable dict view of this config.

        Enums are replaced by their underlying values, empty sub-config dicts
        become None, and None-valued top-level keys are dropped. Unlike the
        previous implementation (which aliased and rewrote ``self.__dict__``),
        this builds fresh dicts and never mutates the live instance.
        """

        def _convert_enums(value):
            if isinstance(value, Enum):
                return value.value
            if isinstance(value, dict):
                if not value:
                    # Empty sub-configs serialize as None (then get dropped at top level).
                    return None
                return {key: _convert_enums(sub_value) for key, sub_value in value.items()}
            return value

        # Fresh dict instead of `result = self.__dict__`: serializing must not
        # rewrite the instance's attributes in place.
        result = {key: _convert_enums(value) for key, value in self.__dict__.items()}
        return {k: v for k, v in result.items() if v is not None}

    @staticmethod
    def process_config(config_dict):
        """
        Processes `config_dict` and sets default values for any missing keys
        """
        if "compute_environment" not in config_dict:
            config_dict["compute_environment"] = ComputeEnvironment.LOCAL_MACHINE
        if "distributed_type" not in config_dict:
            raise ValueError("A `distributed_type` must be specified in the config file.")
        if "num_processes" not in config_dict and config_dict["distributed_type"] == DistributedType.NO:
            config_dict["num_processes"] = 1
        if "mixed_precision" not in config_dict:
            # Legacy configs carried a boolean `fp16` flag instead of `mixed_precision`.
            config_dict["mixed_precision"] = "fp16" if ("fp16" in config_dict and config_dict["fp16"]) else None
        if "fp16" in config_dict:  # Convert the config to the new format.
            del config_dict["fp16"]
        if "dynamo_backend" in config_dict:  # Convert the config to the new format.
            dynamo_backend = config_dict.pop("dynamo_backend")
            config_dict["dynamo_config"] = {} if dynamo_backend == "NO" else {"dynamo_backend": dynamo_backend}
        if "use_cpu" not in config_dict:
            config_dict["use_cpu"] = False
        if "debug" not in config_dict:
            config_dict["debug"] = False
        if "enable_cpu_affinity" not in config_dict:
            config_dict["enable_cpu_affinity"] = False
        return config_dict

    @classmethod
    def from_json_file(cls, json_file=None):
        """Instantiate from a JSON file (default location when `json_file` is None).

        Raises ValueError if the file contains keys unknown to this dataclass.
        """
        json_file = default_json_config_file if json_file is None else json_file
        with open(json_file, encoding="utf-8") as f:
            config_dict = json.load(f)
        config_dict = cls.process_config(config_dict)
        extra_keys = sorted(set(config_dict.keys()) - set(cls.__dataclass_fields__.keys()))
        if len(extra_keys) > 0:
            raise ValueError(
                f"The config file at {json_file} had unknown keys ({extra_keys}), please try upgrading your `accelerate`"
                " version or fix (and potentially remove) these keys from your config file."
            )

        return cls(**config_dict)

    def to_json_file(self, json_file):
        """Write the config to `json_file` as pretty-printed, key-sorted JSON."""
        with open(json_file, "w", encoding="utf-8") as f:
            content = json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
            f.write(content)

    @classmethod
    def from_yaml_file(cls, yaml_file=None):
        """Instantiate from a YAML file (default location when `yaml_file` is None).

        Raises ValueError if the file contains keys unknown to this dataclass.
        """
        yaml_file = default_yaml_config_file if yaml_file is None else yaml_file
        with open(yaml_file, encoding="utf-8") as f:
            config_dict = yaml.safe_load(f)
        config_dict = cls.process_config(config_dict)
        extra_keys = sorted(set(config_dict.keys()) - set(cls.__dataclass_fields__.keys()))
        if len(extra_keys) > 0:
            raise ValueError(
                f"The config file at {yaml_file} had unknown keys ({extra_keys}), please try upgrading your `accelerate`"
                " version or fix (and potentially remove) these keys from your config file."
            )
        return cls(**config_dict)

    def to_yaml_file(self, yaml_file):
        """Write the config to `yaml_file` as YAML."""
        with open(yaml_file, "w", encoding="utf-8") as f:
            yaml.safe_dump(self.to_dict(), f)

    def __post_init__(self):
        # Config files store enums as plain strings; coerce them back.
        if isinstance(self.compute_environment, str):
            self.compute_environment = ComputeEnvironment(self.compute_environment)
        if isinstance(self.distributed_type, str):
            if self.compute_environment == ComputeEnvironment.AMAZON_SAGEMAKER:
                self.distributed_type = SageMakerDistributedType(self.distributed_type)
            else:
                self.distributed_type = DistributedType(self.distributed_type)
        if getattr(self, "dynamo_config", None) is None:
            self.dynamo_config = {}
176
+
177
+
178
@dataclass
class ClusterConfig(BaseConfig):
    """Configuration for local / multi-node (non-SageMaker) training runs."""

    num_processes: int = -1  # For instance if we use SLURM and the user manually passes it in
    machine_rank: int = 0
    num_machines: int = 1
    gpu_ids: Optional[str] = None
    main_process_ip: Optional[str] = None
    main_process_port: Optional[int] = None
    rdzv_backend: Optional[str] = "static"
    same_network: Optional[bool] = False
    main_training_function: str = "main"
    enable_cpu_affinity: bool = False

    # args for FP8 training
    fp8_config: Optional[dict] = None
    # args for deepspeed_plugin
    deepspeed_config: Optional[dict] = None
    # args for fsdp
    fsdp_config: Optional[dict] = None
    # args for parallelism config
    parallelism_config: Optional[dict] = None
    # args for megatron_lm
    megatron_lm_config: Optional[dict] = None
    # args for mpirun
    mpirun_config: Optional[dict] = None
    # args for TPU
    downcast_bf16: bool = False

    # args for TPU pods
    tpu_name: Optional[str] = None
    tpu_zone: Optional[str] = None
    tpu_use_cluster: bool = False
    tpu_use_sudo: bool = False
    command_file: Optional[str] = None
    commands: list[str] = None
    tpu_vm: list[str] = None
    tpu_env: list[str] = None

    # args for dynamo
    dynamo_config: Optional[dict] = None

    def __post_init__(self):
        # Normalize every optional sub-config: None -> {} so downstream code
        # can index into them unconditionally.
        for attr in (
            "deepspeed_config",
            "fsdp_config",
            "megatron_lm_config",
            "mpirun_config",
            "fp8_config",
            "parallelism_config",
        ):
            if getattr(self, attr) is None:
                setattr(self, attr, {})
        return super().__post_init__()
233
+
234
+
235
@dataclass
class SageMakerConfig(BaseConfig):
    """Configuration for training jobs launched on AWS SageMaker."""

    # Required SageMaker job settings.
    ec2_instance_type: str
    iam_role_name: str
    # Optional custom container image; falls back to the framework image when None.
    image_uri: Optional[str] = None
    profile: Optional[str] = None
    region: str = "us-east-1"
    num_machines: int = 1
    gpu_ids: str = "all"
    # NOTE(review): this f-string is evaluated at class-definition time, so it always
    # bakes in the class default (1) rather than an instance's num_machines — confirm intended.
    base_job_name: str = f"accelerate-sagemaker-{num_machines}"
    # Pinned framework versions for the SageMaker estimator (from ...utils.constants).
    pytorch_version: str = SAGEMAKER_PYTORCH_VERSION
    transformers_version: str = SAGEMAKER_TRANSFORMERS_VERSION
    py_version: str = SAGEMAKER_PYTHON_VERSION
    sagemaker_inputs_file: Optional[str] = None
    sagemaker_metrics_file: Optional[str] = None
    additional_args: Optional[dict] = None
    dynamo_config: Optional[dict] = None
    enable_cpu_affinity: bool = False
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/config_utils.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import argparse
18
+
19
+ from ...utils.dataclasses import (
20
+ ComputeEnvironment,
21
+ DistributedType,
22
+ DynamoBackend,
23
+ FP8BackendType,
24
+ PrecisionType,
25
+ SageMakerDistributedType,
26
+ )
27
+ from ..menu import BulletMenu
28
+
29
+
30
# Names of the TorchDynamo backends offered in the interactive config menu.
# Each entry must be a valid `DynamoBackend` enum member name, since
# `_convert_dynamo_backend` constructs the enum from the selected string.
DYNAMO_BACKENDS = [
    "EAGER",
    "AOT_EAGER",
    "INDUCTOR",
    "AOT_TS_NVFUSER",
    "NVPRIMS_NVFUSER",
    "CUDAGRAPHS",
    "OFI",
    "FX2TRT",
    "ONNXRT",
    "TENSORRT",
    "AOT_TORCHXLA_TRACE_ONCE",
    "TORCHXLA_TRACE_ONCE",  # fixed typo: was "TORHCHXLA_TRACE_ONCE"
    "TVM",
]
45
+
46
+
47
def _ask_field(input_text, convert_value=None, default=None, error_message=None):
    """Prompt the user until the answer converts successfully.

    An empty answer returns `default` when one is given; a failed conversion
    prints `error_message` (if any) and re-prompts.
    """
    while True:
        raw = input(input_text)
        try:
            if default is not None and not raw:
                return default
            if convert_value is None:
                return raw
            return convert_value(raw)
        except Exception:
            # Conversion failed — explain and ask again.
            if error_message is not None:
                print(error_message)
58
+
59
+
60
def _ask_options(input_text, options=None, convert_value=None, default=0):
    """Render a bullet menu and return the (optionally converted) selection.

    Args:
        input_text: Prompt shown above the menu.
        options: Menu entries. Previously a mutable default argument (`[]`),
            which is shared across calls — replaced with the None sentinel.
        convert_value: Optional callable applied to the selected value.
        default: Index of the pre-selected entry.
    """
    if options is None:
        options = []
    menu = BulletMenu(input_text, options)
    result = menu.run(default_choice=default)
    return convert_value(result) if convert_value is not None else result
64
+
65
+
66
def _convert_compute_environment(value):
    """Map a menu index (0 or 1) to the corresponding `ComputeEnvironment` member."""
    environments = ("LOCAL_MACHINE", "AMAZON_SAGEMAKER")
    return ComputeEnvironment(environments[int(value)])
69
+
70
+
71
def _convert_distributed_mode(value):
    """Map a menu index to the corresponding `DistributedType` member."""
    # Order must match the option order presented in the config menu.
    modes = (
        "NO",
        "MULTI_CPU",
        "MULTI_XPU",
        "MULTI_HPU",
        "MULTI_GPU",
        "MULTI_NPU",
        "MULTI_MLU",
        "MULTI_SDAA",
        "MULTI_MUSA",
        "MULTI_NEURON",
        "XLA",
    )
    return DistributedType(modes[int(value)])
88
+
89
+
90
def _convert_dynamo_backend(value):
    """Map a menu index into DYNAMO_BACKENDS and return the enum member's value string."""
    backend_name = DYNAMO_BACKENDS[int(value)]
    return DynamoBackend(backend_name).value
93
+
94
+
95
def _convert_mixed_precision(value):
    """Map a menu index to the corresponding `PrecisionType` member."""
    precisions = ("no", "fp16", "bf16", "fp8")
    return PrecisionType(precisions[int(value)])
98
+
99
+
100
def _convert_sagemaker_distributed_mode(value):
    """Map a menu index to the corresponding `SageMakerDistributedType` member."""
    modes = ("NO", "DATA_PARALLEL", "MODEL_PARALLEL")
    return SageMakerDistributedType(modes[int(value)])
103
+
104
+
105
def _convert_fp8_backend(value):
    """Map a menu index to the corresponding `FP8BackendType` member."""
    backends = ("AO", "TE", "MSAMP")
    return FP8BackendType(backends[int(value)])
108
+
109
+
110
+ def _convert_yes_no_to_bool(value):
111
+ return {"yes": True, "no": False}[value.lower()]
112
+
113
+
114
class SubcommandHelpFormatter(argparse.RawDescriptionHelpFormatter):
    """
    A custom formatter that will remove the usage line from the help message for subcommands.
    """

    def _format_usage(self, usage, actions, groups, prefix):
        # Drop the "<command> [<args>] " placeholder argparse inserts for subparsers.
        formatted = super()._format_usage(usage, actions, groups, prefix)
        return formatted.replace("<command> [<args>] ", "")
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/default.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from pathlib import Path
18
+
19
+ import torch
20
+
21
+ from ...utils import (
22
+ is_hpu_available,
23
+ is_mlu_available,
24
+ is_musa_available,
25
+ is_neuron_available,
26
+ is_npu_available,
27
+ is_sdaa_available,
28
+ is_xpu_available,
29
+ )
30
+ from .config_args import ClusterConfig, default_json_config_file
31
+ from .config_utils import SubcommandHelpFormatter
32
+
33
+
34
+ description = "Create a default config file for Accelerate with only a few flags set."
35
+
36
+
37
def write_basic_config(mixed_precision="no", save_location: str = default_json_config_file):
    """
    Creates and saves a basic cluster config to be used on a local machine with potentially multiple GPUs. Will also
    set CPU if it is a CPU-only machine.

    Args:
        mixed_precision (`str`, *optional*, defaults to "no"):
            Mixed Precision to use. Should be one of "no", "fp16", or "bf16"
        save_location (`str`, *optional*, defaults to `default_json_config_file`):
            Optional custom save location. Should be passed to `--config_file` when using `accelerate launch`. Default
            location is inside the huggingface cache folder (`~/.cache/huggingface`) but can be overridden by setting
            the `HF_HOME` environmental variable, followed by `accelerate/default_config.yaml`.

    Returns:
        The `Path` of the written config file, or `False` if a config already exists at `save_location`.
    """
    path = Path(save_location)
    path.parent.mkdir(parents=True, exist_ok=True)
    if path.exists():
        print(
            f"Configuration already exists at {save_location}, will not override. Run `accelerate config` manually or pass a different `save_location`."
        )
        return False
    mixed_precision = mixed_precision.lower()
    if mixed_precision not in ["no", "fp16", "bf16", "fp8"]:
        raise ValueError(
            f"`mixed_precision` should be one of 'no', 'fp16', 'bf16', or 'fp8'. Received {mixed_precision}"
        )
    config = {
        "compute_environment": "LOCAL_MACHINE",
        "mixed_precision": mixed_precision,
    }
    # Accelerator backends, checked in priority order; the first available one
    # wins. The device-count getters are lambdas so a backend's torch submodule
    # is only touched when that backend is actually available.
    # BUGFIX: the original used `if is_mlu_available(): ... if is_sdaa_available():`
    # (two separate chains), so on an MLU-only machine the trailing `else` of the
    # second chain clobbered the MLU settings with CPU defaults.
    device_checks = [
        (is_mlu_available, lambda: torch.mlu.device_count(), "MULTI_MLU"),
        (is_sdaa_available, lambda: torch.sdaa.device_count(), "MULTI_SDAA"),
        (is_musa_available, lambda: torch.musa.device_count(), "MULTI_MUSA"),
        (is_hpu_available, lambda: torch.hpu.device_count(), "MULTI_HPU"),
        (torch.cuda.is_available, lambda: torch.cuda.device_count(), "MULTI_GPU"),
        (is_xpu_available, lambda: torch.xpu.device_count(), "MULTI_XPU"),
        (is_npu_available, lambda: torch.npu.device_count(), "MULTI_NPU"),
        (is_neuron_available, lambda: torch.neuron.device_count(), "MULTI_NEURON"),
    ]
    for is_available, count_devices, multi_type in device_checks:
        if is_available():
            num_devices = count_devices()
            config["num_processes"] = num_devices
            config["use_cpu"] = False
            # Only use a distributed type when more than one device is present.
            config["distributed_type"] = multi_type if num_devices > 1 else "NO"
            break
    else:
        # CPU-only machine.
        config["use_cpu"] = True
        config["num_processes"] = 1
        config["distributed_type"] = "NO"
    config["debug"] = False
    config["enable_cpu_affinity"] = False
    config = ClusterConfig(**config)
    config.to_json_file(path)
    return path
140
+
141
+
142
def default_command_parser(parser, parents):
    """Register the `default` subcommand, which writes a basic config file.

    Args:
        parser: The subparsers action to attach the new subcommand to.
        parents: Parent parsers whose arguments should be inherited.

    Returns:
        The configured subcommand parser.
    """
    parser = parser.add_parser("default", parents=parents, help=description, formatter_class=SubcommandHelpFormatter)
    parser.add_argument(
        "--config_file",
        default=default_json_config_file,
        help=(
            "The path to use to store the config file. Will default to a file named default_config.yaml in the cache "
            "location, which is the content of the environment `HF_HOME` suffixed with 'accelerate', or if you don't have "
            "such an environment variable, your cache directory ('~/.cache' or the content of `XDG_CACHE_HOME`) suffixed "
            "with 'huggingface'."
        ),
        dest="save_location",
    )

    parser.add_argument(
        "--mixed_precision",
        # "fp8" added for consistency: `write_basic_config` already validates
        # and accepts it, but the CLI previously rejected it.
        choices=["no", "fp16", "bf16", "fp8"],
        type=str,
        help="Whether or not to use mixed precision training. "
        "Choose between FP16 and BF16 (bfloat16) training. "
        "BF16 training is only supported on Nvidia Ampere GPUs and PyTorch 1.10 or later.",
        default="no",
    )
    parser.set_defaults(func=default_config_command)
    return parser
167
+
168
+
169
def default_config_command(args):
    """Write the basic default config and report where it was saved."""
    saved_path = write_basic_config(args.mixed_precision, args.save_location)
    # `write_basic_config` returns False when a config already exists.
    if saved_path:
        print(f"accelerate configuration saved at {saved_path}")
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/sagemaker.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ import json
17
+ import os
18
+
19
+ from ...utils.constants import SAGEMAKER_PARALLEL_EC2_INSTANCES, TORCH_DYNAMO_MODES
20
+ from ...utils.dataclasses import ComputeEnvironment, SageMakerDistributedType
21
+ from ...utils.imports import is_boto3_available
22
+ from .config_args import SageMakerConfig
23
+ from .config_utils import (
24
+ DYNAMO_BACKENDS,
25
+ _ask_field,
26
+ _ask_options,
27
+ _convert_dynamo_backend,
28
+ _convert_mixed_precision,
29
+ _convert_sagemaker_distributed_mode,
30
+ _convert_yes_no_to_bool,
31
+ )
32
+
33
+
34
+ if is_boto3_available():
35
+ import boto3 # noqa: F401
36
+
37
+
38
def _create_iam_role_for_sagemaker(role_name):
    """Create an IAM role that the SageMaker service can assume.

    Grants SageMaker control-plane access plus ECR image pulls, CloudWatch
    metrics/logs, and S3 data access. If a role named *role_name* already
    exists it is left untouched.
    """
    iam_client = boto3.client("iam")

    # Trust policy allowing the SageMaker service principal to assume the role.
    sagemaker_trust_policy = {
        "Version": "2012-10-17",
        "Statement": [
            {"Effect": "Allow", "Principal": {"Service": "sagemaker.amazonaws.com"}, "Action": "sts:AssumeRole"}
        ],
    }
    try:
        # create the role, associated with the chosen trust policy
        iam_client.create_role(
            RoleName=role_name, AssumeRolePolicyDocument=json.dumps(sagemaker_trust_policy, indent=2)
        )
        # Permissions a training job needs: SageMaker APIs, pulling images from
        # ECR, publishing/reading CloudWatch metrics and logs, and S3 access.
        policy_document = {
            "Version": "2012-10-17",
            "Statement": [
                {
                    "Effect": "Allow",
                    "Action": [
                        "sagemaker:*",
                        "ecr:GetDownloadUrlForLayer",
                        "ecr:BatchGetImage",
                        "ecr:BatchCheckLayerAvailability",
                        "ecr:GetAuthorizationToken",
                        "cloudwatch:PutMetricData",
                        "cloudwatch:GetMetricData",
                        "cloudwatch:GetMetricStatistics",
                        "cloudwatch:ListMetrics",
                        "logs:CreateLogGroup",
                        "logs:CreateLogStream",
                        "logs:DescribeLogStreams",
                        "logs:PutLogEvents",
                        "logs:GetLogEvents",
                        "s3:CreateBucket",
                        "s3:ListBucket",
                        "s3:GetBucketLocation",
                        "s3:GetObject",
                        "s3:PutObject",
                    ],
                    "Resource": "*",
                }
            ],
        }
        # attach policy to role
        iam_client.put_role_policy(
            RoleName=role_name,
            PolicyName=f"{role_name}_policy_permission",
            PolicyDocument=json.dumps(policy_document, indent=2),
        )
    except iam_client.exceptions.EntityAlreadyExistsException:
        print(f"role {role_name} already exists. Using existing one")
90
+
91
+
92
def _get_iam_role_arn(role_name):
    """Look up and return the ARN of an existing IAM role via boto3."""
    role = boto3.client("iam").get_role(RoleName=role_name)
    return role["Role"]["Arn"]
95
+
96
+
97
def get_sagemaker_input():
    """Interactively collect all settings needed to build a `SageMakerConfig`.

    Walks the user through: AWS credentials (profile or access keys, exported
    via environment variables), IAM role selection or creation, optional
    custom Docker image / input-channel and metrics TSV files, distributed
    mode, torch dynamo options, EC2 instance type, machine count, and mixed
    precision.

    Returns:
        A fully populated `SageMakerConfig`.
    """
    # --- AWS credentials ---
    credentials_configuration = _ask_options(
        "How do you want to authorize?",
        ["AWS Profile", "Credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) "],
        int,
    )
    aws_profile = None
    if credentials_configuration == 0:
        aws_profile = _ask_field("Enter your AWS Profile name: [default] ", default="default")
        os.environ["AWS_PROFILE"] = aws_profile
    else:
        print(
            "Note you will need to provide AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY when you launch you training script with,"
            "`accelerate launch --aws_access_key_id XXX --aws_secret_access_key YYY`"
        )
        aws_access_key_id = _ask_field("AWS Access Key ID: ")
        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id

        aws_secret_access_key = _ask_field("AWS Secret Access Key: ")
        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key

    aws_region = _ask_field("Enter your AWS Region: [us-east-1]", default="us-east-1")
    os.environ["AWS_DEFAULT_REGION"] = aws_region

    # --- IAM role: reuse an existing one or create a new one on the spot ---
    role_management = _ask_options(
        "Do you already have an IAM Role for executing Amazon SageMaker Training Jobs?",
        ["Provide IAM Role name", "Create new IAM role using credentials"],
        int,
    )
    if role_management == 0:
        iam_role_name = _ask_field("Enter your IAM role name: ")
    else:
        iam_role_name = "accelerate_sagemaker_execution_role"
        print(f'Accelerate will create an iam role "{iam_role_name}" using the provided credentials')
        _create_iam_role_for_sagemaker(iam_role_name)

    # --- optional Docker image, input channels, and metrics files ---
    is_custom_docker_image = _ask_field(
        "Do you want to use custom Docker image? [yes/NO]: ",
        _convert_yes_no_to_bool,
        default=False,
        error_message="Please enter yes or no.",
    )
    docker_image = None
    if is_custom_docker_image:
        docker_image = _ask_field("Enter your Docker image: ", lambda x: str(x).lower())

    is_sagemaker_inputs_enabled = _ask_field(
        "Do you want to provide SageMaker input channels with data locations? [yes/NO]: ",
        _convert_yes_no_to_bool,
        default=False,
        error_message="Please enter yes or no.",
    )
    sagemaker_inputs_file = None
    if is_sagemaker_inputs_enabled:
        sagemaker_inputs_file = _ask_field(
            "Enter the path to the SageMaker inputs TSV file with columns (channel_name, data_location): ",
            lambda x: str(x).lower(),
        )

    is_sagemaker_metrics_enabled = _ask_field(
        "Do you want to enable SageMaker metrics? [yes/NO]: ",
        _convert_yes_no_to_bool,
        default=False,
        error_message="Please enter yes or no.",
    )
    sagemaker_metrics_file = None
    if is_sagemaker_metrics_enabled:
        sagemaker_metrics_file = _ask_field(
            "Enter the path to the SageMaker metrics TSV file with columns (metric_name, metric_regex): ",
            lambda x: str(x).lower(),
        )

    # --- distributed mode and torch dynamo options ---
    distributed_type = _ask_options(
        "What is the distributed mode?",
        ["No distributed training", "Data parallelism"],
        _convert_sagemaker_distributed_mode,
    )
    dynamo_config = {}
    use_dynamo = _ask_field(
        "Do you wish to optimize your script with torch dynamo?[yes/NO]:",
        _convert_yes_no_to_bool,
        default=False,
        error_message="Please enter yes or no.",
    )
    if use_dynamo:
        prefix = "dynamo_"
        # default=2 pre-selects "inductor" in DYNAMO_BACKENDS.
        dynamo_config[prefix + "backend"] = _ask_options(
            "Which dynamo backend would you like to use?",
            [x.lower() for x in DYNAMO_BACKENDS],
            _convert_dynamo_backend,
            default=2,
        )
        use_custom_options = _ask_field(
            "Do you want to customize the defaults sent to torch.compile? [yes/NO]: ",
            _convert_yes_no_to_bool,
            default=False,
            error_message="Please enter yes or no.",
        )

        if use_custom_options:
            dynamo_config[prefix + "mode"] = _ask_options(
                "Which mode do you want to use?",
                TORCH_DYNAMO_MODES,
                lambda x: TORCH_DYNAMO_MODES[int(x)],
                default="default",
            )
            dynamo_config[prefix + "use_fullgraph"] = _ask_field(
                "Do you want the fullgraph mode or it is ok to break model into several subgraphs? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )
            dynamo_config[prefix + "use_dynamic"] = _ask_field(
                "Do you want to enable dynamic shape tracing? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )
            dynamo_config[prefix + "use_regional_compilation"] = _ask_field(
                "Do you want to enable regional compilation? [yes/NO]: ",
                _convert_yes_no_to_bool,
                default=False,
                error_message="Please enter yes or no.",
            )

    # --- EC2 instance type: restricted list for distributed jobs, free-form otherwise ---
    ec2_instance_query = "Which EC2 instance type you want to use for your training?"
    if distributed_type != SageMakerDistributedType.NO:
        ec2_instance_type = _ask_options(
            ec2_instance_query, SAGEMAKER_PARALLEL_EC2_INSTANCES, lambda x: SAGEMAKER_PARALLEL_EC2_INSTANCES[int(x)]
        )
    else:
        ec2_instance_query += "? [ml.p3.2xlarge]:"
        ec2_instance_type = _ask_field(ec2_instance_query, lambda x: str(x).lower(), default="ml.p3.2xlarge")

    # Debug mode (checking distributed ops) only makes sense for distributed jobs.
    debug = False
    if distributed_type != SageMakerDistributedType.NO:
        debug = _ask_field(
            "Should distributed operations be checked while running for errors? This can avoid timeout issues but will be slower. [yes/NO]: ",
            _convert_yes_no_to_bool,
            default=False,
            error_message="Please enter yes or no.",
        )

    num_machines = 1
    if distributed_type in (SageMakerDistributedType.DATA_PARALLEL, SageMakerDistributedType.MODEL_PARALLEL):
        num_machines = _ask_field(
            "How many machines do you want use? [1]: ",
            int,
            default=1,
        )

    mixed_precision = _ask_options(
        "Do you wish to use FP16 or BF16 (mixed precision)?",
        ["no", "fp16", "bf16", "fp8"],
        _convert_mixed_precision,
    )

    if use_dynamo and mixed_precision == "no":
        print(
            "Torch dynamo used without mixed precision requires TF32 to be efficient. Accelerate will enable it by default when launching your scripts."
        )

    return SageMakerConfig(
        image_uri=docker_image,
        compute_environment=ComputeEnvironment.AMAZON_SAGEMAKER,
        distributed_type=distributed_type,
        use_cpu=False,
        dynamo_config=dynamo_config,
        ec2_instance_type=ec2_instance_type,
        profile=aws_profile,
        region=aws_region,
        iam_role_name=iam_role_name,
        mixed_precision=mixed_precision,
        num_machines=num_machines,
        sagemaker_inputs_file=sagemaker_inputs_file,
        sagemaker_metrics_file=sagemaker_metrics_file,
        debug=debug,
    )
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/config/update.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2022 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from pathlib import Path
18
+
19
+ from .config_args import default_config_file, load_config_from_file
20
+ from .config_utils import SubcommandHelpFormatter
21
+
22
+
23
+ description = "Update an existing config file with the latest defaults while maintaining the old configuration."
24
+
25
+
26
def update_config(args):
    """
    Update an existing config file with the latest defaults while maintaining the old configuration.

    Returns:
        The path of the config file that was updated.

    Raises:
        ValueError: If no config file was passed and the default one does not
            exist, or the passed config file does not exist.
    """
    config_file = args.config_file
    if config_file is None and Path(default_config_file).exists():
        config_file = default_config_file
    # BUGFIX: previously `Path(None)` raised a TypeError when no config file
    # was passed and the default one was missing; raise a clear error instead.
    elif config_file is None or not Path(config_file).exists():
        raise ValueError(f"The passed config file located at {config_file} doesn't exist.")
    config = load_config_from_file(config_file)

    # Preserve the on-disk format of the existing file.
    if config_file.endswith(".json"):
        config.to_json_file(config_file)
    else:
        config.to_yaml_file(config_file)
    return config_file
42
+
43
+
44
def update_command_parser(parser, parents):
    """Register the `update` subcommand on the given subparsers collection."""
    sub_parser = parser.add_parser(
        "update", parents=parents, help=description, formatter_class=SubcommandHelpFormatter
    )
    sub_parser.add_argument(
        "--config_file",
        default=None,
        help=(
            "The path to the config file to update. Will default to a file named default_config.yaml in the cache "
            "location, which is the content of the environment `HF_HOME` suffixed with 'accelerate', or if you don't have "
            "such an environment variable, your cache directory ('~/.cache' or the content of `XDG_CACHE_HOME`) suffixed "
            "with 'huggingface'."
        ),
    )
    sub_parser.set_defaults(func=update_config_command)
    return sub_parser
59
+
60
+
61
def update_config_command(args):
    """Run the config update and report which file was rewritten."""
    updated_file = update_config(args)
    print(f"Successfully updated the configuration file at {updated_file}.")
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/env.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2022 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import argparse
18
+ import os
19
+ import platform
20
+ import subprocess
21
+
22
+ import numpy as np
23
+ import psutil
24
+ import torch
25
+
26
+ from accelerate import __version__ as version
27
+ from accelerate.commands.config import default_config_file, load_config_from_file
28
+
29
+ from ..utils import (
30
+ is_mlu_available,
31
+ is_musa_available,
32
+ is_neuron_available,
33
+ is_npu_available,
34
+ is_sdaa_available,
35
+ is_xpu_available,
36
+ )
37
+
38
+
39
def env_command_parser(subparsers=None):
    """Build the argparse parser for `accelerate env`.

    Works both standalone (subparsers is None) and as a subcommand attached to
    an existing subparsers collection.
    """
    if subparsers is None:
        parser = argparse.ArgumentParser("Accelerate env command")
    else:
        parser = subparsers.add_parser("env")

    parser.add_argument(
        "--config_file", default=None, help="The config file to use for the default values in the launching script."
    )

    # Only wire the command function when running as a subcommand.
    if subparsers is not None:
        parser.set_defaults(func=env_command)
    return parser
52
+
53
+
54
def env_command(args):
    """Gather and print environment information for GitHub bug reports.

    Detects the active accelerator backend, locates the `accelerate` binary,
    loads the (default or passed) Accelerate config, and prints everything in
    a copy-pastable list. Returns the collected info dict.
    """
    pt_version = torch.__version__
    pt_cuda_available = torch.cuda.is_available()
    pt_xpu_available = is_xpu_available()
    pt_mlu_available = is_mlu_available()
    pt_sdaa_available = is_sdaa_available()
    pt_musa_available = is_musa_available()
    pt_npu_available = is_npu_available()
    pt_neuron_available = is_neuron_available()

    # First available backend wins, in this fixed priority order.
    accelerator = "N/A"
    if pt_cuda_available:
        accelerator = "CUDA"
    elif pt_xpu_available:
        accelerator = "XPU"
    elif pt_mlu_available:
        accelerator = "MLU"
    elif pt_sdaa_available:
        accelerator = "SDAA"
    elif pt_musa_available:
        accelerator = "MUSA"
    elif pt_npu_available:
        accelerator = "NPU"
    elif pt_neuron_available:
        accelerator = "NEURON"

    accelerate_config = "Not found"
    # Get the default from the config file.
    if args.config_file is not None or os.path.isfile(default_config_file):
        accelerate_config = load_config_from_file(args.config_file).to_dict()

    # if we can run which, get it
    command = None
    bash_location = "Not found"
    if os.name == "nt":
        command = ["where", "accelerate"]
    elif os.name == "posix":
        command = ["which", "accelerate"]
    if command is not None:
        bash_location = subprocess.check_output(command, text=True, stderr=subprocess.STDOUT).strip()
    info = {
        "`Accelerate` version": version,
        "Platform": platform.platform(),
        "`accelerate` bash location": bash_location,
        "Python version": platform.python_version(),
        "Numpy version": np.__version__,
        "PyTorch version": f"{pt_version}",
        "PyTorch accelerator": accelerator,
        "System RAM": f"{psutil.virtual_memory().total / 1024**3:.2f} GB",
    }
    # Device-specific detail for whichever backend was detected.
    # NOTE(review): this chain checks neuron before npu, unlike the accelerator
    # detection above — confirm the ordering difference is intended.
    if pt_cuda_available:
        info["GPU type"] = torch.cuda.get_device_name()
    elif pt_xpu_available:
        info["XPU type"] = torch.xpu.get_device_name()
    elif pt_mlu_available:
        info["MLU type"] = torch.mlu.get_device_name()
    elif pt_sdaa_available:
        info["SDAA type"] = torch.sdaa.get_device_name()
    elif pt_musa_available:
        info["MUSA type"] = torch.musa.get_device_name()
    elif pt_neuron_available:
        info["NEURON type"] = torch.neuron.get_device_name()
    elif pt_npu_available:
        info["CANN version"] = torch.version.cann

    print("\nCopy-and-paste the text below in your GitHub issue\n")
    print("\n".join([f"- {prop}: {val}" for prop, val in info.items()]))

    print("- `Accelerate` default config:" if args.config_file is None else "- `Accelerate` config passed:")
    accelerate_config_str = (
        "\n".join([f"\t- {prop}: {val}" for prop, val in accelerate_config.items()])
        if isinstance(accelerate_config, dict)
        else f"\t{accelerate_config}"
    )
    print(accelerate_config_str)

    info["`Accelerate` configs"] = accelerate_config

    return info
133
+
134
+
135
def main() -> int:
    """CLI entry point for `accelerate env`; returns the process exit code."""
    parsed_args = env_command_parser().parse_args()
    env_command(parsed_args)
    return 0
140
+
141
+
142
+ if __name__ == "__main__":
143
+ raise SystemExit(main())
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/estimate.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2023 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ from typing import Optional
17
+
18
+ import torch
19
+ from huggingface_hub import model_info
20
+ from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
21
+
22
+ from accelerate import init_empty_weights
23
+ from accelerate.commands.utils import CustomArgumentParser
24
+ from accelerate.utils import (
25
+ calculate_maximum_sizes,
26
+ convert_bytes,
27
+ is_timm_available,
28
+ is_transformers_available,
29
+ )
30
+
31
+
32
+ if is_transformers_available():
33
+ import transformers
34
+ from transformers import AutoConfig, AutoModel
35
+
36
+ if is_timm_available():
37
+ import timm
38
+
39
+
40
+ def verify_on_hub(repo: str, token: Optional[str] = None):
41
+ "Verifies that the model is on the hub and returns the model info."
42
+ try:
43
+ return model_info(repo, token=token)
44
+ except (OSError, GatedRepoError):
45
+ return "gated"
46
+ except RepositoryNotFoundError:
47
+ return "repo"
48
+
49
+
50
+ def check_has_model(error):
51
+ """
52
+ Checks what library spawned `error` when a model is not found
53
+ """
54
+ if is_timm_available() and isinstance(error, RuntimeError) and "Unknown model" in error.args[0]:
55
+ return "timm"
56
+ elif (
57
+ is_transformers_available()
58
+ and isinstance(error, OSError)
59
+ and "does not appear to have a file named" in error.args[0]
60
+ ):
61
+ return "transformers"
62
+ else:
63
+ return "unknown"
64
+
65
+
66
+ def create_empty_model(
67
+ model_name: str, library_name: str, trust_remote_code: bool = False, access_token: Optional[str] = None
68
+ ):
69
+ """
70
+ Creates an empty model in full precision from its parent library on the `Hub` to calculate the overall memory
71
+ consumption.
72
+
73
+ Args:
74
+ model_name (`str`):
75
+ The model name on the Hub
76
+ library_name (`str`):
77
+ The library the model has an integration with, such as `transformers`. Will be used if `model_name` has no
78
+ metadata on the Hub to determine the library.
79
+ trust_remote_code (`bool`, `optional`, defaults to `False`):
80
+ Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
81
+ should only be set to `True` for repositories you trust and in which you have read the code, as it will
82
+ execute code present on the Hub on your local machine.
83
+ access_token (`str`, `optional`, defaults to `None`):
84
+ The access token to use to access private or gated models on the Hub. (for use on the Gradio app)
85
+
86
+ Returns:
87
+ `torch.nn.Module`: The torch model that has been initialized on the `meta` device.
88
+
89
+ """
90
+ model_info = verify_on_hub(model_name, access_token)
91
+ # Simplified errors
92
+ if model_info == "gated":
93
+ raise OSError(
94
+ f"Repo for model `{model_name}` is gated. You must be authenticated to access it. Please run `huggingface-cli login`."
95
+ )
96
+ elif model_info == "repo":
97
+ raise OSError(
98
+ f"Repo for model `{model_name}` does not exist on the Hub. If you are trying to access a private repo,"
99
+ " make sure you are authenticated via `huggingface-cli login` and have access."
100
+ )
101
+ if library_name is None:
102
+ library_name = getattr(model_info, "library_name", False)
103
+ if not library_name:
104
+ raise ValueError(
105
+ f"Model `{model_name}` does not have any library metadata on the Hub, please manually pass in a `--library_name` to use (such as `transformers`)"
106
+ )
107
+ if library_name == "transformers":
108
+ if not is_transformers_available():
109
+ raise ImportError(
110
+ f"To check `{model_name}`, `transformers` must be installed. Please install it via `pip install transformers`"
111
+ )
112
+ print(f"Loading pretrained config for `{model_name}` from `transformers`...")
113
+ if model_info.config is None:
114
+ raise RuntimeError(f"Tried to load `{model_name}` with `transformers` but it does not have any metadata.")
115
+
116
+ auto_map = model_info.config.get("auto_map", False)
117
+ config = AutoConfig.from_pretrained(model_name, trust_remote_code=trust_remote_code, token=access_token)
118
+ with init_empty_weights():
119
+ # remote code could specify a specific `AutoModel` class in the `auto_map`
120
+ constructor = AutoModel
121
+ if isinstance(auto_map, dict):
122
+ value = None
123
+ for key in auto_map.keys():
124
+ if key.startswith("AutoModelFor"):
125
+ value = key
126
+ break
127
+ if value is not None:
128
+ constructor = getattr(transformers, value)
129
+ # we need to pass the dtype, otherwise it is going to use the torch_dtype that is saved in the config
130
+ model = constructor.from_config(config, torch_dtype=torch.float32, trust_remote_code=trust_remote_code)
131
+ elif library_name == "timm":
132
+ if not is_timm_available():
133
+ raise ImportError(
134
+ f"To check `{model_name}`, `timm` must be installed. Please install it via `pip install timm`"
135
+ )
136
+ print(f"Loading pretrained config for `{model_name}` from `timm`...")
137
+ with init_empty_weights():
138
+ model = timm.create_model(model_name, pretrained=False)
139
+ else:
140
+ raise ValueError(
141
+ f"Library `{library_name}` is not supported yet, please open an issue on GitHub for us to add support."
142
+ )
143
+ return model
144
+
145
+
146
+ def create_ascii_table(headers: list, rows: list, title: str):
147
+ "Creates a pretty table from a list of rows, minimal version of `tabulate`."
148
+ sep_char, in_between = "│", "─"
149
+ column_widths = []
150
+ for i in range(len(headers)):
151
+ column_values = [row[i] for row in rows] + [headers[i]]
152
+ max_column_width = max(len(value) for value in column_values)
153
+ column_widths.append(max_column_width)
154
+
155
+ formats = [f"%{column_widths[i]}s" for i in range(len(rows[0]))]
156
+
157
+ pattern = f"{sep_char}{sep_char.join(formats)}{sep_char}"
158
+ diff = 0
159
+
160
+ def make_row(left_char, middle_char, right_char):
161
+ return f"{left_char}{middle_char.join([in_between * n for n in column_widths])}{in_between * diff}{right_char}"
162
+
163
+ separator = make_row("├", "┼", "┤")
164
+ if len(title) > sum(column_widths):
165
+ diff = abs(len(title) - len(separator))
166
+ column_widths[-1] += diff
167
+
168
+ # Update with diff
169
+ separator = make_row("├", "┼", "┤")
170
+ initial_rows = [
171
+ make_row("┌", in_between, "┐"),
172
+ f"{sep_char}{title.center(len(separator) - 2)}{sep_char}",
173
+ make_row("├", "┬", "┤"),
174
+ ]
175
+ table = "\n".join(initial_rows) + "\n"
176
+ column_widths[-1] += diff
177
+ centered_line = [text.center(column_widths[i]) for i, text in enumerate(headers)]
178
+ table += f"{pattern % tuple(centered_line)}\n{separator}\n"
179
+ for i, line in enumerate(rows):
180
+ centered_line = [t.center(column_widths[i]) for i, t in enumerate(line)]
181
+ table += f"{pattern % tuple(centered_line)}\n"
182
+ table += f"└{'┴'.join([in_between * n for n in column_widths])}┘"
183
+
184
+ return table
185
+
186
+
187
+ def estimate_command_parser(subparsers=None):
188
+ if subparsers is not None:
189
+ parser = subparsers.add_parser("estimate-memory")
190
+ else:
191
+ parser = CustomArgumentParser(
192
+ description="Model size estimator for fitting a model onto device(e.g. cuda, xpu) memory."
193
+ )
194
+
195
+ parser.add_argument("model_name", type=str, help="The model name on the Hugging Face Hub.")
196
+ parser.add_argument(
197
+ "--library_name",
198
+ type=str,
199
+ help="The library the model has an integration with, such as `transformers`, needed only if this information is not stored on the Hub.",
200
+ choices=["timm", "transformers"],
201
+ )
202
+ parser.add_argument(
203
+ "--dtypes",
204
+ type=str,
205
+ nargs="+",
206
+ default=["float32", "float16", "int8", "int4"],
207
+ help="The dtypes to use for the model, must be one (or many) of `float32`, `float16`, `int8`, and `int4`",
208
+ choices=["float32", "float16", "int8", "int4"],
209
+ )
210
+ parser.add_argument(
211
+ "--trust_remote_code",
212
+ action="store_true",
213
+ help="""Whether or not to allow for custom models defined on the Hub in their own modeling files. This flag
214
+ should only be used for repositories you trust and in which you have read the code, as it will execute
215
+ code present on the Hub on your local machine.""",
216
+ default=False,
217
+ )
218
+
219
+ if subparsers is not None:
220
+ parser.set_defaults(func=estimate_command)
221
+ return parser
222
+
223
+
224
+ def estimate_training_usage(bytes: int, mixed_precision: str, msamp_config: Optional[str] = None) -> dict:
225
+ """
226
+ Given an amount of `bytes` and `mixed_precision`, calculates how much training memory is needed for a batch size of
227
+ 1.
228
+
229
+ Args:
230
+ bytes (`int`):
231
+ The size of the model being trained.
232
+ mixed_precision (`str`):
233
+ The mixed precision that would be ran.
234
+ msamp_config (`str`):
235
+ The msamp config to estimate the training memory for if `mixed_precision` is set to `"fp8"`.
236
+ """
237
+ memory_sizes = {"model": -1, "optimizer": -1, "gradients": -1, "step": -1}
238
+ fp32_size = bytes
239
+ fp16_size = bytes // 2
240
+
241
+ if mixed_precision == "float32":
242
+ memory_sizes["model"] = fp32_size
243
+ memory_sizes["gradients"] = fp32_size
244
+ memory_sizes["optimizer"] = fp32_size * 2
245
+ memory_sizes["step"] = fp32_size * 4
246
+ elif mixed_precision in ("float16", "bfloat16") or (mixed_precision == "fp8" and msamp_config is None):
247
+ # With native `TransformersEngine`, there is no memory savings with FP8
248
+ # With mixed precision training, the model has weights stored
249
+ # in FP16 and FP32
250
+ memory_sizes["model"] = fp32_size
251
+ # 1.5 from weight gradient + computation (GEMM)
252
+ memory_sizes["gradients"] = fp32_size + fp16_size
253
+ # 2x from optimizer states
254
+ memory_sizes["optimizer"] = fp32_size * 2 # Optimizer states
255
+ memory_sizes["step"] = memory_sizes["optimizer"]
256
+ return memory_sizes
257
+
258
+
259
+ def gather_data(args):
260
+ "Creates an empty model and gathers the data for the sizes"
261
+ try:
262
+ model = create_empty_model(
263
+ args.model_name, library_name=args.library_name, trust_remote_code=args.trust_remote_code
264
+ )
265
+ except (RuntimeError, OSError) as e:
266
+ library = check_has_model(e)
267
+ if library != "unknown":
268
+ raise RuntimeError(
269
+ f"Tried to load `{args.model_name}` with `{library}` but a possible model to load was not found inside the repo."
270
+ )
271
+ raise e
272
+
273
+ total_size, largest_layer = calculate_maximum_sizes(model)
274
+
275
+ data = []
276
+
277
+ for dtype in args.dtypes:
278
+ dtype_total_size = total_size
279
+ dtype_largest_layer = largest_layer[0]
280
+ dtype_training_size = estimate_training_usage(dtype_total_size, dtype)
281
+ if dtype == "float16":
282
+ dtype_total_size /= 2
283
+ dtype_largest_layer /= 2
284
+ elif dtype == "int8":
285
+ dtype_total_size /= 4
286
+ dtype_largest_layer /= 4
287
+ elif dtype == "int4":
288
+ dtype_total_size /= 8
289
+ dtype_largest_layer /= 8
290
+ data.append([dtype, dtype_largest_layer, dtype_total_size, dtype_training_size])
291
+ return data
292
+
293
+
294
+ def estimate_command(args):
295
+ data = gather_data(args)
296
+ for row in data:
297
+ for i, item in enumerate(row):
298
+ if isinstance(item, (int, float)):
299
+ row[i] = convert_bytes(item)
300
+ elif isinstance(item, dict):
301
+ training_usage = max(item.values())
302
+ row[i] = convert_bytes(training_usage) if training_usage != -1 else "N/A"
303
+
304
+ headers = ["dtype", "Largest Layer", "Total Size", "Training using Adam"]
305
+
306
+ title = f"Memory Usage for loading `{args.model_name}`"
307
+ table = create_ascii_table(headers, data, title)
308
+ print(table)
309
+
310
+
311
+ def main():
312
+ parser = estimate_command_parser()
313
+ args = parser.parse_args()
314
+ estimate_command(args)
315
+
316
+
317
+ if __name__ == "__main__":
318
+ main()
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/launch.py ADDED
@@ -0,0 +1,1415 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import argparse
18
+ import importlib
19
+ import logging
20
+ import os
21
+ import subprocess
22
+ import sys
23
+ from pathlib import Path
24
+
25
+ import torch
26
+
27
+ from accelerate.commands.config import default_config_file, load_config_from_file
28
+ from accelerate.commands.config.config_args import SageMakerConfig
29
+ from accelerate.commands.config.config_utils import DYNAMO_BACKENDS
30
+ from accelerate.commands.utils import CustomArgumentParser
31
+ from accelerate.state import get_int_from_env
32
+ from accelerate.utils import (
33
+ ComputeEnvironment,
34
+ DistributedType,
35
+ PrepareForLaunch,
36
+ _filter_args,
37
+ check_cuda_p2p_ib_support,
38
+ convert_dict_to_env_variables,
39
+ is_bf16_available,
40
+ is_deepspeed_available,
41
+ is_hpu_available,
42
+ is_mlu_available,
43
+ is_musa_available,
44
+ is_neuron_available,
45
+ is_npu_available,
46
+ is_rich_available,
47
+ is_sagemaker_available,
48
+ is_sdaa_available,
49
+ is_torch_xla_available,
50
+ is_xpu_available,
51
+ patch_environment,
52
+ prepare_deepspeed_cmd_env,
53
+ prepare_multi_gpu_env,
54
+ prepare_sagemager_args_inputs,
55
+ prepare_simple_launcher_cmd_env,
56
+ prepare_tpu,
57
+ str_to_bool,
58
+ )
59
+ from accelerate.utils.constants import DEEPSPEED_MULTINODE_LAUNCHERS, TORCH_DYNAMO_MODES
60
+
61
+
62
+ if is_rich_available():
63
+ from rich import get_console
64
+ from rich.logging import RichHandler
65
+
66
+ FORMAT = "%(message)s"
67
+ logging.basicConfig(format=FORMAT, datefmt="[%X]", handlers=[RichHandler()])
68
+
69
+
70
+ logger = logging.getLogger(__name__)
71
+
72
+
73
+ options_to_group = {
74
+ "multi_gpu": "Distributed GPUs",
75
+ "tpu": "TPU",
76
+ "use_deepspeed": "DeepSpeed Arguments",
77
+ "use_fsdp": "FSDP Arguments",
78
+ "use_megatron_lm": "Megatron-LM Arguments",
79
+ "fp8_backend": "FP8 Arguments",
80
+ }
81
+
82
+
83
+ def clean_option(option):
84
+ "Finds all cases of - after the first two characters and changes them to _"
85
+ if "fp8_backend" in option:
86
+ option = "--fp8_backend"
87
+ if option.startswith("--"):
88
+ return option[2:].replace("-", "_")
89
+
90
+
91
+ class CustomHelpFormatter(argparse.HelpFormatter):
92
+ """
93
+ This is a custom help formatter that will hide all arguments that are not used in the command line when the help is
94
+ called. This is useful for the case where the user is using a specific platform and only wants to see the arguments
95
+ for that platform.
96
+ """
97
+
98
+ def __init__(self, *args, **kwargs):
99
+ super().__init__(*args, **kwargs)
100
+ self.titles = [
101
+ "Hardware Selection Arguments",
102
+ "Resource Selection Arguments",
103
+ "Training Paradigm Arguments",
104
+ "positional arguments",
105
+ "optional arguments",
106
+ ]
107
+
108
+ def add_argument(self, action: argparse.Action):
109
+ if "accelerate" in sys.argv[0] and "launch" in sys.argv[1:]:
110
+ args = sys.argv[2:]
111
+ else:
112
+ args = sys.argv[1:]
113
+
114
+ if len(args) > 1:
115
+ args = list(map(clean_option, args))
116
+ used_platforms = [arg for arg in args if arg in options_to_group.keys()]
117
+ used_titles = [options_to_group[o] for o in used_platforms]
118
+ if action.container.title not in self.titles + used_titles:
119
+ action.help = argparse.SUPPRESS
120
+ elif action.container.title == "Hardware Selection Arguments":
121
+ if set(action.option_strings).isdisjoint(set(args)):
122
+ action.help = argparse.SUPPRESS
123
+ else:
124
+ action.help = action.help + " (currently selected)"
125
+ elif action.container.title == "Training Paradigm Arguments":
126
+ if set(action.option_strings).isdisjoint(set(args)):
127
+ action.help = argparse.SUPPRESS
128
+ else:
129
+ action.help = action.help + " (currently selected)"
130
+
131
+ action.option_strings = [s for s in action.option_strings if "-" not in s[2:]]
132
+ super().add_argument(action)
133
+
134
+ def end_section(self):
135
+ if len(self._current_section.items) < 2:
136
+ self._current_section.items = []
137
+ self._current_section.heading = ""
138
+ super().end_section()
139
+
140
+
141
+ def launch_command_parser(subparsers=None):
142
+ description = "Launch a python script in a distributed scenario. Arguments can be passed in with either hyphens (`--num-processes=2`) or underscores (`--num_processes=2`)"
143
+ if subparsers is not None:
144
+ parser = subparsers.add_parser(
145
+ "launch", description=description, add_help=False, allow_abbrev=False, formatter_class=CustomHelpFormatter
146
+ )
147
+ else:
148
+ parser = CustomArgumentParser(
149
+ "Accelerate launch command",
150
+ description=description,
151
+ add_help=False,
152
+ allow_abbrev=False,
153
+ formatter_class=CustomHelpFormatter,
154
+ )
155
+
156
+ parser.add_argument("-h", "--help", action="help", help="Show this help message and exit.")
157
+
158
+ parser.add_argument(
159
+ "--config_file",
160
+ default=None,
161
+ help="The config file to use for the default values in the launching script.",
162
+ )
163
+ parser.add_argument(
164
+ "--quiet",
165
+ "-q",
166
+ action="store_true",
167
+ help="Silence subprocess errors from the launch stack trace and only show the relevant tracebacks. (Only applicable to DeepSpeed and single-process configurations)",
168
+ )
169
+ # Hardware selection arguments
170
+ hardware_args = parser.add_argument_group(
171
+ "Hardware Selection Arguments", "Arguments for selecting the hardware to be used."
172
+ )
173
+ hardware_args.add_argument(
174
+ "--cpu", default=False, action="store_true", help="Whether or not to force the training on the CPU."
175
+ )
176
+ hardware_args.add_argument(
177
+ "--multi_gpu",
178
+ default=False,
179
+ action="store_true",
180
+ help="Whether or not this should launch a distributed GPU training.",
181
+ )
182
+ hardware_args.add_argument(
183
+ "--tpu", default=False, action="store_true", help="Whether or not this should launch a TPU training."
184
+ )
185
+ # Resource selection arguments
186
+ resource_args = parser.add_argument_group(
187
+ "Resource Selection Arguments", "Arguments for fine-tuning how available hardware should be used."
188
+ )
189
+ resource_args.add_argument(
190
+ "--mixed_precision",
191
+ type=str,
192
+ choices=["no", "fp16", "bf16", "fp8"],
193
+ help="Whether or not to use mixed precision training. "
194
+ "Choose between FP16 and BF16 (bfloat16) training. "
195
+ "BF16 training is only supported on Nvidia Ampere GPUs and PyTorch 1.10 or later.",
196
+ )
197
+ resource_args.add_argument(
198
+ "--num_processes", type=int, default=None, help="The total number of processes to be launched in parallel."
199
+ )
200
+ resource_args.add_argument(
201
+ "--num_machines", type=int, default=None, help="The total number of machines used in this training."
202
+ )
203
+ resource_args.add_argument(
204
+ "--num_cpu_threads_per_process",
205
+ type=int,
206
+ default=None,
207
+ help="The number of CPU threads per process. Can be tuned for optimal performance.",
208
+ )
209
+ resource_args.add_argument(
210
+ "--enable_cpu_affinity",
211
+ default=False,
212
+ action="store_true",
213
+ help="Whether or not CPU affinity and balancing should be enabled. Currently only supported on NVIDIA hardware.",
214
+ )
215
+ # Dynamo arguments
216
+ resource_args.add_argument(
217
+ "--dynamo_backend",
218
+ type=str,
219
+ choices=["no"] + [b.lower() for b in DYNAMO_BACKENDS],
220
+ help="Choose a backend to optimize your training with dynamo, see more at "
221
+ "https://github.com/pytorch/torchdynamo.",
222
+ )
223
+ resource_args.add_argument(
224
+ "--dynamo_mode",
225
+ type=str,
226
+ default="default",
227
+ choices=TORCH_DYNAMO_MODES,
228
+ help="Choose a mode to optimize your training with dynamo.",
229
+ )
230
+ resource_args.add_argument(
231
+ "--dynamo_use_fullgraph",
232
+ default=False,
233
+ action="store_true",
234
+ help="Whether to use full graph mode for dynamo or it is ok to break model into several subgraphs",
235
+ )
236
+ resource_args.add_argument(
237
+ "--dynamo_use_dynamic",
238
+ default=False,
239
+ action="store_true",
240
+ help="Whether to enable dynamic shape tracing.",
241
+ )
242
+ resource_args.add_argument(
243
+ "--dynamo_use_regional_compilation",
244
+ default=False,
245
+ action="store_true",
246
+ help="Whether to enable regional compilation.",
247
+ )
248
+
249
+ # Training Paradigm arguments
250
+ paradigm_args = parser.add_argument_group(
251
+ "Training Paradigm Arguments", "Arguments for selecting which training paradigm to be used."
252
+ )
253
+ paradigm_args.add_argument(
254
+ "--use_deepspeed",
255
+ default=False,
256
+ action="store_true",
257
+ help="Whether to use deepspeed.",
258
+ )
259
+ paradigm_args.add_argument(
260
+ "--use_fsdp",
261
+ default=False,
262
+ action="store_true",
263
+ help="Whether to use fsdp.",
264
+ )
265
+ paradigm_args.add_argument(
266
+ "--use_parallelism_config",
267
+ default=False,
268
+ action="store_true",
269
+ help="Whether to use the parallelism config to configure the N-d distributed training.",
270
+ )
271
+ paradigm_args.add_argument(
272
+ "--use_megatron_lm",
273
+ default=False,
274
+ action="store_true",
275
+ help="Whether to use Megatron-LM.",
276
+ )
277
+
278
+ # distributed GPU training arguments
279
+ distributed_args = parser.add_argument_group("Distributed GPUs", "Arguments related to distributed GPU training.")
280
+ distributed_args.add_argument(
281
+ "--gpu_ids",
282
+ default=None,
283
+ help="What GPUs (by id) should be used for training on this machine as a comma-separated list",
284
+ )
285
+ distributed_args.add_argument(
286
+ "--same_network",
287
+ default=False,
288
+ action="store_true",
289
+ help="Whether all machines used for multinode training exist on the same local network.",
290
+ )
291
+ distributed_args.add_argument(
292
+ "--machine_rank", type=int, default=None, help="The rank of the machine on which this script is launched."
293
+ )
294
+ distributed_args.add_argument(
295
+ "--main_process_ip", type=str, default=None, help="The IP address of the machine of rank 0."
296
+ )
297
+ distributed_args.add_argument(
298
+ "--main_process_port",
299
+ type=int,
300
+ default=None,
301
+ help="The port to use to communicate with the machine of rank 0.",
302
+ )
303
+ distributed_args.add_argument(
304
+ "-t",
305
+ "--tee",
306
+ default="0",
307
+ type=str,
308
+ help="Tee std streams into a log file and also to console.",
309
+ )
310
+ distributed_args.add_argument(
311
+ "--log_dir",
312
+ type=str,
313
+ default=None,
314
+ help=(
315
+ "Base directory to use for log files when using torchrun/torch.distributed.run as launcher. "
316
+ "Use with --tee to redirect std streams info log files."
317
+ ),
318
+ )
319
+ distributed_args.add_argument(
320
+ "--role",
321
+ type=str,
322
+ default="default",
323
+ help="User-defined role for the workers.",
324
+ )
325
+ # Rendezvous related arguments
326
+ distributed_args.add_argument(
327
+ "--rdzv_backend",
328
+ type=str,
329
+ default="static",
330
+ help="The rendezvous method to use, such as 'static' (the default) or 'c10d'",
331
+ )
332
+ distributed_args.add_argument(
333
+ "--rdzv_conf",
334
+ type=str,
335
+ default="",
336
+ help="Additional rendezvous configuration (<key1>=<value1>,<key2>=<value2>,...).",
337
+ )
338
+ distributed_args.add_argument(
339
+ "--max_restarts",
340
+ type=int,
341
+ default=0,
342
+ help="Maximum number of worker group restarts before failing.",
343
+ )
344
+ distributed_args.add_argument(
345
+ "--monitor_interval",
346
+ type=float,
347
+ default=0.1,
348
+ help="Interval, in seconds, to monitor the state of workers.",
349
+ )
350
+ parser.add_argument(
351
+ "-m",
352
+ "--module",
353
+ action="store_true",
354
+ help="Change each process to interpret the launch script as a Python module, executing with the same behavior as 'python -m'.",
355
+ )
356
+ parser.add_argument(
357
+ "--no_python",
358
+ action="store_true",
359
+ help="Skip prepending the training script with 'python' - just execute it directly. Useful when the script is not a Python script.",
360
+ )
361
+
362
+ # TPU arguments
363
+ tpu_args = parser.add_argument_group("TPU", "Arguments related to TPU.")
364
+ tpu_args.add_argument(
365
+ "--tpu_cluster",
366
+ action="store_true",
367
+ dest="tpu_use_cluster",
368
+ help="Whether to use a GCP TPU pod for training.",
369
+ )
370
+ tpu_args.add_argument(
371
+ "--no_tpu_cluster",
372
+ action="store_false",
373
+ dest="tpu_use_cluster",
374
+ help="Should not be passed explicitly, this is for internal use only.",
375
+ )
376
+ tpu_args.add_argument(
377
+ "--tpu_use_sudo",
378
+ action="store_true",
379
+ help="Whether to use `sudo` when running the TPU training script in each pod.",
380
+ )
381
+ tpu_args.add_argument(
382
+ "--vm",
383
+ type=str,
384
+ action="append",
385
+ help=(
386
+ "List of single Compute VM instance names. "
387
+ "If not provided we assume usage of instance groups. For TPU pods."
388
+ ),
389
+ )
390
+ tpu_args.add_argument(
391
+ "--env",
392
+ type=str,
393
+ action="append",
394
+ help="List of environment variables to set on the Compute VM instances. For TPU pods.",
395
+ )
396
+ tpu_args.add_argument(
397
+ "--main_training_function",
398
+ type=str,
399
+ default=None,
400
+ help="The name of the main function to be executed in your script (only for TPU training).",
401
+ )
402
+ tpu_args.add_argument(
403
+ "--downcast_bf16",
404
+ action="store_true",
405
+ help="Whether when using bf16 precision on TPUs if both float and double tensors are cast to bfloat16 or if double tensors remain as float32.",
406
+ )
407
+
408
+ # DeepSpeed arguments
409
+ deepspeed_args = parser.add_argument_group("DeepSpeed Arguments", "Arguments related to DeepSpeed.")
410
+ deepspeed_args.add_argument(
411
+ "--deepspeed_config_file",
412
+ default=None,
413
+ type=str,
414
+ help="DeepSpeed config file.",
415
+ )
416
+ deepspeed_args.add_argument(
417
+ "--zero_stage",
418
+ default=None,
419
+ type=int,
420
+ help="DeepSpeed's ZeRO optimization stage (useful only when `use_deepspeed` flag is passed). "
421
+ "If unspecified, will default to `2`.",
422
+ )
423
+ deepspeed_args.add_argument(
424
+ "--offload_optimizer_device",
425
+ default=None,
426
+ type=str,
427
+ help="Decides where (none|cpu|nvme) to offload optimizer states (useful only when `use_deepspeed` flag is passed). "
428
+ "If unspecified, will default to 'none'.",
429
+ )
430
+ deepspeed_args.add_argument(
431
+ "--offload_param_device",
432
+ default=None,
433
+ type=str,
434
+ help="Decides where (none|cpu|nvme) to offload parameters (useful only when `use_deepspeed` flag is passed). "
435
+ "If unspecified, will default to 'none'.",
436
+ )
437
+ deepspeed_args.add_argument(
438
+ "--offload_optimizer_nvme_path",
439
+ default=None,
440
+ type=str,
441
+ help="Decides Nvme Path to offload optimizer states (useful only when `use_deepspeed` flag is passed). "
442
+ "If unspecified, will default to 'none'.",
443
+ )
444
+ deepspeed_args.add_argument(
445
+ "--offload_param_nvme_path",
446
+ default=None,
447
+ type=str,
448
+ help="Decides Nvme Path to offload parameters (useful only when `use_deepspeed` flag is passed). "
449
+ "If unspecified, will default to 'none'.",
450
+ )
451
+ deepspeed_args.add_argument(
452
+ "--gradient_accumulation_steps",
453
+ default=None,
454
+ type=int,
455
+ help="No of gradient_accumulation_steps used in your training script (useful only when `use_deepspeed` flag is passed). "
456
+ "If unspecified, will default to `1`.",
457
+ )
458
+ deepspeed_args.add_argument(
459
+ "--gradient_clipping",
460
+ default=None,
461
+ type=float,
462
+ help="gradient clipping value used in your training script (useful only when `use_deepspeed` flag is passed). "
463
+ "If unspecified, will default to `1.0`.",
464
+ )
465
+ deepspeed_args.add_argument(
466
+ "--zero3_init_flag",
467
+ default=None,
468
+ type=str,
469
+ help="Decides Whether (true|false) to enable `deepspeed.zero.Init` for constructing massive models. "
470
+ "Only applicable with DeepSpeed ZeRO Stage-3. If unspecified, will default to `true`.",
471
+ )
472
+ deepspeed_args.add_argument(
473
+ "--zero3_save_16bit_model",
474
+ default=None,
475
+ type=str,
476
+ help="Decides Whether (true|false) to save 16-bit model weights when using ZeRO Stage-3. "
477
+ "Only applicable with DeepSpeed ZeRO Stage-3. If unspecified, will default to `false`.",
478
+ )
479
+ deepspeed_args.add_argument(
480
+ "--deepspeed_hostfile",
481
+ default=None,
482
+ type=str,
483
+ help="DeepSpeed hostfile for configuring multi-node compute resources.",
484
+ )
485
+ deepspeed_args.add_argument(
486
+ "--deepspeed_exclusion_filter",
487
+ default=None,
488
+ type=str,
489
+ help="DeepSpeed exclusion filter string when using multi-node setup.",
490
+ )
491
+ deepspeed_args.add_argument(
492
+ "--deepspeed_inclusion_filter",
493
+ default=None,
494
+ type=str,
495
+ help="DeepSpeed inclusion filter string when using multi-node setup.",
496
+ )
497
+ deepspeed_args.add_argument(
498
+ "--deepspeed_multinode_launcher",
499
+ default=None,
500
+ type=str,
501
+ help="DeepSpeed multi-node launcher to use, e.g. `pdsh`, `standard`, `openmpi`, `mvapich`, `mpich`, `slurm`, `nossh` (requires DeepSpeed >= 0.14.5). If unspecified, will default to `pdsh`.",
502
+ )
503
+ deepspeed_args.add_argument(
504
+ "--deepspeed_moe_layer_cls_names",
505
+ default=None,
506
+ type=str,
507
+ help="comma-separated list of transformer MoE layer class names (case-sensitive) to wrap ,e.g, `MixtralSparseMoeBlock`, `Qwen2MoeSparseMoeBlock`, `JetMoEAttention,JetMoEBlock` ..."
508
+ " (useful only when `use_deepspeed` flag is passed).",
509
+ )
510
+
511
+ # fsdp arguments
512
+ fsdp_args = parser.add_argument_group("FSDP Arguments", "Arguments related to Fully Shared Data Parallelism.")
513
+ fsdp_args.add_argument(
514
+ "--fsdp_version",
515
+ type=str,
516
+ default="1",
517
+ choices=["1", "2"],
518
+ help="FSDP version to use. (useful only when `use_fsdp` flag is passed).",
519
+ )
520
+ fsdp_args.add_argument(
521
+ "--fsdp_offload_params",
522
+ default="false",
523
+ type=str,
524
+ help="Decides Whether (true|false) to offload parameters and gradients to CPU. (useful only when `use_fsdp` flag is passed).",
525
+ )
526
+ fsdp_args.add_argument(
527
+ "--fsdp_min_num_params",
528
+ type=int,
529
+ default=int(1e8),
530
+ help="FSDP's minimum number of parameters for Default Auto Wrapping. (useful only when `use_fsdp` flag is passed).",
531
+ )
532
+ # We enable this for backwards compatibility, throw a warning if this is set in `FullyShardedDataParallelPlugin`
533
+ fsdp_args.add_argument(
534
+ "--fsdp_sharding_strategy",
535
+ type=str,
536
+ default="FULL_SHARD",
537
+ help="FSDP's sharding strategy. (useful only when `use_fsdp` flag is passed and `fsdp_version=1`).",
538
+ )
539
+ fsdp_args.add_argument(
540
+ "--fsdp_reshard_after_forward",
541
+ type=str,
542
+ default="true",
543
+ help="FSDP's Reshard After Forward Strategy. (useful only when `use_fsdp` flag is passed). Supports either boolean (FSDP2) or `FULL_SHARD | SHARD_GRAD_OP | NO_RESHARD` (FSDP1).",
544
+ )
545
+ fsdp_args.add_argument(
546
+ "--fsdp_auto_wrap_policy",
547
+ type=str,
548
+ default=None,
549
+ help="FSDP's auto wrap policy. (useful only when `use_fsdp` flag is passed).",
550
+ )
551
+ fsdp_args.add_argument(
552
+ "--fsdp_transformer_layer_cls_to_wrap",
553
+ default=None,
554
+ type=str,
555
+ help="Transformer layer class name (case-sensitive) to wrap ,e.g, `BertLayer`, `GPTJBlock`, `T5Block` .... "
556
+ "(useful only when `use_fsdp` flag is passed).",
557
+ )
558
+ fsdp_args.add_argument(
559
+ "--fsdp_backward_prefetch",
560
+ default=None,
561
+ type=str,
562
+ help="FSDP's backward prefetch policy. (useful only when `use_fsdp` flag is passed).",
563
+ )
564
+ fsdp_args.add_argument(
565
+ "--fsdp_state_dict_type",
566
+ default=None,
567
+ type=str,
568
+ help="FSDP's state dict type. (useful only when `use_fsdp` flag is passed).",
569
+ )
570
+ fsdp_args.add_argument(
571
+ "--fsdp_forward_prefetch",
572
+ default="false",
573
+ type=str,
574
+ help="If True, then FSDP explicitly prefetches the next upcoming "
575
+ "all-gather while executing in the forward pass (useful only when `use_fsdp` flag is passed).",
576
+ )
577
+ fsdp_args.add_argument(
578
+ "--fsdp_use_orig_params",
579
+ default="true",
580
+ type=str,
581
+ help="If True, allows non-uniform `requires_grad` during init, which means support for interspersed frozen and trainable parameters."
582
+ " (useful only when `use_fsdp` flag is passed).",
583
+ )
584
+ fsdp_args.add_argument(
585
+ "--fsdp_cpu_ram_efficient_loading",
586
+ default="true",
587
+ type=str,
588
+ help="If True, only the first process loads the pretrained model checkoint while all other processes have empty weights. "
589
+ "Only applicable for 🤗 Transformers. When using this, `--fsdp_sync_module_states` needs to True. "
590
+ "(useful only when `use_fsdp` flag is passed).",
591
+ )
592
+ fsdp_args.add_argument(
593
+ "--fsdp_sync_module_states",
594
+ default="true",
595
+ type=str,
596
+ help="If True, each individually wrapped FSDP unit will broadcast module parameters from rank 0."
597
+ " (useful only when `use_fsdp` flag is passed).",
598
+ )
599
+ fsdp_args.add_argument(
600
+ "--fsdp_activation_checkpointing",
601
+ default="false",
602
+ type=str,
603
+ help="Decides Whether (true|false) intermediate activations are freed during the forward pass, and a checkpoint is left as a placeholder. (useful only when `use_fsdp` flag is passed).",
604
+ )
605
+
606
+ # megatron_lm args
607
+ megatron_lm_args = parser.add_argument_group("Megatron-LM Arguments", "Arguments related to Megatron-LM.")
608
+ megatron_lm_args.add_argument(
609
+ "--megatron_lm_tp_degree",
610
+ type=int,
611
+ default=1,
612
+ help="Megatron-LM's Tensor Parallelism (TP) degree. (useful only when `use_megatron_lm` flag is passed).",
613
+ )
614
+ megatron_lm_args.add_argument(
615
+ "--megatron_lm_use_custom_fsdp",
616
+ type=bool,
617
+ default=False,
618
+ help="Whether to use custom FSDP. (useful only when `use_megatron_lm` flag is passed).",
619
+ )
620
+ megatron_lm_args.add_argument(
621
+ "--megatron_lm_no_load_optim",
622
+ type=bool,
623
+ default=False,
624
+ help="Whether to not load optimizer. (useful only when `use_megatron_lm` flag is passed).",
625
+ )
626
+ megatron_lm_args.add_argument(
627
+ "--megatron_lm_eod_mask_loss",
628
+ type=bool,
629
+ default=False,
630
+ help="Whether to use eod mask loss. (useful only when `use_megatron_lm` flag is passed).",
631
+ )
632
+ megatron_lm_args.add_argument(
633
+ "--megatron_lm_overlap_cpu_optimizer_d2h_h2d",
634
+ type=bool,
635
+ default=False,
636
+ help="Whether to overlap CPU optimizer step, gradients D2H and updated parameters H2D. (useful only when `use_megatron_lm` flag is passed).",
637
+ )
638
+ megatron_lm_args.add_argument(
639
+ "--megatron_lm_no_save_optim",
640
+ type=bool,
641
+ default=False,
642
+ help="Whether to not save optimizer. (useful only when `use_megatron_lm` flag is passed).",
643
+ )
644
+ megatron_lm_args.add_argument(
645
+ "--megatron_lm_optimizer_cpu_offload",
646
+ type=bool,
647
+ default=False,
648
+ help="Whether to use CPU offload for optimizer. (useful only when `use_megatron_lm` flag is passed).",
649
+ )
650
+ megatron_lm_args.add_argument(
651
+ "--megatron_lm_use_precision_aware_optimizer",
652
+ type=bool,
653
+ default=False,
654
+ help="Whether to use precision aware optimizer. (useful only when `use_megatron_lm` flag is passed).",
655
+ )
656
+ megatron_lm_args.add_argument(
657
+ "--megatron_lm_decoder_last_pipeline_num_layers",
658
+ type=int,
659
+ default=None,
660
+ help="Megatron-LM's decoder last pipeline number of layers, default None is even split of transformer layers across all pipeline stages.",
661
+ )
662
+ megatron_lm_args.add_argument(
663
+ "--megatron_lm_pp_degree",
664
+ type=int,
665
+ default=1,
666
+ help="Megatron-LM's Pipeline Parallelism (PP) degree. (useful only when `use_megatron_lm` flag is passed).",
667
+ )
668
+ megatron_lm_args.add_argument(
669
+ "--megatron_lm_num_micro_batches",
670
+ type=int,
671
+ default=None,
672
+ help="Megatron-LM's number of micro batches when PP degree > 1. (useful only when `use_megatron_lm` flag is passed).",
673
+ )
674
+ megatron_lm_args.add_argument(
675
+ "--megatron_lm_sequence_parallelism",
676
+ default=None,
677
+ type=str,
678
+ help="Decides Whether (true|false) to enable Sequence Parallelism when TP degree > 1. "
679
+ "(useful only when `use_megatron_lm` flag is passed).",
680
+ )
681
+ megatron_lm_args.add_argument(
682
+ "--megatron_lm_recompute_activations",
683
+ default=None,
684
+ type=str,
685
+ help="Decides Whether (true|false) to enable Selective Activation Recomputation. "
686
+ "(useful only when `use_megatron_lm` flag is passed).",
687
+ )
688
+ megatron_lm_args.add_argument(
689
+ "--megatron_lm_use_distributed_optimizer",
690
+ default=None,
691
+ type=str,
692
+ help="Decides Whether (true|false) to use distributed optimizer "
693
+ "which shards optimizer state and gradients across Data Pralellel (DP) ranks. "
694
+ "(useful only when `use_megatron_lm` flag is passed).",
695
+ )
696
+ megatron_lm_args.add_argument(
697
+ "--megatron_lm_gradient_clipping",
698
+ default=1.0,
699
+ type=float,
700
+ help="Megatron-LM's gradient clipping value based on global L2 Norm (0 to disable). "
701
+ "(useful only when `use_megatron_lm` flag is passed).",
702
+ )
703
+ megatron_lm_args.add_argument(
704
+ "--megatron_lm_recompute_granularity",
705
+ default=None,
706
+ type=str,
707
+ help="Megatron-LM's recompute granularity (full, selective). "
708
+ "(useful only when `use_megatron_lm` flag is passed).",
709
+ )
710
+ megatron_lm_args.add_argument(
711
+ "--megatron_lm_recompute_method",
712
+ default=None,
713
+ type=str,
714
+ help="Megatron-LM's recompute method (uniform, block). (useful only when `use_megatron_lm` flag is passed).",
715
+ )
716
+ megatron_lm_args.add_argument(
717
+ "--megatron_lm_recompute_num_layers",
718
+ default=None,
719
+ type=int,
720
+ help="Megatron-LM's number of layers to recompute. (useful only when `use_megatron_lm` flag is passed).",
721
+ )
722
+ megatron_lm_args.add_argument(
723
+ "--megatron_lm_attention_backend",
724
+ default=None,
725
+ type=str,
726
+ help="Decides Whether (true|false) to enable attention backend. "
727
+ "(useful only when `use_megatron_lm` flag is passed).",
728
+ )
729
+ megatron_lm_args.add_argument(
730
+ "--megatron_lm_expert_model_parallel_size",
731
+ default=None,
732
+ type=int,
733
+ help="Megatron-LM's expert model parallel size. (useful only when `use_megatron_lm` flag is passed).",
734
+ )
735
+ megatron_lm_args.add_argument(
736
+ "--megatron_lm_context_parallel_size",
737
+ default=None,
738
+ type=int,
739
+ help="Megatron-LM's context parallel size. (useful only when `use_megatron_lm` flag is passed).",
740
+ )
741
+ megatron_lm_args.add_argument(
742
+ "--megatron_lm_attention_dropout",
743
+ default=None,
744
+ type=float,
745
+ help="Megatron-LM's attention dropout rate. (useful only when `use_megatron_lm` flag is passed).",
746
+ )
747
+ megatron_lm_args.add_argument(
748
+ "--megatron_lm_hidden_dropout",
749
+ default=None,
750
+ type=float,
751
+ help="Megatron-LM's hidden dropout rate. (useful only when `use_megatron_lm` flag is passed).",
752
+ )
753
+ megatron_lm_args.add_argument(
754
+ "--megatron_lm_attention_softmax_in_fp32",
755
+ default=None,
756
+ type=str,
757
+ help="Decides Whether (true|false) to use fp32 for attention softmax. "
758
+ "(useful only when `use_megatron_lm` flag is passed).",
759
+ )
760
+ megatron_lm_args.add_argument(
761
+ "--megatron_lm_expert_tensor_parallel_size",
762
+ default=None,
763
+ type=int,
764
+ help="Megatron-LM's expert tensor parallel size. (useful only when `use_megatron_lm` flag is passed).",
765
+ )
766
+ megatron_lm_args.add_argument(
767
+ "--megatron_lm_calculate_per_token_loss",
768
+ default=None,
769
+ type=str,
770
+ help="Decides Whether (true|false) to calculate per token loss. "
771
+ "(useful only when `use_megatron_lm` flag is passed).",
772
+ )
773
+ megatron_lm_args.add_argument(
774
+ "--megatron_lm_use_rotary_position_embeddings",
775
+ default=None,
776
+ type=str,
777
+ help="Decides Whether (true|false) to use rotary position embeddings. "
778
+ "(useful only when `use_megatron_lm` flag is passed).",
779
+ )
780
+
781
+ # FP8 arguments
782
+ fp8_args = parser.add_argument_group(
783
+ "FP8 Arguments", "Arguments related to FP8 training (requires `--mixed_precision=fp8`)"
784
+ )
785
+ fp8_args.add_argument(
786
+ "--fp8_backend",
787
+ type=str,
788
+ choices=["ao", "te", "msamp"],
789
+ help="Choose a backend to train with FP8 (ao: torchao, te: TransformerEngine, msamp: MS-AMP)",
790
+ )
791
+ fp8_args.add_argument(
792
+ "--fp8_use_autocast_during_eval",
793
+ default=False,
794
+ action="store_true",
795
+ help="Whether to use FP8 autocast during eval mode (useful only when `--fp8_backend=te` is passed). Generally better metrics are found when this is not passed.",
796
+ )
797
+ fp8_args.add_argument(
798
+ "--fp8_margin",
799
+ type=int,
800
+ default=0,
801
+ help="The margin to use for the gradient scaling (useful only when `--fp8_backend=te` is passed).",
802
+ )
803
+ fp8_args.add_argument(
804
+ "--fp8_interval",
805
+ type=int,
806
+ default=1,
807
+ help="The interval to use for how often the scaling factor is recomputed (useful only when `--fp8_backend=te` is passed).",
808
+ )
809
+ fp8_args.add_argument(
810
+ "--fp8_format",
811
+ type=str,
812
+ default="HYBRID",
813
+ choices=["HYBRID", "E4M3", "E5M2"],
814
+ help="The format to use for the FP8 recipe (useful only when `--fp8_backend=te` is passed).",
815
+ )
816
+ fp8_args.add_argument(
817
+ "--fp8_amax_history_len",
818
+ type=int,
819
+ default=1024,
820
+ help="The length of the history to use for the scaling factor computation (useful only when `--fp8_backend=te` is passed).",
821
+ )
822
+ fp8_args.add_argument(
823
+ "--fp8_amax_compute_algo",
824
+ type=str,
825
+ default="most_recent",
826
+ choices=["max", "most_recent"],
827
+ help="The algorithm to use for the scaling factor computation. (useful only when `--fp8_backend=te` is passed).",
828
+ )
829
+ fp8_args.add_argument(
830
+ "--fp8_override_linear_precision",
831
+ type=lambda x: tuple(map(str_to_bool, x.split(","))),
832
+ default=(False, False, False),
833
+ help="Whether or not to execute `fprop`, `dgrad`, and `wgrad` GEMMS in higher precision. Should be passed in a comma-separated string of booleans (useful only when `--fp8_backend=te` is passed).",
834
+ )
835
+ fp8_args.add_argument(
836
+ "--fp8_opt_level",
837
+ type=str,
838
+ default="O2",
839
+ choices=["O1", "O2"],
840
+ help="What level of 8-bit collective communication should be used with MS-AMP (useful only when `--fp8_backend=msamp` is passed).",
841
+ )
842
+ fp8_args.add_argument(
843
+ "--fp8_enable_fsdp_float8_all_gather",
844
+ default="true",
845
+ type=str_to_bool,
846
+ help="Whether to enable FSDP2 float8 all gather (useful only when `--fp8_backend=ao` is passed).",
847
+ )
848
+ fp8_args.add_argument(
849
+ "--fp8_pad_inner_dim",
850
+ default="true",
851
+ type=str_to_bool,
852
+ help="Whether to pad the inner dimension for FP8 GEMMs (useful only when `--fp8_backend=ao` is passed).",
853
+ )
854
+
855
+ # AWS arguments
856
+ aws_args = parser.add_argument_group("AWS Arguments", "Arguments related to AWS.")
857
+ aws_args.add_argument(
858
+ "--aws_access_key_id",
859
+ type=str,
860
+ default=None,
861
+ help="The AWS_ACCESS_KEY_ID used to launch the Amazon SageMaker training job",
862
+ )
863
+ aws_args.add_argument(
864
+ "--aws_secret_access_key",
865
+ type=str,
866
+ default=None,
867
+ help="The AWS_SECRET_ACCESS_KEY used to launch the Amazon SageMaker training job.",
868
+ )
869
+ parser.add_argument(
870
+ "--debug",
871
+ action="store_true",
872
+ help="Whether to print out the torch.distributed stack trace when something fails.",
873
+ )
874
+ parser.add_argument(
875
+ "training_script",
876
+ type=str,
877
+ help=(
878
+ "The full path to the script to be launched in parallel, followed by all the arguments for the training "
879
+ "script."
880
+ ),
881
+ )
882
+
883
+ # MPI arguments
884
+ mpirun_args = parser.add_argument_group("MPI Arguments", "Arguments related to mpirun for Multi-CPU")
885
+ mpirun_args.add_argument(
886
+ "--mpirun_hostfile",
887
+ type=str,
888
+ default=None,
889
+ help="Location for a hostfile for using Accelerate to launch a multi-CPU training job with mpirun. This will "
890
+ "get passed to the MPI --hostfile or -f parameter, depending on which MPI program is installed.",
891
+ )
892
+
893
+ # ParallelismConfig arguments
894
+ parallelism_config_args = parser.add_argument_group(
895
+ "ParallelismConfig Arguments",
896
+ "Arguments related to the ParallelismConfig used for distributed training.",
897
+ )
898
+
899
+ parallelism_config_args.add_argument(
900
+ "--parallelism_config_dp_replicate_size",
901
+ type=int,
902
+ default=1,
903
+ help="The number of processes for data parallel training. Defaults to 1 (no data parallelism).",
904
+ )
905
+
906
+ parallelism_config_args.add_argument(
907
+ "--parallelism_config_dp_shard_size",
908
+ type=int,
909
+ default=1,
910
+ help="The number of processes for FSDP sharding. Defaults to 1 (No FSDP sharding).",
911
+ )
912
+
913
+ parallelism_config_args.add_argument(
914
+ "--parallelism_config_tp_size",
915
+ type=int,
916
+ default=1,
917
+ help="The number of processes for tensor parallel training. Defaults to 1 (no tensor parallelism).",
918
+ )
919
+
920
+ parallelism_config_args.add_argument(
921
+ "--parallelism_config_cp_size",
922
+ type=int,
923
+ default=1,
924
+ help="The number of processese for context parallel training. Defaults to 1 (no context parallelism).",
925
+ )
926
+
927
+ parallelism_config_args.add_argument(
928
+ "--parallelism_config_cp_backend",
929
+ type=str,
930
+ choices=["torch"],
931
+ default="torch",
932
+ help="Context Parallelism backend: torch (FSDP2) or deepspeed (ALST/Ulysses)",
933
+ )
934
+
935
+ parallelism_config_args.add_argument(
936
+ "--parallelism_config_cp_comm_strategy",
937
+ type=str,
938
+ default="allgather",
939
+ help="The communication strategy for context parallel training. Defaults to 'allgather'. Other option is alltoall",
940
+ )
941
+
942
+ parallelism_config_args.add_argument(
943
+ "--parallelism_config_sp_size",
944
+ type=int,
945
+ default=1,
946
+ help="The number of processese for context parallel training. Defaults to 1 (no context parallelism).",
947
+ )
948
+
949
+ parallelism_config_args.add_argument(
950
+ "--parallelism_config_sp_backend",
951
+ type=str,
952
+ choices=["deepspeed"],
953
+ default="deepspeed",
954
+ help="Sequence Parallelism backend: deepspeed (ALST/Ulysses)",
955
+ )
956
+
957
+ parallelism_config_args.add_argument(
958
+ "--parallelism_config_sp_seq_length",
959
+ type=str,
960
+ default=None,
961
+ help="Sequence length for when batches are all of the same length. For variable sequence lengths across batches set `parallelism_config_sp_seq_length_is_variable=True`",
962
+ )
963
+
964
+ parallelism_config_args.add_argument(
965
+ "--parallelism_config_sp_seq_length_is_variable",
966
+ type=bool,
967
+ default=True,
968
+ help="If `True` will work with a sequence length that may change between batches, in which case `parallelism_config_sp_seq_length` value can be set to anything divisible by sp size or remain unset. If `False` then `parallelism_config_sp_seq_length` needs to match the batch's sequence length dimension. The default is `True`.",
969
+ )
970
+
971
+ parallelism_config_args.add_argument(
972
+ "--parallelism_config_sp_attn_implementation",
973
+ type=str,
974
+ default="sdpa",
975
+ help="Attention implementation to use. Can be one of 'flash_attention_2', 'flash_attention_3' or 'sdpa'. Defaults to `sdpa`.",
976
+ )
977
+
978
+ # Other arguments of the training scripts
979
+ parser.add_argument("training_script_args", nargs=argparse.REMAINDER, help="Arguments of the training script.")
980
+
981
+ if subparsers is not None:
982
+ parser.set_defaults(func=launch_command)
983
+ return parser
984
+
985
+
986
def simple_launcher(args):
    """Run the training script as a single plain subprocess (no distributed launcher)."""
    cmd, current_env = prepare_simple_launcher_cmd_env(args)

    proc = subprocess.Popen(cmd, env=current_env)
    proc.wait()
    if proc.returncode == 0:
        return
    # Non-zero exit: either exit quietly or surface the failure as an exception.
    if args.quiet:
        sys.exit(1)
    raise subprocess.CalledProcessError(returncode=proc.returncode, cmd=cmd)
996
+
997
+
998
def multi_gpu_launcher(args):
    """Launch a multi-GPU job through `torch.distributed.run` with an accelerate-prepared environment."""
    import torch.distributed.run as distrib_run

    current_env = prepare_multi_gpu_env(args)
    if not check_cuda_p2p_ib_support():
        # Force-disable P2P and IB communications unless the user has already
        # configured them explicitly, warning only if we changed something.
        changed = False
        for var in ("NCCL_P2P_DISABLE", "NCCL_IB_DISABLE"):
            if var not in current_env:
                current_env[var] = "1"
                changed = True
        if changed:
            logger.warning(
                "Using RTX 4000 series which doesn't support faster communication speedups. Ensuring P2P and IB communications are disabled."
            )

    debug = getattr(args, "debug", False)
    # Keep only the arguments torch.distributed.run understands, forwarding the
    # training script and its arguments.
    args = _filter_args(
        args,
        distrib_run.get_args_parser(),
        ["--training_script", args.training_script, "--training_script_args", args.training_script_args],
    )

    with patch_environment(**current_env):
        try:
            distrib_run.run(args)
        except Exception:
            if debug and is_rich_available():
                console = get_console()
                console.print("\n[bold red]Using --debug, `torch.distributed` Stack Trace:[/bold red]")
                console.print_exception(suppress=[__file__], show_locals=False)
            else:
                raise
1031
+
1032
+
1033
def deepspeed_launcher(args):
    """Launch a training job with DeepSpeed.

    Multi-node runs using a DeepSpeed-native launcher are executed through the
    prepared DeepSpeed command as a subprocess; otherwise the job is delegated
    to `torch.distributed.run`.
    """
    import torch.distributed.run as distrib_run

    if not is_deepspeed_available():
        raise ImportError("DeepSpeed is not installed => run `pip3 install deepspeed` or build it from source.")
    else:
        from deepspeed.launcher.runner import DEEPSPEED_ENVIRONMENT_NAME

    cmd, current_env = prepare_deepspeed_cmd_env(args)
    if not check_cuda_p2p_ib_support():
        # Force-disable P2P/IB communications unless already set by the user,
        # and warn only if we actually changed something.
        message = "Using RTX 4000 series which doesn't support faster communication speedups. Ensuring P2P and IB communications are disabled."
        warn = False
        if "NCCL_P2P_DISABLE" not in current_env:
            current_env["NCCL_P2P_DISABLE"] = "1"
            warn = True
        if "NCCL_IB_DISABLE" not in current_env:
            current_env["NCCL_IB_DISABLE"] = "1"
            warn = True
        if warn:
            logger.warning(message)

    if args.num_machines > 1 and args.deepspeed_multinode_launcher != DEEPSPEED_MULTINODE_LAUNCHERS[1]:
        # Multi-node with a DeepSpeed-native launcher (e.g. pdsh): append the
        # environment to DeepSpeed's env file so remote nodes pick it up, then
        # run the prepared command as a subprocess.
        with open(DEEPSPEED_ENVIRONMENT_NAME, "a") as f:
            valid_env_items = convert_dict_to_env_variables(current_env)
            # NOTE(review): `> 1` skips the write when exactly one env item is
            # valid — possibly intended to be `>= 1`; confirm upstream intent.
            if len(valid_env_items) > 1:
                f.writelines(valid_env_items)

        process = subprocess.Popen(cmd, env=current_env)
        process.wait()
        if process.returncode != 0:
            if not args.quiet:
                raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
            else:
                sys.exit(1)
    else:
        # Single node (or the `standard` multinode launcher): delegate to
        # torch.distributed.run with only the arguments it understands.
        debug = getattr(args, "debug", False)
        args = _filter_args(
            args,
            distrib_run.get_args_parser(),
            ["--training_script", args.training_script, "--training_script_args", args.training_script_args],
        )
        with patch_environment(**current_env):
            try:
                distrib_run.run(args)
            except Exception:
                if is_rich_available() and debug:
                    console = get_console()
                    console.print("\n[bold red]Using --debug, `torch.distributed` Stack Trace:[/bold red]")
                    console.print_exception(suppress=[__file__], show_locals=False)
                else:
                    raise
1084
+
1085
+
1086
def tpu_launcher(args):
    """Spawn TPU training processes via torch_xla's multiprocessing helper.

    The training script is imported as a module and its main function (named by
    `--main_training_function`) is run under `xmp.spawn`.
    """
    import torch_xla.distributed.xla_multiprocessing as xmp

    if args.no_python:
        raise ValueError("--no_python cannot be used with TPU launcher")

    args, current_env = prepare_tpu(args, {})

    if args.module:
        module_name = args.training_script
    else:
        # Treat the script path as an importable module: put its directory on
        # sys.path and import it by stem.
        script = Path(args.training_script)
        sys.path.append(str(script.parent.resolve()))
        module_name = script.stem

    module = importlib.import_module(module_name)
    if not hasattr(module, args.main_training_function):
        raise ValueError(
            f"Your training script should have a function named {args.main_training_function}, or you should pass a "
            "different value to `--main_training_function`."
        )

    # Make the spawned processes see the training-script arguments as argv.
    sys.argv = [module.__file__] + args.training_script_args

    entrypoint = getattr(module, args.main_training_function)
    with patch_environment(**current_env):
        xmp.spawn(PrepareForLaunch(entrypoint), args=())
1115
+
1116
+
1117
def tpu_pod_launcher(args):
    """Launch training on a TPU pod by delegating to `torch_xla.distributed.xla_dist`,
    re-invoking `accelerate-launch` as a single-machine job on each TPU VM worker.
    """
    from torch_xla.distributed import xla_dist

    current_env = {}
    args, current_env = prepare_tpu(args, current_env, True)
    debug = getattr(args, "debug", False)

    training_script = args.training_script
    training_script_args = args.training_script_args
    # Keep only the arguments xla_dist understands; everything else travels
    # inside the re-invoked accelerate-launch command below.
    new_args = _filter_args(
        args, xla_dist.get_args_parser(), ["--tpu", args.tpu_name, "--positional", "", "--restart-tpuvm-pod-server"]
    )

    if args.tpu_use_sudo:
        new_cmd = ["sudo"]
    else:
        new_cmd = []

    # Per-worker command: each TPU VM runs a single-machine accelerate launch.
    new_cmd += [
        "accelerate-launch",
        "--tpu",
        "--no_tpu_cluster",
        "--num_machines",
        "1",
        "--mixed_precision",
        "no",
        "--dynamo_backend",
        "no",
        "--num_processes",
        str(args.num_processes),
        "--main_training_function",
        str(args.main_training_function),
        training_script,
    ] + training_script_args

    new_args.positional = new_cmd
    # Docker-related flags are not supported here; collect any that were set
    # and fail with a message listing them all.
    bad_flags = ""
    for arg in vars(new_args):
        if arg.startswith("docker_"):
            value = getattr(new_args, arg)
            if value != "" and value is not None:
                bad_flags += f'{arg}="{value}"\n'
    if bad_flags != "":
        raise ValueError(
            f"Docker containers are not supported for TPU pod launcher currently, please remove the following flags:\n{bad_flags}"
        )
    # Forward the prepared environment to the workers, plus a marker so the
    # inner launch knows it is running inside a TPU pod.
    new_args.env = [f"{k}={v}" for k, v in current_env.items()]
    new_args.env.append("ACCELERATE_IN_TPU_POD=1")
    try:
        xla_dist.resolve_and_execute(new_args)
    except Exception:
        if is_rich_available() and debug:
            console = get_console()
            console.print("\n[bold red]Using --debug, `torch_xla.xla_dist` Stack Trace:[/bold red]")
            console.print_exception(suppress=[__file__], show_locals=False)
        else:
            raise
1174
+
1175
+
1176
def sagemaker_launcher(sagemaker_config: SageMakerConfig, args):
    """Submit the training job to Amazon SageMaker through a HuggingFace estimator."""
    if not is_sagemaker_available():
        raise ImportError(
            "Please install sagemaker to be able to launch training on Amazon SageMaker with `pip install accelerate[sagemaker]`"
        )
    if args.module or args.no_python:
        raise ValueError(
            "SageMaker requires a python training script file and cannot be used with --module or --no_python"
        )

    from sagemaker.huggingface import HuggingFace

    args, sagemaker_inputs = prepare_sagemager_args_inputs(sagemaker_config, args)

    estimator = HuggingFace(**args)
    estimator.fit(inputs=sagemaker_inputs)
    print(f"You can find your model data at: {estimator.model_data}")
1194
+
1195
+
1196
+ def _validate_launch_command(args):
1197
+ # Sanity checks
1198
+ if sum([args.multi_gpu, args.cpu, args.tpu, args.use_deepspeed, args.use_fsdp]) > 1:
1199
+ raise ValueError(
1200
+ "You can only use one of `--cpu`, `--multi_gpu`, `--tpu`, `--use_deepspeed`, `--use_fsdp` at a time."
1201
+ )
1202
+ if args.multi_gpu and (args.num_processes is not None) and (args.num_processes < 2):
1203
+ raise ValueError("You need to use at least 2 processes to use `--multi_gpu`.")
1204
+
1205
+ if (not args.use_fsdp or args.fsdp_version == 1) and args.use_parallelism_config:
1206
+ raise ValueError("You cannot use `--use_parallelism_config` without `--use_fsdp` and `--fsdp_version=2`. ")
1207
+
1208
+ defaults = None
1209
+ warned = []
1210
+ mp_from_config_flag = False
1211
+ # Get the default from the config file.
1212
+ if args.config_file is not None or os.path.isfile(default_config_file) and not args.cpu:
1213
+ defaults = load_config_from_file(args.config_file)
1214
+ if (
1215
+ not args.multi_gpu
1216
+ and not args.tpu
1217
+ and not args.tpu_use_cluster
1218
+ and not args.use_deepspeed
1219
+ and not args.use_fsdp
1220
+ and not args.use_megatron_lm
1221
+ ):
1222
+ args.use_deepspeed = defaults.distributed_type == DistributedType.DEEPSPEED
1223
+ args.multi_gpu = (
1224
+ True
1225
+ if defaults.distributed_type
1226
+ in (
1227
+ DistributedType.MULTI_GPU,
1228
+ DistributedType.MULTI_NPU,
1229
+ DistributedType.MULTI_MLU,
1230
+ DistributedType.MULTI_SDAA,
1231
+ DistributedType.MULTI_MUSA,
1232
+ DistributedType.MULTI_XPU,
1233
+ DistributedType.MULTI_HPU,
1234
+ DistributedType.MULTI_NEURON,
1235
+ )
1236
+ else False
1237
+ )
1238
+ args.tpu = defaults.distributed_type == DistributedType.XLA
1239
+ args.use_fsdp = defaults.distributed_type == DistributedType.FSDP
1240
+ args.use_megatron_lm = defaults.distributed_type == DistributedType.MEGATRON_LM
1241
+ args.tpu_use_cluster = defaults.tpu_use_cluster if args.tpu else False
1242
+ args.use_parallelism_config = defaults.parallelism_config != {}
1243
+ if args.gpu_ids is None:
1244
+ if defaults.gpu_ids is not None:
1245
+ args.gpu_ids = defaults.gpu_ids
1246
+ else:
1247
+ args.gpu_ids = "all"
1248
+
1249
+ if args.multi_gpu and args.num_machines is None:
1250
+ args.num_machines = defaults.num_machines
1251
+
1252
+ if len(args.gpu_ids.split(",")) < 2 and (args.gpu_ids != "all") and args.multi_gpu and args.num_machines <= 1:
1253
+ raise ValueError(
1254
+ "Less than two GPU ids were configured and tried to run on on multiple GPUs. "
1255
+ "Please ensure at least two are specified for `--gpu_ids`, or use `--gpu_ids='all'`."
1256
+ )
1257
+ if defaults.compute_environment == ComputeEnvironment.LOCAL_MACHINE:
1258
+ # Update args with the defaults
1259
+ for name, attr in defaults.__dict__.items():
1260
+ if isinstance(attr, dict):
1261
+ # Copy defaults.somedict.somearg to args.somearg and
1262
+ # defaults.fsdp_config.x to args.fsdp_x
1263
+ for key, value in attr.items():
1264
+ if name == "fsdp_config" and not key.startswith("fsdp"):
1265
+ key = "fsdp_" + key
1266
+ elif name == "fp8_config" and not key.startswith("fp8"):
1267
+ key = "fp8_" + key
1268
+ if hasattr(args, "nondefault") and key not in args.nondefault:
1269
+ setattr(args, key, value)
1270
+ elif (
1271
+ name not in ["compute_environment", "mixed_precision", "distributed_type"]
1272
+ and getattr(args, name, None) is None
1273
+ ):
1274
+ # Those args are handled separately
1275
+ setattr(args, name, attr)
1276
+ if not args.debug:
1277
+ args.debug = defaults.debug
1278
+
1279
+ if not args.mixed_precision:
1280
+ if defaults.mixed_precision is None:
1281
+ args.mixed_precision = "no"
1282
+ else:
1283
+ args.mixed_precision = defaults.mixed_precision
1284
+ mp_from_config_flag = True
1285
+ else:
1286
+ native_amp = is_bf16_available(True)
1287
+ if (
1288
+ args.mixed_precision == "bf16"
1289
+ and not native_amp
1290
+ and not (args.tpu and is_torch_xla_available(check_is_tpu=True))
1291
+ ):
1292
+ raise ValueError("bf16 mixed precision requires PyTorch >= 1.10 and a supported device.")
1293
+
1294
+ # Silently set the default here
1295
+ if args.dynamo_backend is None:
1296
+ args.dynamo_backend = "no"
1297
+ if args.num_processes == -1:
1298
+ raise ValueError("You need to manually pass in `--num_processes` using this config yaml.")
1299
+ else:
1300
+ if args.num_processes is None:
1301
+ if is_xpu_available():
1302
+ args.num_processes = torch.xpu.device_count()
1303
+ elif is_mlu_available():
1304
+ args.num_processes = torch.mlu.device_count()
1305
+ elif is_sdaa_available():
1306
+ args.num_processes = torch.sdaa.device_count()
1307
+ elif is_musa_available():
1308
+ args.num_processes = torch.musa.device_count()
1309
+ elif is_npu_available():
1310
+ args.num_processes = torch.npu.device_count()
1311
+ elif is_hpu_available():
1312
+ args.num_processes = torch.hpu.device_count()
1313
+ elif is_neuron_available():
1314
+ args.num_processes = torch.neuron.device_count()
1315
+ else:
1316
+ args.num_processes = torch.cuda.device_count()
1317
+ warned.append(f"\t`--num_processes` was set to a value of `{args.num_processes}`")
1318
+ if args.debug is None:
1319
+ args.debug = False
1320
+ if (
1321
+ not args.multi_gpu
1322
+ and args.num_processes > 1
1323
+ and (
1324
+ (is_xpu_available() and torch.xpu.device_count() > 1)
1325
+ or (is_npu_available() and torch.npu.device_count() > 1)
1326
+ or (is_hpu_available() and torch.hpu.device_count() > 1)
1327
+ or (is_mlu_available() and torch.mlu.device_count() > 1)
1328
+ or (is_sdaa_available() and torch.sdaa.device_count() > 1)
1329
+ or (is_musa_available() and torch.musa.device_count() > 1)
1330
+ or (is_neuron_available() and torch.neuron.device_count() > 1)
1331
+ or (torch.cuda.is_available() and torch.cuda.device_count() > 1)
1332
+ )
1333
+ ):
1334
+ warned.append(
1335
+ "\t\tMore than one GPU was found, enabling multi-GPU training.\n"
1336
+ "\t\tIf this was unintended please pass in `--num_processes=1`."
1337
+ )
1338
+ args.multi_gpu = True
1339
+ if args.num_machines is None:
1340
+ warned.append("\t`--num_machines` was set to a value of `1`")
1341
+ args.num_machines = 1
1342
+ if args.mixed_precision is None:
1343
+ warned.append("\t`--mixed_precision` was set to a value of `'no'`")
1344
+ args.mixed_precision = "no"
1345
+ if not hasattr(args, "use_cpu"):
1346
+ args.use_cpu = args.cpu
1347
+ if args.dynamo_backend is None:
1348
+ warned.append("\t`--dynamo_backend` was set to a value of `'no'`")
1349
+ args.dynamo_backend = "no"
1350
+ if args.debug:
1351
+ logger.debug("Running script in debug mode, expect distributed operations to be slightly slower.")
1352
+
1353
+ is_aws_env_disabled = defaults is None or (
1354
+ defaults is not None and defaults.compute_environment != ComputeEnvironment.AMAZON_SAGEMAKER
1355
+ )
1356
+ if is_aws_env_disabled and args.num_cpu_threads_per_process is None:
1357
+ args.num_cpu_threads_per_process = get_int_from_env(["OMP_NUM_THREADS"], 1)
1358
+ if args.use_cpu and args.num_processes >= 1 and get_int_from_env(["OMP_NUM_THREADS"], 0) == 0:
1359
+ local_size = get_int_from_env(
1360
+ ["MPI_LOCALNRANKS", "OMPI_COMM_WORLD_LOCAL_SIZE", "MV2_COMM_WORLD_LOCAL_SIZE"],
1361
+ max(int(args.num_processes / args.num_machines), 1),
1362
+ )
1363
+ import psutil
1364
+
1365
+ threads_per_process = int(psutil.cpu_count(logical=False) / local_size)
1366
+ if threads_per_process > 1:
1367
+ args.num_cpu_threads_per_process = threads_per_process
1368
+ warned.append(
1369
+ f"\t`--num_cpu_threads_per_process` was set to `{args.num_cpu_threads_per_process}` to improve out-of-box performance when training on CPUs"
1370
+ )
1371
+
1372
+ if any(warned):
1373
+ message = "The following values were not passed to `accelerate launch` and had defaults used instead:\n"
1374
+ message += "\n".join(warned)
1375
+ message += (
1376
+ "\nTo avoid this warning pass in values for each of the problematic parameters or run `accelerate config`."
1377
+ )
1378
+ logger.warning(message)
1379
+ return args, defaults, mp_from_config_flag
1380
+
1381
+
1382
+ def launch_command(args):
1383
+ args, defaults, mp_from_config_flag = _validate_launch_command(args)
1384
+ # Use the proper launcher
1385
+ if args.use_deepspeed and not args.cpu:
1386
+ args.deepspeed_fields_from_accelerate_config = list(defaults.deepspeed_config.keys()) if defaults else []
1387
+ if mp_from_config_flag:
1388
+ args.deepspeed_fields_from_accelerate_config.append("mixed_precision")
1389
+ args.deepspeed_fields_from_accelerate_config = ",".join(args.deepspeed_fields_from_accelerate_config)
1390
+ deepspeed_launcher(args)
1391
+ elif args.use_fsdp and not args.cpu:
1392
+ multi_gpu_launcher(args)
1393
+ elif args.use_megatron_lm and not args.cpu:
1394
+ multi_gpu_launcher(args)
1395
+ elif args.multi_gpu and not args.cpu:
1396
+ multi_gpu_launcher(args)
1397
+ elif args.tpu and not args.cpu:
1398
+ if args.tpu_use_cluster:
1399
+ tpu_pod_launcher(args)
1400
+ else:
1401
+ tpu_launcher(args)
1402
+ elif defaults is not None and defaults.compute_environment == ComputeEnvironment.AMAZON_SAGEMAKER:
1403
+ sagemaker_launcher(defaults, args)
1404
+ else:
1405
+ simple_launcher(args)
1406
+
1407
+
1408
+ def main():
1409
+ parser = launch_command_parser()
1410
+ args = parser.parse_args()
1411
+ launch_command(args)
1412
+
1413
+
1414
+ if __name__ == "__main__":
1415
+ main()
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2022 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from .selection_menu import BulletMenu
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (270 Bytes). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/cursor.cpython-312.pyc ADDED
Binary file (3.05 kB). View file
 
URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/helpers.cpython-312.pyc ADDED
Binary file (2.2 kB). View file