compute_environment: LOCAL_MACHINE deepspeed_config: gradient_accumulation_steps: 1 offload_optimizer_device: cpu offload_param_device: cpu zero3_init_flag: false zero_stage: 2 distributed_type: DEEPSPEED downcast_bf16: 'no' fsdp_config: {} machine_rank: 0 main_process_ip: null main_process_port: null main_training_function: main mixed_precision: 'no' num_machines: 1 num_processes: 1 use_cpu: false