# Launch a FLUX LoRA training run ("penbul") through `accelerate launch`,
# using sd-scripts/flux_train_network.py.
#
# Layout: one option per line, each line ending in a backslash that is
# IMMEDIATELY followed by the newline. A backslash followed by a space does
# not continue the line: it quotes the space, which then becomes part of the
# next word (e.g. " --mixed_precision") and breaks option parsing. Keep
# trailing whitespace off these lines.
#
# Options placed before the script path are given to `accelerate launch`;
# options after it are given to the training script. `--mixed_precision bf16`
# therefore appears twice on purpose, once on each side of the script path.
#
# NOTE(review): --sdpa, --mem_eff_attn and --xformers are all passed; they
#   look like alternative attention implementations. Confirm which one is
#   intended and drop the others.
# NOTE(review): both --max_train_epochs 20 and --max_train_steps 7000 are set;
#   confirm which limit the trainer honours when both are present.
# NOTE(review): --cache_latents and --cache_latents_to_disk are both passed;
#   presumably the latter implies the former. Verify before removing either.
accelerate launch \
  --mixed_precision bf16 \
  --num_cpu_threads_per_process 1 \
  sd-scripts/flux_train_network.py \
  --pretrained_model_name_or_path "/workspace/fluxgym/models/unet/flux1-dev.sft" \
  --clip_l "/workspace/fluxgym/models/clip/clip_l.safetensors" \
  --t5xxl "/workspace/fluxgym/models/clip/t5xxl_fp16.safetensors" \
  --ae "/workspace/fluxgym/models/vae/ae.sft" \
  --cache_latents_to_disk \
  --save_model_as safetensors \
  --sdpa \
  --persistent_data_loader_workers \
  --max_data_loader_n_workers 2 \
  --seed 42 \
  --gradient_checkpointing \
  --mixed_precision bf16 \
  --save_precision bf16 \
  --network_module networks.lora_flux \
  --network_dim 64 \
  --optimizer_type adamw8bit \
  --sample_prompts="/workspace/fluxgym/outputs/penbul/sample_prompts.txt" \
  --sample_every_n_steps="250" \
  --learning_rate 1e-4 \
  --cache_text_encoder_outputs \
  --cache_text_encoder_outputs_to_disk \
  --fp8_base \
  --highvram \
  --max_train_epochs 20 \
  --save_every_n_epochs 1 \
  --dataset_config "/workspace/fluxgym/outputs/penbul/dataset.toml" \
  --output_dir "/workspace/fluxgym/outputs/penbul" \
  --output_name penbul \
  --timestep_sampling shift \
  --discrete_flow_shift 3.1582 \
  --model_prediction_type raw \
  --guidance_scale 1 \
  --loss_type l2 \
  --adaptive_noise_scale 1.0 \
  --bucket_reso_steps 64 \
  --cache_latents \
  --clip_skip 2 \
  --console_log_level DEBUG \
  --enable_bucket \
  --gradient_accumulation_steps 2 \
  --lr_scheduler_timescale 0 \
  --max_bucket_reso 1024 \
  --max_train_steps 7000 \
  --max_validation_steps 10 \
  --mem_eff_attn \
  --min_bucket_reso 256 \
  --network_alpha 32 \
  --network_dropout 0.1 \
  --noise_offset 0.1 \
  --offload_optimizer_device cpu \
  --resolution 768,768 \
  --train_batch_size 4 \
  --xformers