#!/usr/bin/env bash
# examples/inference/flux/flux_text_to_image.sh

set -e -x

# export TORCH_LOGS="+dynamo,recompiles,graph_breaks"
# export TORCHDYNAMO_VERBOSE=1
# export WANDB_MODE="offline"
export WANDB_MODE="disabled"

export NCCL_P2P_DISABLE=1
export NCCL_IB_DISABLE=1
export TORCH_NCCL_ENABLE_MONITORING=0

export FINETRAINERS_LOG_LEVEL="DEBUG"

BACKEND="ptd"
NUM_GPUS=4
CUDA_VISIBLE_DEVICES="0,1,2,3"

# Check the JSON files for the expected JSON format.
DATASET_FILE="examples/inference/flux/dummy_text_to_image.json"
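
# For reference only: a plausible shape for a text_to_image dataset file.
# The field names below are an illustrative assumption, not the confirmed
# schema -- check dummy_text_to_image.json above for the real format:
#   [
#     {"prompt": "A cat holding a sign that says hello world", "height": 1024, "width": 1024}
#   ]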

# Depending on how many GPUs you have available, choose your degree of parallelism and technique!
DDP_1="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 1 --dp_shards 1 --cp_degree 1 --tp_degree 1"
DDP_2="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 2 --dp_shards 1 --cp_degree 1 --tp_degree 1"
DDP_4="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 4 --dp_shards 1 --cp_degree 1 --tp_degree 1"
DDP_8="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 8 --dp_shards 1 --cp_degree 1 --tp_degree 1"
CP_2="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 1 --dp_shards 1 --cp_degree 2 --tp_degree 1"
CP_4="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 1 --dp_shards 1 --cp_degree 4 --tp_degree 1"
# FSDP_2="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 1 --dp_shards 2 --cp_degree 1 --tp_degree 1"
# FSDP_4="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 1 --dp_shards 4 --cp_degree 1 --tp_degree 1"
# HSDP_2_2="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 2 --dp_shards 2 --cp_degree 1 --tp_degree 1"
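
# The product pp_degree x dp_degree x dp_shards x cp_degree x tp_degree must
# equal the number of processes launched, so CP_4 below matches NUM_GPUS=4,
# while e.g. DDP_8 would need NUM_GPUS=8.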

# Parallel arguments
parallel_cmd=(
  # Intentionally unquoted so the chosen preset word-splits into separate flags;
  # swap in another preset (e.g. $DDP_4) to change technique.
  $CP_4
)

# Model arguments
model_cmd=(
  --model_name "flux"
  --pretrained_model_name_or_path "black-forest-labs/FLUX.1-dev"
  --cache_dir /raid/.cache/huggingface
  # VAE slicing/tiling trade a little speed for lower peak memory during decoding.
  --enable_slicing
  --enable_tiling
)

# Inference arguments
inference_cmd=(
  --inference_type text_to_image
  --dataset_file "$DATASET_FILE"
)

# Attention provider arguments
attn_provider_cmd=(
  # flash_varlen selects variable-length FlashAttention kernels, which
  # typically requires the flash-attn package to be installed.
  --attn_provider flash_varlen
)

# Torch config arguments
torch_config_cmd=(
  # Allow TF32 matmuls on Ampere and newer GPUs: faster, slightly less precise.
  --allow_tf32
  --float32_matmul_precision high
)

# Miscellaneous arguments
miscellaneous_cmd=(
  --seed 31337
  --tracker_name "finetrainers-inference"
  --output_dir "/raid/aryan/flux-inference"
  --init_timeout 600
  --nccl_timeout 600
  # Note: WANDB_MODE="disabled" above makes wandb logging a no-op; set it to
  # "offline" or unset it to actually record runs.
  --report_to "wandb"
)

# Execute the inference script
export CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES
# Note: --standalone launches its own local c10d rendezvous, so torchrun
# overrides the explicit --rdzv_* flags below.
torchrun \
  --standalone \
  --nnodes=1 \
  --nproc_per_node=$NUM_GPUS \
  --rdzv_backend c10d \
  --rdzv_endpoint="localhost:19242" \
  examples/inference/inference.py \
  "${parallel_cmd[@]}" \
  "${model_cmd[@]}" \
  "${inference_cmd[@]}" \
  "${attn_provider_cmd[@]}" \
  "${torch_config_cmd[@]}" \
  "${miscellaneous_cmd[@]}"

echo -ne "-------------------- Finished executing script --------------------\n\n"
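
# Usage (assuming the repository root as the working directory, since the
# script and dataset paths above are relative):
#   bash examples/inference/flux/flux_text_to_image.sh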