#!/usr/bin/env bash
# examples/inference/flux/flux_text_to_image.sh

set -e -x

# export TORCH_LOGS="+dynamo,recompiles,graph_breaks"
# export TORCHDYNAMO_VERBOSE=1
# export WANDB_MODE="offline"
export WANDB_MODE="disabled"

export NCCL_P2P_DISABLE=1
export NCCL_IB_DISABLE=1
export TORCH_NCCL_ENABLE_MONITORING=0

export FINETRAINERS_LOG_LEVEL="DEBUG"

BACKEND="ptd"
NUM_GPUS=4
CUDA_VISIBLE_DEVICES="0,1,2,3"

# Check the JSON files for the expected JSON format.
DATASET_FILE="examples/inference/flux/dummy_text_to_image.json"
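
# For reference only: a plausible shape for a text_to_image dataset file.
# The field names below are an illustrative assumption, not the confirmed
# schema -- check dummy_text_to_image.json above for the real format:
#   [
#     {"prompt": "A cat holding a sign that says hello world", "height": 1024, "width": 1024}
#   ]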

# Depending on how many GPUs you have available, choose your degree of parallelism and technique!
DDP_1="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 1 --dp_shards 1 --cp_degree 1 --tp_degree 1"
DDP_2="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 2 --dp_shards 1 --cp_degree 1 --tp_degree 1"
DDP_4="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 4 --dp_shards 1 --cp_degree 1 --tp_degree 1"
DDP_8="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 8 --dp_shards 1 --cp_degree 1 --tp_degree 1"
CP_2="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 1 --dp_shards 1 --cp_degree 2 --tp_degree 1"
CP_4="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 1 --dp_shards 1 --cp_degree 4 --tp_degree 1"
# FSDP_2="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 1 --dp_shards 2 --cp_degree 1 --tp_degree 1"
# FSDP_4="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 1 --dp_shards 4 --cp_degree 1 --tp_degree 1"
# HSDP_2_2="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 2 --dp_shards 2 --cp_degree 1 --tp_degree 1"
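
# The product pp_degree x dp_degree x dp_shards x cp_degree x tp_degree must
# equal the number of processes launched, so CP_4 below matches NUM_GPUS=4,
# while e.g. DDP_8 would need NUM_GPUS=8.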

# Parallel arguments
parallel_cmd=(
  # Intentionally unquoted so the chosen preset word-splits into separate flags;
  # swap in another preset (e.g. $DDP_4) to change technique.
  $CP_4
)

# Model arguments
model_cmd=(
  --model_name "flux"
  --pretrained_model_name_or_path "black-forest-labs/FLUX.1-dev"
  --cache_dir /raid/.cache/huggingface
  # VAE slicing/tiling trade a little speed for lower peak memory during decoding.
  --enable_slicing
  --enable_tiling
)

# Inference arguments
inference_cmd=(
  --inference_type text_to_image
  --dataset_file "$DATASET_FILE"
)

# Attention provider arguments
attn_provider_cmd=(
  # flash_varlen selects variable-length FlashAttention kernels, which
  # typically requires the flash-attn package to be installed.
  --attn_provider flash_varlen
)

# Torch config arguments
torch_config_cmd=(
  # Allow TF32 matmuls on Ampere and newer GPUs: faster, slightly less precise.
  --allow_tf32
  --float32_matmul_precision high
)

# Miscellaneous arguments
miscellaneous_cmd=(
  --seed 31337
  --tracker_name "finetrainers-inference"
  --output_dir "/raid/aryan/flux-inference"
  --init_timeout 600
  --nccl_timeout 600
  # Note: WANDB_MODE="disabled" above makes wandb logging a no-op; set it to
  # "offline" or unset it to actually record runs.
  --report_to "wandb"
)

# Execute the inference script
export CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES
# Note: --standalone launches its own local c10d rendezvous, so torchrun
# overrides the explicit --rdzv_* flags below.
torchrun \
  --standalone \
  --nnodes=1 \
  --nproc_per_node=$NUM_GPUS \
  --rdzv_backend c10d \
  --rdzv_endpoint="localhost:19242" \
  examples/inference/inference.py \
  "${parallel_cmd[@]}" \
  "${model_cmd[@]}" \
  "${inference_cmd[@]}" \
  "${attn_provider_cmd[@]}" \
  "${torch_config_cmd[@]}" \
  "${miscellaneous_cmd[@]}"

echo -ne "-------------------- Finished executing script --------------------\n\n"
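
# Usage (assuming the repository root as the working directory, since the
# script and dataset paths above are relative):
#   bash examples/inference/flux/flux_text_to_image.sh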