export MODEL_NAME="CompVis/stable-diffusion-v1-4"
export dataset_name="lambdalabs/pokemon-blip-captions"

accelerate launch --mixed_precision="fp16" train_text_to_image.py \
  --pretrained_model_name_or_path=$MODEL_NAME \
  --dataset_name=$dataset_name \
  --use_ema \
  --resolution=512 --center_crop --random_flip \
  --train_batch_size=1 \
  --gradient_accumulation_steps=4 \
  --gradient_checkpointing \
  --learning_rate=1e-05 \
  --max_grad_norm=1 \
  --num_train_epochs=1 \
  --lr_scheduler="constant" --lr_warmup_steps=0 \
  --output_dir="sd-pokemon-model"
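
Once training finishes, the fine-tuned weights saved to `--output_dir` can be loaded for inference with diffusers. A minimal sketch, assuming the `sd-pokemon-model` output directory from the command above (the prompt and output filename are illustrative):

```python
import torch
from diffusers import StableDiffusionPipeline

# Load the fine-tuned pipeline from the training output directory.
pipe = StableDiffusionPipeline.from_pretrained(
    "sd-pokemon-model", torch_dtype=torch.float16
).to("cuda")

# Generate an image from a text prompt (prompt chosen for illustration).
image = pipe(prompt="yoda").images[0]
image.save("yoda-pokemon.png")
```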