#!/usr/bin/env bash
#
# Launches opensora/train/train_t2v.py through `accelerate launch`, using the
# accelerate config at scripts/accelerate_configs/deepspeed_zero2_config.yaml.
#
# Usage:
#   Run from the directory that contains both `scripts/` and `opensora/`;
#   the config file and the training script are referenced by relative path.
#
# Before running:
#   - Fill in WANDB_KEY and ENTITY below (both are exported empty here).
#     NOTE(review): presumably consumed by the training script for the
#     `--report_to="wandb"` logging; confirm against train_t2v.py.
#   - Adjust --data_path / --video_folder to the local dataset location.
#   - `--pretrained t2v.pt` is resolved relative to the working directory.

export WANDB_KEY=""
export ENTITY=""
# Run name. Also reused as --output_dir below so the two cannot drift apart.
export PROJECT="t2v-f17-256-img4-videovae488-bf16-ckpt-xformers-bs4-lr2e-5-t5"

# Each flag sits on its own continuation line; the trailing backslash must be
# the very last character on the line (no trailing whitespace after it).
accelerate launch \
  --config_file scripts/accelerate_configs/deepspeed_zero2_config.yaml \
  opensora/train/train_t2v.py \
  --model LatteT2V-XL/122 \
  --text_encoder_name DeepFloyd/t5-v1_1-xxl \
  --dataset t2v \
  --ae CausalVAEModel_4x8x8 \
  --ae_path CausalVAEModel_4x8x8 \
  --data_path /remote-home1/dataset/sharegpt4v_path_cap_64x512x512.json \
  --video_folder /remote-home1/dataset/data_split_tt \
  --sample_rate 1 \
  --num_frames 17 \
  --max_image_size 256 \
  --gradient_checkpointing \
  --attention_mode xformers \
  --train_batch_size=4 \
  --dataloader_num_workers 10 \
  --gradient_accumulation_steps=1 \
  --max_train_steps=1000000 \
  --learning_rate=2e-05 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --mixed_precision="bf16" \
  --report_to="wandb" \
  --checkpointing_steps=500 \
  --output_dir="${PROJECT}" \
  --allow_tf32 \
  --pretrained t2v.pt \
  --use_deepspeed \
  --model_max_length 300 \
  --use_image_num 4 \
  --use_img_from_vid