python -m torch.distributed.launch --nproc_per_node=16 --nnodes=8 \
    --node_rank=$OMPI_COMM_WORLD_RANK --master_addr="$MASTER_IP" --master_port=$MASTER_PORT \
    train.py /mnt/unilm/shaohanh/data/tnlg_config/ \
    --task vl_gpt_pretraining \
    --activation-fn gelu \
    --share-decoder-input-output-embed \
    --save-interval-updates 5000 \
    --no-epoch-checkpoints \
    --memory-efficient-fp16 \
    --fp16-init-scale 4 \
    --arch lm_base \
    --sample-break-mode none \
    --tokens-per-sample 2048 \
    --optimizer adam --adam-betas "(0.9, 0.98)" \
    --adam-eps 1e-08 \
    --clip-norm 0.0 \
    --lr 6e-4 \
    --lr-scheduler polynomial_decay \
    --warmup-updates 750 \
    --dropout 0.1 \
    --attention-dropout 0.1 \
    --weight-decay 0.01 \
    --batch-size 1 \
    --update-freq 2 \
    --log-format simple --log-interval 50 --disable-validation \
    --required-batch-size-multiple 1 \
    --total-num-update 300000 \
    --max-update 300000 \
    --seed 1 \
    --ddp-backend=legacy_ddp \
    --batch-read-ahead 100 \
    --rel-pos-buckets 32 \
    --max-rel-pos 128 \
    --dict-path /mnt/unilm/shumma/data/16g/dict.txt \
    --spm-model /mnt/unilm/shumma/data/16g/sentencepiece.bpe.model \
    --save-dir /mnt/unilm/shaohanh/exp/unigpt_exp/torchscale_base_laion_gpt \
    --tensorboard-logdir /mnt/unilm/shaohanh/exp/unigpt_exp/torchscale_base_laion_gpt/tb-logs \
    --laion-data-dir /mnt/conversationhub/shaohanh/bvt/data/laion_dataloader_config/ \
    --laion-batch-size 8 \
    --checkpoint-activations \
    --subln \
    --criterion vl_cross_entropy \
    --decoder-embed-dim 768 \
    --decoder-ffn-embed-dim 3072 \
    --decoder-layers 12 \
    --decoder-attention-heads 12
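The command is meant to be launched once per node: --node_rank=$OMPI_COMM_WORLD_RANK implies an OpenMPI launcher (e.g. mpirun with one process per node) supplies the node rank, while $MASTER_IP and $MASTER_PORT must point at the rendezvous address on node rank 0. With --nnodes=8 and --nproc_per_node=16 (128 workers total), --batch-size 1 and --update-freq 2 yield an effective text batch of 1 x 2 x 128 = 256 sequences, i.e. about 524K tokens per update at --tokens-per-sample 2048. A minimal sketch of one way to derive the rendezvous variables before launching; the wrapper name, hostfile path, and port value are assumptions for illustration, not part of the original recipe:

# launch.sh (hypothetical wrapper): run one copy per node, e.g.
#   mpirun -np 8 -npernode 1 -hostfile /path/to/hostfile bash launch.sh
# Assumption: MPI rank 0 runs on the first host listed in the hostfile.
export MASTER_IP=$(head -n 1 /path/to/hostfile | awk '{print $1}')
export MASTER_PORT=29500   # any free TCP port reachable from all nodes

python -m torch.distributed.launch --nproc_per_node=16 --nnodes=8 \
    --node_rank=$OMPI_COMM_WORLD_RANK --master_addr="$MASTER_IP" --master_port=$MASTER_PORT \
    train.py ...   # remaining arguments as in the full command above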