File size: 1,401 Bytes
3711e10 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
#! /bin/bash
# Command-line options for the "blocklm-large-chinese" experiment.
#
# This file only defines variables; it does not launch anything itself.
# NOTE(review): presumably it is sourced by a launcher script that appends
# ${gpt_options} to the training command -- confirm against the caller.
# Because it may be sourced, it deliberately does not enable `set -e`/`set -u`,
# which would leak into the calling shell.
#
# Variables defined:
#   script_path  absolute path of this file
#   script_dir   directory containing this file
#   config_json  path passed to --deepspeed_config (relative to the CWD)
#   gpt_options  all flags below, space-separated on a single line
#
# Environment:
#   MP_SIZE      value for --model-parallel-size; falls back to 1 when
#                unset or empty (a warning is printed to stderr).

# Quoted so that a checkout path containing whitespace still resolves.
script_path=$(realpath "${BASH_SOURCE[0]}")
script_dir=$(dirname "${script_path}")
config_json="./config/config_block_large_chinese.json"

# Without a value, "--model-parallel-size" would be followed directly by
# "--num-layers", leaving the flag with no argument.
# NOTE(review): 1 is assumed to be the trainer's own default -- confirm.
if [[ -z "${MP_SIZE:-}" ]]; then
  printf '%s: MP_SIZE is not set; using --model-parallel-size 1\n' \
    "${script_path}" >&2
fi

# The flags are collected in an array so that no line depends on a trailing
# backslash; a forgotten continuation previously embedded a newline in the
# middle of the option string.
_gpt_option_words=(
  --block-lm
  --task-mask
  --bert-prob 0.4
  --gap-sentence-prob 0.3
  --avg-block-length 3
  --gpt-min-ratio 0.25
  --block-mask-prob 0.1
  --short-seq-prob 0.02
  --experiment-name blocklm-large-chinese
  --model-parallel-size "${MP_SIZE:-1}"
  --num-layers 24
  --hidden-size 1024
  --num-attention-heads 16
  --seq-length 512
  --max-position-embeddings 1024
  --save ../model_save/checkpoints/
  --load ../model_save/checkpoints/
  --log-interval 50
  --eval-interval 1000
  --save-interval 2000
  --train-iters 250000000
  --train-data wudao
  --resume-dataloader
  --loader-scatter 4
  --no-lazy-loader
  --tokenizer-type ChineseSPTokenizer
  --fix-command-token
  --split 949,50,1
  --distributed-backend nccl
  --lr-decay-style cosine
  --lr-decay-ratio 0.1
  --lr-decay-iters 200000
  --warmup 0.04
  --checkpoint-activations
  --deepspeed-activation-checkpointing
  --fp16
)

# Flatten to the plain string that consumers of gpt_options expect. printf
# repeats the '%s ' format for every word, so the result is independent of
# the caller's IFS and ends with a single trailing space.
printf -v gpt_options '%s ' "${_gpt_option_words[@]}"
unset _gpt_option_words

# DeepSpeed flags are appended as a separate step; the trailing space left
# by printf above keeps them separated from the preceding flag.
gpt_options+="--deepspeed --deepspeed_config ${config_json}"
|