guoqiang wang
committed on
Commit
•
3711e10
1
Parent(s):
5841beb
Upload ds_block_large_chinese.sh
Browse files
- ds_block_large_chinese.sh +48 -0
ds_block_large_chinese.sh
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
#
# Option set for the "blocklm-large-chinese" training configuration.
#
# This file only assigns shell variables; it does not launch anything itself:
#   script_path  absolute path of this file
#   script_dir   directory containing this file
#   config_json  path handed to --deepspeed_config
#   gpt_options  single-line, whitespace-separated list of command-line flags
#
# Required environment:
#   MP_SIZE  value substituted after --model-parallel-size. It is not defined
#            here, so it must be set before this file is evaluated; if it is
#            unset the flag is emitted without a value.
#
# NOTE(review): presumably this file is sourced by a launcher that expands
# ${gpt_options} unquoted -- confirm against the caller. For that reason no
# `set -e` / `set -u` is enabled here: shell options set in a sourced file
# would leak into the calling shell.

# BASH_SOURCE (unlike $0) names this file even when it is sourced. Both
# expansions are quoted so a checkout path containing spaces stays intact.
script_path=$(realpath "${BASH_SOURCE[0]}")
script_dir=$(dirname "${script_path}")

# Relative path: it is resolved against the launcher's working directory,
# not against script_dir.
config_json="./config/config_block_large_chinese.json"

# Every option line ends with a backslash so the value contains no raw
# newline. The --load line used to lack one, which embedded a newline in the
# string and would split the command in two under `eval "$cmd"`.
gpt_options=" \
       --block-lm \
       --task-mask \
       --bert-prob 0.4 \
       --gap-sentence-prob 0.3 \
       --avg-block-length 3 \
       --gpt-min-ratio 0.25 \
       --block-mask-prob 0.1 \
       --short-seq-prob 0.02 \
       --experiment-name blocklm-large-chinese \
       --model-parallel-size ${MP_SIZE} \
       --num-layers 24 \
       --hidden-size 1024 \
       --num-attention-heads 16 \
       --seq-length 512 \
       --max-position-embeddings 1024 \
       --save ../model_save/checkpoints/ \
       --load ../model_save/checkpoints/ \
       --log-interval 50 \
       --eval-interval 1000 \
       --save-interval 2000 \
       --train-iters 250000000 \
       --train-data wudao \
       --resume-dataloader \
       --loader-scatter 4 \
       --no-lazy-loader \
       --tokenizer-type ChineseSPTokenizer \
       --fix-command-token \
       --split 949,50,1 \
       --distributed-backend nccl \
       --lr-decay-style cosine \
       --lr-decay-ratio 0.1 \
       --lr-decay-iters 200000 \
       --warmup 0.04 \
       --checkpoint-activations \
       --deepspeed-activation-checkpointing \
       --fp16 \
"

# DeepSpeed flags are appended separately from the model/training flags.
gpt_options="${gpt_options} \
       --deepspeed \
       --deepspeed_config ${config_json} \
"
|