guoqiang wang committed on
Commit
3711e10
1 Parent(s): 5841beb

Upload ds_block_large_chinese.sh

Browse files
Files changed (1) hide show
  1. ds_block_large_chinese.sh +48 -0
ds_block_large_chinese.sh ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#! /bin/bash
#
# Option set for the "blocklm-large-chinese" experiment.
#
# Defines:
#   script_path, script_dir - resolved location of this script
#   config_json             - path handed to --deepspeed_config
#   gpt_options             - one-line, space-separated string of flags
#
# Reads:
#   MP_SIZE - value for --model-parallel-size. It is not assigned anywhere in
#             this script, so it must already be set when this file is
#             evaluated (presumably by a launcher that sources this file and
#             then consumes gpt_options -- TODO confirm against the caller).

# Expansions are quoted so a script location containing whitespace still
# resolves to a single path.
script_path=$(realpath "${BASH_SOURCE[0]}")
script_dir=$(dirname "${script_path}")

config_json="./config/config_block_large_chinese.json"

# Every line inside the quotes ends with a backslash so the resulting value is
# a single line. An embedded newline would only be tolerated by consumers that
# expand the string unquoted with the default IFS; it would split the command
# in two if the string were ever passed through `eval "$cmd"`.
gpt_options=" \
       --block-lm \
       --task-mask \
       --bert-prob 0.4 \
       --gap-sentence-prob 0.3 \
       --avg-block-length 3 \
       --gpt-min-ratio 0.25 \
       --block-mask-prob 0.1 \
       --short-seq-prob 0.02 \
       --experiment-name blocklm-large-chinese \
       --model-parallel-size ${MP_SIZE} \
       --num-layers 24 \
       --hidden-size 1024 \
       --num-attention-heads 16 \
       --seq-length 512 \
       --max-position-embeddings 1024 \
       --save ../model_save/checkpoints/ \
       --load ../model_save/checkpoints/ \
       --log-interval 50 \
       --eval-interval 1000 \
       --save-interval 2000 \
       --train-iters 250000000 \
       --train-data wudao \
       --resume-dataloader \
       --loader-scatter 4 \
       --no-lazy-loader \
       --tokenizer-type ChineseSPTokenizer \
       --fix-command-token \
       --split 949,50,1 \
       --distributed-backend nccl \
       --lr-decay-style cosine \
       --lr-decay-ratio 0.1 \
       --lr-decay-iters 200000 \
       --warmup 0.04 \
       --checkpoint-activations \
       --deepspeed-activation-checkpointing \
       --fp16 \
"

# Append the DeepSpeed flags to the options assembled above.
gpt_options="${gpt_options} \
       --deepspeed \
       --deepspeed_config ${config_json} \
"