uer committed on
Commit
1380449
1 Parent(s): f9ddf5f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +7 -7
README.md CHANGED
@@ -87,14 +87,14 @@ Stage1:
87
  ```
88
  python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \
89
  --spm_model_path models/clue_6.model \
90
- --dataset_path cluecorpussmall_seq128_dataset.pt \
91
  --processes_num 32 --seq_length 128 \
92
  --dynamic_masking --target mlm
93
  ```
94
 
95
  ```
96
- python3 pretrain.py --dataset_path cluecorpussmall_seq128_dataset.pt \
97
- --spm_model_path models/clue_6.model \
98
  --config_path models/bert/base_config.json \
99
  --output_model_path models/cluecorpussmall_word_roberta_base_128.bin \
100
  --world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \
@@ -108,16 +108,16 @@ Stage2:
108
 
109
  ```
110
  python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \
111
- --spm_model_path models/clue_6.model \
112
- --dataset_path cluecorpussmall_seq512_dataset.pt \
113
  --processes_num 32 --seq_length 512 \
114
  --dynamic_masking --target mlm
115
  ```
116
 
117
  ```
118
- python3 pretrain.py --dataset_path cluecorpussmall_seq128_dataset.pt \
119
  --pretrained_model_path models/cluecorpussmall_word_roberta_base_128.bin-1000000 \
120
- --spm_model_path models/clue_6.model \
121
  --config_path models/bert/base_config.json \
122
  --output_model_path models/cluecorpussmall_word_roberta_base_512.bin \
123
  --world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \
 
87
  ```
88
  python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \
89
  --spm_model_path models/clue_6.model \
90
+ --dataset_path cluecorpussmall_word_seq128_dataset.pt \
91
  --processes_num 32 --seq_length 128 \
92
  --dynamic_masking --target mlm
93
  ```
94
 
95
  ```
96
+ python3 pretrain.py --dataset_path cluecorpussmall_word_seq128_dataset.pt \
97
+ --spm_model_path models/cluecorpussmall_spm.model \
98
  --config_path models/bert/base_config.json \
99
  --output_model_path models/cluecorpussmall_word_roberta_base_128.bin \
100
  --world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \
 
108
 
109
  ```
110
  python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \
111
+ --spm_model_path models/cluecorpussmall_spm.model \
112
+ --dataset_path cluecorpussmall_word_seq512_dataset.pt \
113
  --processes_num 32 --seq_length 512 \
114
  --dynamic_masking --target mlm
115
  ```
116
 
117
  ```
118
+ python3 pretrain.py --dataset_path cluecorpussmall_word_seq512_dataset.pt \
119
  --pretrained_model_path models/cluecorpussmall_word_roberta_base_128.bin-1000000 \
120
+ --spm_model_path models/cluecorpussmall_spm.model \
121
  --config_path models/bert/base_config.json \
122
  --output_model_path models/cluecorpussmall_word_roberta_base_512.bin \
123
  --world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \