Update README.md
Browse files
README.md
CHANGED
@@ -87,14 +87,14 @@ Stage1:
|
|
87 |
```
|
88 |
python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \
|
89 |
--spm_model_path models/clue_6.model \
|
90 |
-
--dataset_path
|
91 |
--processes_num 32 --seq_length 128 \
|
92 |
--dynamic_masking --target mlm
|
93 |
```
|
94 |
|
95 |
```
|
96 |
-
python3 pretrain.py --dataset_path
|
97 |
-
--spm_model_path models/
|
98 |
--config_path models/bert/base_config.json \
|
99 |
--output_model_path models/cluecorpussmall_word_roberta_base_128.bin \
|
100 |
--world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \
|
@@ -108,16 +108,16 @@ Stage2:
|
|
108 |
|
109 |
```
|
110 |
python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \
|
111 |
-
--spm_model_path models/
|
112 |
-
--dataset_path
|
113 |
--processes_num 32 --seq_length 512 \
|
114 |
--dynamic_masking --target mlm
|
115 |
```
|
116 |
|
117 |
```
|
118 |
-
python3 pretrain.py --dataset_path
|
119 |
--pretrained_model_path models/cluecorpussmall_word_roberta_base_128.bin-1000000 \
|
120 |
-
--spm_model_path models/
|
121 |
--config_path models/bert/base_config.json \
|
122 |
--output_model_path models/cluecorpussmall_word_roberta_base_512.bin \
|
123 |
--world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \
|
|
|
87 |
```
|
88 |
python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \
|
89 |
--spm_model_path models/cluecorpussmall_spm.model \
|
90 |
+
--dataset_path cluecorpussmall_word_seq128_dataset.pt \
|
91 |
--processes_num 32 --seq_length 128 \
|
92 |
--dynamic_masking --target mlm
|
93 |
```
|
94 |
|
95 |
```
|
96 |
+
python3 pretrain.py --dataset_path cluecorpussmall_word_seq128_dataset.pt \
|
97 |
+
--spm_model_path models/cluecorpussmall_spm.model \
|
98 |
--config_path models/bert/base_config.json \
|
99 |
--output_model_path models/cluecorpussmall_word_roberta_base_128.bin \
|
100 |
--world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \
|
|
|
108 |
|
109 |
```
|
110 |
python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \
|
111 |
+
--spm_model_path models/cluecorpussmall_spm.model \
|
112 |
+
--dataset_path cluecorpussmall_word_seq512_dataset.pt \
|
113 |
--processes_num 32 --seq_length 512 \
|
114 |
--dynamic_masking --target mlm
|
115 |
```
|
116 |
|
117 |
```
|
118 |
+
python3 pretrain.py --dataset_path cluecorpussmall_word_seq512_dataset.pt \
|
119 |
--pretrained_model_path models/cluecorpussmall_word_roberta_base_128.bin-1000000 \
|
120 |
+
--spm_model_path models/cluecorpussmall_spm.model \
|
121 |
--config_path models/bert/base_config.json \
|
122 |
--output_model_path models/cluecorpussmall_word_roberta_base_512.bin \
|
123 |
--world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \
|