uer committed on
Commit a43f522
1 Parent(s): 713c2e5

Update README.md

Files changed (1)
  1. README.md +39 -25
README.md CHANGED
@@ -138,57 +138,71 @@ Taking the case of word-based RoBERTa-Medium
  Stage1:
  
  ```
- python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \
- --spm_model_path models/cluecorpussmall_spm.model \
- --dataset_path cluecorpussmall_word_seq128_dataset.pt \
- --processes_num 32 --seq_length 128 \
+ python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \\
+ --spm_model_path models/cluecorpussmall_spm.model \\
+ --dataset_path cluecorpussmall_word_seq128_dataset.pt \\
+ --processes_num 32 --seq_length 128 \\
  --dynamic_masking --target mlm
  ```
  
  ```
- python3 pretrain.py --dataset_path cluecorpussmall_word_seq128_dataset.pt \
- --spm_model_path models/cluecorpussmall_spm.model \
- --config_path models/bert/medium_config.json \
- --output_model_path models/cluecorpussmall_word_roberta_medium_seq128_model.bin \
- --world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \
- --total_steps 1000000 --save_checkpoint_steps 100000 --report_steps 50000 \
- --learning_rate 1e-4 --batch_size 64 \
+ python3 pretrain.py --dataset_path cluecorpussmall_word_seq128_dataset.pt \\
+ --spm_model_path models/cluecorpussmall_spm.model \\
+ --config_path models/bert/medium_config.json \\
+ --output_model_path models/cluecorpussmall_word_roberta_medium_seq128_model.bin \\
+ --world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \\
+ --total_steps 1000000 --save_checkpoint_steps 100000 --report_steps 50000 \\
+ --learning_rate 1e-4 --batch_size 64 \\
  --embedding word_pos_seg --encoder transformer --mask fully_visible --target mlm --tie_weights
  ```
  
  Stage2:
  
  ```
- python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \
- --spm_model_path models/cluecorpussmall_spm.model \
- --dataset_path cluecorpussmall_word_seq512_dataset.pt \
- --processes_num 32 --seq_length 512 \
+ python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \\
+ --spm_model_path models/cluecorpussmall_spm.model \\
+ --dataset_path cluecorpussmall_word_seq512_dataset.pt \\
+ --processes_num 32 --seq_length 512 \\
  --dynamic_masking --target mlm
  ```
  
  ```
- python3 pretrain.py --dataset_path cluecorpussmall_word_seq512_dataset.pt \
- --pretrained_model_path models/cluecorpussmall_word_roberta_medium_seq128_model.bin-1000000 \
- --spm_model_path models/cluecorpussmall_spm.model \
- --config_path models/bert/medium_config.json \
- --output_model_path models/cluecorpussmall_word_roberta_medium_seq512_model.bin \
- --world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \
- --total_steps 250000 --save_checkpoint_steps 50000 --report_steps 10000 \
- --learning_rate 5e-5 --batch_size 16 \
+ python3 pretrain.py --dataset_path cluecorpussmall_word_seq512_dataset.pt \\
+ --pretrained_model_path models/cluecorpussmall_word_roberta_medium_seq128_model.bin-1000000 \\
+ --spm_model_path models/cluecorpussmall_spm.model \\
+ --config_path models/bert/medium_config.json \\
+ --output_model_path models/cluecorpussmall_word_roberta_medium_seq512_model.bin \\
+ --world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \\
+ --total_steps 250000 --save_checkpoint_steps 50000 --report_steps 10000 \\
+ --learning_rate 5e-5 --batch_size 16 \\
  --embedding word_pos_seg --encoder transformer --mask fully_visible --target mlm --tie_weights
  ```
  
  Finally, we convert the pre-trained model into Huggingface's format:
  
  ```
- python3 scripts/convert_bert_from_uer_to_huggingface.py --input_model_path models/cluecorpussmall_word_roberta_medium_seq128_model.bin-250000 \
- --output_model_path pytorch_model.bin \
+ python3 scripts/convert_bert_from_uer_to_huggingface.py --input_model_path models/cluecorpussmall_word_roberta_medium_seq128_model.bin-250000 \\
+ --output_model_path pytorch_model.bin \\
  --layers_num 8 --target mlm
  ```
  
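After conversion, the checkpoint should be loadable with Hugging Face Transformers. A minimal sketch, assuming the converted `pytorch_model.bin` and a matching `config.json` sit in a local directory together with the sentencepiece model; the directory name and the use of `AlbertTokenizer` to wrap the sentencepiece vocabulary are assumptions for illustration, not taken from this commit:

```
# Minimal sketch (not part of this commit): load the converted checkpoint with
# Hugging Face Transformers. Assumes ./word_roberta_medium contains
# pytorch_model.bin, config.json, and the sentencepiece model saved as
# spiece.model, exposed here through AlbertTokenizer; adjust paths as needed.
import torch
from transformers import AlbertTokenizer, BertForMaskedLM

model_dir = "./word_roberta_medium"  # hypothetical local directory
tokenizer = AlbertTokenizer.from_pretrained(model_dir)
model = BertForMaskedLM.from_pretrained(model_dir)
model.eval()

text = "北京是[MASK]的首都。"
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits

# Report the highest-scoring token at the [MASK] position.
mask_pos = (inputs["input_ids"][0] == tokenizer.mask_token_id).nonzero(as_tuple=True)[0]
predicted_ids = logits[0, mask_pos].argmax(dim=-1)
print(tokenizer.decode(predicted_ids))
```

Since pre-training used `--target mlm` with tied weights, the converted weights should fit a masked-language-modeling head such as `BertForMaskedLM`.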
  ### BibTeX entry and citation info
  
  ```
+ @article{devlin2018bert,
+ title={BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding},
+ author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
+ journal={arXiv preprint arXiv:1810.04805},
+ year={2018}
+ }
+ 
+ @article{turc2019,
+ title={Well-Read Students Learn Better: On the Importance of Pre-training Compact Models},
+ author={Turc, Iulia and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
+ journal={arXiv preprint arXiv:1908.08962v2},
+ year={2019}
+ }
+ 
  @article{zhao2019uer,
  title={UER: An Open-Source Toolkit for Pre-training Models},
  author={Zhao, Zhe and Chen, Hui and Zhang, Jinbin and Zhao, Xin and Liu, Tao and Lu, Wei and Chen, Xi and Deng, Haotang and Ju, Qi and Du, Xiaoyong},