uer committed on
Commit
2c216c9
1 Parent(s): 6fa1e5b

Update README.md

Files changed (1)
  1. README.md +13 -7
README.md CHANGED
@@ -65,8 +65,7 @@ python3 pretrain.py --dataset_path cluecorpussmall_t5_seq128_dataset.pt \
  --learning_rate 1e-3 --batch_size 64 \
  --span_masking --span_geo_prob 0.3 --span_max_length 5 \
  --embedding word --relative_position_embedding --remove_embedding_layernorm --tgt_embedding word \
- --encoder transformer --mask fully_visible --layernorm_positioning pre\
- --remove_transformer_bias --decoder transformer \
+ --encoder transformer --mask fully_visible --layernorm_positioning pre --decoder transformer \
  --target t5 --tie_weights
 
 ```
@@ -77,12 +76,12 @@ Stage2:
 python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \
  --vocab_path models/google_zh_with_sentinel_vocab.txt \
  --dataset_path cluecorpussmall_t5_seq512_dataset.pt \
- --seq_length 512 --processes_num 32 --target t5 \
- --dynamic_masking
+ --processes_num 32 --seq_length 512 \
+ --dynamic_masking --target t5
 ```
 
 ```
- python3 pretrain.py --dataset_path cluecorpussmall_t5_seq128_dataset.pt \
+ python3 pretrain.py --dataset_path cluecorpussmall_t5_seq512_dataset.pt \
  --pretrained_model_path models/cluecorpussmall_t5_seq128_model.bin-1000000 \
  --vocab_path models/google_zh_with_sentinel_vocab.txt \
  --config_path models/t5/small_config.json \
@@ -92,8 +91,7 @@ python3 pretrain.py --dataset_path cluecorpussmall_t5_seq128_dataset.pt \
  --learning_rate 1e-3 --batch_size 16 \
  --span_masking --span_geo_prob 0.3 --span_max_length 5 \
  --embedding word --relative_position_embedding --remove_embedding_layernorm --tgt_embedding word \
- --encoder transformer --mask fully_visible --layernorm_positioning pre\
- --remove_transformer_bias --decoder transformer \
+ --encoder transformer --mask fully_visible --layernorm_positioning pre --decoder transformer \
  --target t5 --tie_weights
 ```
 
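The next hunk's context line invokes scripts/convert_t5_from_uer_to_huggingface.py to export the finished checkpoint in Hugging Face format. A minimal loading sketch, assuming the converted weights, the T5 config, and the sentinel vocabulary sit together in one directory; the directory name and the prompt below are illustrative assumptions, not part of this commit:

```
from transformers import BertTokenizer, T5ForConditionalGeneration

# Hypothetical output directory of convert_t5_from_uer_to_huggingface.py;
# assumed to contain the converted weights (pytorch_model.bin), config.json,
# and the sentinel vocabulary (vocab.txt).
model_dir = "cluecorpussmall_t5_small_seq512"

tokenizer = BertTokenizer.from_pretrained(model_dir)
model = T5ForConditionalGeneration.from_pretrained(model_dir)

# Encode an illustrative Chinese prompt and let the model generate a continuation.
input_ids = tokenizer("作为电子商务平台，京东绝对是领先者。", return_tensors="pt").input_ids
output_ids = model.generate(input_ids, max_length=50)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```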
@@ -110,6 +108,14 @@ python3 scripts/convert_t5_from_uer_to_huggingface.py --input_model_path cluecor
 ### BibTeX entry and citation info
 
 ```
+ @article{2020t5,
+ title = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
+ author = {Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu},
+ journal = {Journal of Machine Learning Research},
+ pages = {1-67},
+ year = {2020}
+ }
+
 @article{zhao2019uer,
 title={UER: An Open-Source Toolkit for Pre-training Models},
 author={Zhao, Zhe and Chen, Hui and Zhang, Jinbin and Zhao, Xin and Liu, Tao and Lu, Wei and Chen, Xi and Deng, Haotang and Ju, Qi and Du, Xiaoyong},