pere committed on
Commit
dfaeaf7
1 Parent(s): d457bb4
norwegian_byt5_speedtest_1part_base.gin ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Gin config: ByT5-base pre-training speed test (single partition) on the
# Norwegian Colossal Corpus (NCC) English span-corruption mixture, continuing
# from the public ByT5-base Mesh TensorFlow checkpoint.
include 't5x/examples/t5/byt5/base.gin'
include 'pretrain_cont.gin'
# include 't5x/configs/runs/pretrain.gin'
# include 't5x/configs/runs/finetune.gin'


# Register necessary SeqIO Tasks/Mixtures.
import t5.data.mixtures
import tasks

MIXTURE_OR_TASK_NAME = "byt5_ncc_english_span_corruption_stream"
TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 512}
TRAIN_STEPS = 1_500_000
DROPOUT_RATE = 0.0  # Changed from the default since T5-1.1 recommends this.
INITIAL_CHECKPOINT_PATH = "gs://t5-data/pretrained_models/byt5/base/model.ckpt-1000000"
PjitPartitioner.num_partitions = 1

# `LOSS_NORMALIZING_FACTOR`: When fine-tuning a model that was pre-trained
# using Mesh Tensorflow (e.g. the public T5 / mT5 / ByT5 models), this should be
# set to `pretraining batch_size` * `target_token_length`. For T5 and T5.1.1:
# `2048 * 114`. For mT5: `1024 * 229`. For ByT5: `1024 * 189`.

# The instructions above are from T5X. We here have to convert the Mesh
# TensorFlow ByT5 model, so this needs to be set (1024 * 189 = 193536).
LOSS_NORMALIZING_FACTOR = 193536
train_byt5_speedtest_1part_base.sh ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Launch the ByT5-base single-partition pre-training speed test via T5X,
# using the gin config from this project directory.

# Quote all expansions so paths containing spaces don't word-split (SC2086).
PROJECT_DIR="${HOME}/models/pk-nb-t5x"
T5X_DIR="../../t5x"  # directory where the t5x repo is cloned
MODEL_DIR="gs://t5x-training/pretrained_models/speedtest_1part_byt5x_base"
export PYTHONPATH="${PROJECT_DIR}"

# The inner single quotes around MODEL_DIR are required by gin so the value
# is parsed as a gin string literal.
# NOTE(review): the original file ended with a dangling "\" continuation
# after the last argument; removed here so the command terminates cleanly.
python3 "${T5X_DIR}/t5x/train.py" \
  --gin_search_paths="${PROJECT_DIR}" \
  --gin_file="norwegian_byt5_speedtest_1part_base.gin" \
  --gin.MODEL_DIR="'${MODEL_DIR}'"