pere committed
Commit af0a405
1 Parent(s): 6d37e83

all batches

batch_parliament_large.sh ADDED
@@ -0,0 +1,12 @@
+ PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
+ export PYTHONPATH=${PROJECT_DIR}
+ INITIAL_CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_large/checkpoint_1500000\"
+ TRAIN_STEPS=1505000
+
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_large_v1\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_large_v2\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_large_v3\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_large_v4\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_large_v5\"
+
+
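
For reference, a minimal sketch of what the first command above is equivalent to once the shell expands the variables (the remaining four commands differ only in --gin.MODEL_DIR). The backslash-escaped quotes keep literal double quotes inside the argument values, so gin receives each string override as a quoted Python string:

  python3 ../../t5x/t5x/train.py --gin_search_paths="./" \
      --gin.TRAIN_STEPS=1505000 \
      --gin_file="finetune_categorisation_large.gin" \
      --gin.INITIAL_CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_large/checkpoint_1500000\" \
      --gin.MIXTURE_OR_TASK_NAME=\"parliament\" \
      --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_large_v1\"
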
batch_parliament_small.sh ADDED
@@ -0,0 +1,12 @@
+ PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
+ export PYTHONPATH=${PROJECT_DIR}
+ INITIAL_CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_small/checkpoint_1500000\"
+ TRAIN_STEPS=1505000
+
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_small.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_small_v1\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_small.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_small_v2\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_small.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_small_v3\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_small.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_small_v4\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_small.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_small_v5\"
+
+
batch_parliament_xl.sh ADDED
@@ -0,0 +1,12 @@
+ PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
+ export PYTHONPATH=${PROJECT_DIR}
+ INITIAL_CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_xl/checkpoint_1500000\"
+ TRAIN_STEPS=1505000
+
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_xl.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_xl_v1\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_xl.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_xl_v2\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_xl.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_xl_v3\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_xl.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_xl_v4\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_xl.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_xl_v5\"
+
+
batch_parliament_xxl.sh ADDED
@@ -0,0 +1,12 @@
+ PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
+ export PYTHONPATH=${PROJECT_DIR}
+ INITIAL_CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_xxl/checkpoint_1500000\"
+ TRAIN_STEPS=1505000
+
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_xxl.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_xxl_v1\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_xxl.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_xxl_v2\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_xxl.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_xxl_v3\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_xxl.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_xxl_v4\" &&
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_categorisation_xxl.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/parliament_NCC_xxl_v5\"
+
+
finetune_categorisation_large.gin CHANGED
@@ -10,42 +10,30 @@ from t5x import utils
  include "t5x/examples/t5/mt5/large.gin"
  include "t5x/configs/runs/finetune.gin"

- MIXTURE_OR_TASK_NAME = "dane"
- TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 512}
- TRAIN_STEPS = 1_720_000 # 1700000 pre-trained steps + 20000 fine-tuning steps.
+ MIXTURE_OR_TASK_NAME = %gin.REQUIRED
+ TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 2}
+ INITIAL_CHECKPOINT_PATH = %gin.REQUIRED #"gs://t5-data/pretrained_models/t5x/mt5_base/checkpoint_1000000"
+ TRAIN_STEPS = %gin.REQUIRED #1_010_000 # 1000000 pre-trained steps + 10000 fine-tuning steps.
  USE_CACHED_TASKS = False
  DROPOUT_RATE = 0.1
  RANDOM_SEED = 0

- # Pere: Only necessary if we load a t5 model. We can start with a t5x model here
- # `LOSS_NORMALIZING_FACTOR`: When fine-tuning a model that was pre-trained
- # using Mesh Tensorflow (e.g. the public T5 / mT5 / ByT5 models), this should be
- # set to `pretraining batch_size` * `target_token_length`. For T5 and T5.1.1:
- # `2048 * 114`. For mT5: `1024 * 229`. For ByT5: `1024 * 189`.
- #LOSS_NORMALIZING_FACTOR = 234496
-
- INITIAL_CHECKPOINT_PATH = "gs://nb-t5x-us-central2/norwegian_NCC_plus_English_pluss200k_scandinavian_t5x_large/checkpoint_1700000"
-
-
  #Fixing a small error
  infer_eval/utils.DatasetConfig:
  task_feature_lengths = %TASK_FEATURE_LENGTHS

- #Saving every 2000 steps
+ #Saving every 1000 steps
  utils.SaveCheckpointConfig:
- period = 2000
+ period = 1000


+ # Pere: Only necessary if we load a t5 model. We can start with a t5x model here
+ # `LOSS_NORMALIZING_FACTOR`: When fine-tuning a model that was pre-trained
+ # using Mesh Tensorflow (e.g. the public T5 / mT5 / ByT5 models), this should be
+ # set to `pretraining batch_size` * `target_token_length`. For T5 and T5.1.1:
+ # `2048 * 114`. For mT5: `1024 * 229`. For ByT5: `1024 * 189`.
+ # LOSS_NORMALIZING_FACTOR = 234496

-
- #train_script.train:
- # eval_period = 500
- # partitioner = @partitioning.ModelBasedPjitPartitioner()
- partitioning.PjitPartitioner.num_partitions = 1
-
- # `num_decodes` is equivalent to a beam size in a beam search decoding.
- # models.EncoderDecoderModel.predict_batch_with_aux.num_decodes = 1
-
- #mesh_transformer.learning_rate_schedules.constant_learning_rate.learning_rate = 0.0005
- #run.learning_rate_schedule = @learning_rate_schedules.constant_learning_rate
+ # Might have to be changed based on architecture
+ # partitioning.PjitPartitioner.num_partitions = 1

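Two brief notes on the new config (the same settings appear in the small/xl/xxl variants below). The values marked %gin.REQUIRED (MIXTURE_OR_TASK_NAME, INITIAL_CHECKPOINT_PATH, TRAIN_STEPS) must now be supplied as --gin.* overrides, which is what the batch scripts above do. As a check on the commented-out LOSS_NORMALIZING_FACTOR: with the mT5 figures cited in the comment, 1024 * 229 = 234496, matching that value; the corresponding T5/T5.1.1 figure would be 2048 * 114 = 233472.
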
finetune_categorisation_small.gin ADDED
@@ -0,0 +1,39 @@
+ from __gin__ import dynamic_registration
+ import tasks
+
+ import __main__ as train_script
+ from t5.data import mixtures
+ from t5x import models
+ from t5x import partitioning
+ from t5x import utils
+
+ include "t5x/examples/t5/mt5/small.gin"
+ include "t5x/configs/runs/finetune.gin"
+
+ MIXTURE_OR_TASK_NAME = %gin.REQUIRED
+ TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 2}
+ INITIAL_CHECKPOINT_PATH = %gin.REQUIRED #"gs://t5-data/pretrained_models/t5x/mt5_small/checkpoint_1000000"
+ TRAIN_STEPS = %gin.REQUIRED #1_010_000 # 1000000 pre-trained steps + 10000 fine-tuning steps.
+ USE_CACHED_TASKS = False
+ DROPOUT_RATE = 0.1
+ RANDOM_SEED = 0
+
+ #Fixing a small error
+ infer_eval/utils.DatasetConfig:
+ task_feature_lengths = %TASK_FEATURE_LENGTHS
+
+ #Saving every 1000 steps
+ utils.SaveCheckpointConfig:
+ period = 1000
+
+
+ # Pere: Only necessary if we load a t5 model. We can start with a t5x model here
+ # `LOSS_NORMALIZING_FACTOR`: When fine-tuning a model that was pre-trained
+ # using Mesh Tensorflow (e.g. the public T5 / mT5 / ByT5 models), this should be
+ # set to `pretraining batch_size` * `target_token_length`. For T5 and T5.1.1:
+ # `2048 * 114`. For mT5: `1024 * 229`. For ByT5: `1024 * 189`.
+ # LOSS_NORMALIZING_FACTOR = 234496
+
+ # Might have to be changed based on architecture
+ # partitioning.PjitPartitioner.num_partitions = 1
+
finetune_categorisation_xl.gin ADDED
@@ -0,0 +1,39 @@
+ from __gin__ import dynamic_registration
+ import tasks
+
+ import __main__ as train_script
+ from t5.data import mixtures
+ from t5x import models
+ from t5x import partitioning
+ from t5x import utils
+
+ include "t5x/examples/t5/mt5/xl.gin"
+ include "t5x/configs/runs/finetune.gin"
+
+ MIXTURE_OR_TASK_NAME = %gin.REQUIRED
+ TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 2}
+ INITIAL_CHECKPOINT_PATH = %gin.REQUIRED #"gs://t5-data/pretrained_models/t5x/mt5_base/checkpoint_1000000"
+ TRAIN_STEPS = %gin.REQUIRED #1_010_000 # 1000000 pre-trained steps + 10000 fine-tuning steps.
+ USE_CACHED_TASKS = False
+ DROPOUT_RATE = 0.1
+ RANDOM_SEED = 0
+
+ #Fixing a small error
+ infer_eval/utils.DatasetConfig:
+ task_feature_lengths = %TASK_FEATURE_LENGTHS
+
+ #Saving every 1000 steps
+ utils.SaveCheckpointConfig:
+ period = 1000
+
+
+ # Pere: Only necessary if we load a t5 model. We can start with a t5x model here
+ # `LOSS_NORMALIZING_FACTOR`: When fine-tuning a model that was pre-trained
+ # using Mesh Tensorflow (e.g. the public T5 / mT5 / ByT5 models), this should be
+ # set to `pretraining batch_size` * `target_token_length`. For T5 and T5.1.1:
+ # `2048 * 114`. For mT5: `1024 * 229`. For ByT5: `1024 * 189`.
+ # LOSS_NORMALIZING_FACTOR = 234496
+
+ # Might have to be changed based on architecture
+ # partitioning.PjitPartitioner.num_partitions = 1
+
finetune_categorisation_xxl.gin ADDED
@@ -0,0 +1,39 @@
+ from __gin__ import dynamic_registration
+ import tasks
+
+ import __main__ as train_script
+ from t5.data import mixtures
+ from t5x import models
+ from t5x import partitioning
+ from t5x import utils
+
+ include "t5x/examples/t5/mt5/xxl.gin"
+ include "t5x/configs/runs/finetune.gin"
+
+ MIXTURE_OR_TASK_NAME = %gin.REQUIRED
+ TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 2}
+ INITIAL_CHECKPOINT_PATH = %gin.REQUIRED #"gs://t5-data/pretrained_models/t5x/mt5_base/checkpoint_1000000"
+ TRAIN_STEPS = %gin.REQUIRED #1_010_000 # 1000000 pre-trained steps + 10000 fine-tuning steps.
+ USE_CACHED_TASKS = False
+ DROPOUT_RATE = 0.1
+ RANDOM_SEED = 0
+
+ #Fixing a small error
+ infer_eval/utils.DatasetConfig:
+ task_feature_lengths = %TASK_FEATURE_LENGTHS
+
+ #Saving every 1000 steps
+ utils.SaveCheckpointConfig:
+ period = 1000
+
+
+ # Pere: Only necessary if we load a t5 model. We can start with a t5x model here
+ # `LOSS_NORMALIZING_FACTOR`: When fine-tuning a model that was pre-trained
+ # using Mesh Tensorflow (e.g. the public T5 / mT5 / ByT5 models), this should be
+ # set to `pretraining batch_size` * `target_token_length`. For T5 and T5.1.1:
+ # `2048 * 114`. For mT5: `1024 * 229`. For ByT5: `1024 * 189`.
+ # LOSS_NORMALIZING_FACTOR = 234496
+
+ # Might have to be changed based on architecture
+ # partitioning.PjitPartitioner.num_partitions = 1
+