pere commited on
Commit
f811c13
β€’
1 Parent(s): ca12f78

new norbench

Browse files
my_metrics.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
import sklearn.metrics
import numpy as np


def f1_macro(targets, predictions):
    """Return the macro-averaged F1 score, scaled to 0-100, as a metrics dict.

    The score is computed over the union of labels observed in either
    ``targets`` or ``predictions``, so a label that appears only on one
    side still contributes (as a zero) to the macro average.
    """
    y_true = np.asarray(targets)
    y_pred = np.asarray(predictions)
    # Union of every label seen on either side of the comparison.
    label_set = np.unique(np.concatenate((y_true, y_pred)))
    score = sklearn.metrics.f1_score(
        y_true, y_pred, labels=label_set, average='macro'
    )
    return {"f1_macro": 100 * score}
my_metrics_old.py DELETED
@@ -1,7 +0,0 @@
1
- import sklearn.metrics
2
- import numpy as np
3
-
4
- def f1_macro(targets, predictions):
5
- targets, predictions = np.asarray(targets).astype(str), np.asarray(predictions).astype(str)
6
- return {"f1_macro": 100*sklearn.metrics.f1_score(targets, predictions, average='macro')}
7
-
 
 
 
 
 
 
 
 
finetune_mt5_128.gin β†’ norbench_sentence_sentiment_bs128.gin RENAMED
@@ -21,7 +21,7 @@ USE_CACHED_TASKS = False
21
  DROPOUT_RATE = 0.1
22
  RANDOM_SEED = 0
23
  BATCH_SIZE = 128
24
- EVAL_PERIOD = 316
25
 
26
  #Fixing a small error
27
  infer_eval/utils.DatasetConfig:
@@ -29,7 +29,7 @@ infer_eval/utils.DatasetConfig:
29
 
30
  #Saving every 500 steps
31
  utils.SaveCheckpointConfig:
32
- period = 316
33
  keep = 1 # number of checkpoints to keep
34
 
35
 
 
21
  DROPOUT_RATE = 0.1
22
  RANDOM_SEED = 0
23
  BATCH_SIZE = 128
24
+ EVAL_PERIOD = 124
25
 
26
  #Fixing a small error
27
  infer_eval/utils.DatasetConfig:
 
29
 
30
  #Saving every 500 steps
31
  utils.SaveCheckpointConfig:
32
+ period = 124
33
  keep = 1 # number of checkpoints to keep
34
 
35
 
batch_lrtest_sentence_sentiment_bs32.sh β†’ norbench_sentence_sentiment_bs128.sh RENAMED
@@ -5,7 +5,7 @@ echo "PROJECT_DIR is set to: ${PROJECT_DIR}"
5
 
6
 
7
  FINETUNE_STEPS=5000
8
- EVAL_PREFIX="norbench/evalB_lr_semtiment_bs32"
9
  MODEL_BUCKET_DIR="gs://pere-north-t5x/finetuned/"
10
 
11
  CHECKPOINT_LIST=(
@@ -31,51 +31,30 @@ LR_LIST=(0.0001 0.0002 0.0004 0.0006 0.0008 0.001)
31
 
32
 
33
  GIN_LIST=(
34
- "finetune_mt5_32.gin"
35
- "finetune_mt5_32.gin"
36
- "finetune_mt5_32.gin"
37
- "finetune_mt5_32.gin"
38
- "finetune_mt5_32.gin"
39
- "finetune_mt5_32.gin"
40
- "finetune_mt5_32.gin"
41
  )
42
 
43
  START_LIST=(1500000 1500000 1500000 1500000 1500000 1500000)
44
- EXP_LIST=(901 902 903 904 905 906)
45
 
46
 
47
- VERSION_LIST=("1")
48
-
49
- index=$(($1 + 0))
50
-
51
- if [ $index -lt 1 ] || [ $index -gt ${#CHECKPOINT_LIST[@]} ] || [ $# -ne 1 ]; then
52
- echo "Error: You need to provide the number of the checkpoints below as a parameter."
53
- for i in "${!CHECKPOINT_LIST[@]}"; do
54
- echo "$((i+1)). ${CHECKPOINT_LIST[i]}"
55
- done
56
-
57
- exit 1
58
- fi
59
-
60
- for v in "${VERSION_LIST[@]}"; do
61
- i=($index-1)
62
  INITIAL_CHECKPOINT_PATH="gs://${CHECKPOINT_LIST[i]}"
63
-
64
  TRAIN_STEPS=$((START_LIST[i]+FINETUNE_STEPS))
65
  GIN_FILE=${GIN_LIST[i]}
66
  MIXTURE_OR_TASK_NAME=${TASK_LIST[i]}
67
  LR=${LR_LIST[i]}
68
- MODEL_DIR="${MODEL_BUCKET_DIR}${EVAL_PREFIX}_exp${EXP_LIST[i]}_${NAME_LIST[i]}_v${v}"
 
69
 
70
  command="python3 ../../t5x/t5x/train.py --gin_search_paths=\"./\" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin.LR=${LR} --gin_file=${GIN_FILE} --gin.INITIAL_CHECKPOINT_PATH=\\\"${INITIAL_CHECKPOINT_PATH}\\\" --gin.MIXTURE_OR_TASK_NAME=\\\"${MIXTURE_OR_TASK_NAME}\\\" --gin.MODEL_DIR=\\\"${MODEL_DIR}\\\""
71
  echo "${command}"
72
  # Uncomment the next line to run the command:
73
- eval "${command}"
74
  done
75
-
76
- #python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_base.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_NCC_base_v1\" &&
77
- #python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_base.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_NCC_base_v2\" &&
78
- #python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_base.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_NCC_base_v3\" &&
79
- #python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_base.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_NCC_base_v4\" &&
80
- #python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_base.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_NCC_base_v5\"
81
-
 
5
 
6
 
7
  FINETUNE_STEPS=5000
8
+ EVAL_PREFIX="norbench/norbenc_sentence_sentiment_bs128"
9
  MODEL_BUCKET_DIR="gs://pere-north-t5x/finetuned/"
10
 
11
  CHECKPOINT_LIST=(
 
31
 
32
 
33
  GIN_LIST=(
34
+ "norbench_sentence_sentiment_bs128.gin"
35
+ "norbench_sentence_sentiment_bs128.gin"
36
+ "norbench_sentence_sentiment_bs128.gin"
37
+ "norbench_sentence_sentiment_bs128.gin"
38
+ "norbench_sentence_sentiment_bs128.gin"
39
+ "norbench_sentence_sentiment_bs128.gin"
 
40
  )
41
 
42
  START_LIST=(1500000 1500000 1500000 1500000 1500000 1500000)
43
+ EXP_LIST=(1 2 3 4 5 6)
44
 
45
 
46
+ # Loop through each experiment in EXP_LIST
47
+ for i in "${!EXP_LIST[@]}"; do
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  INITIAL_CHECKPOINT_PATH="gs://${CHECKPOINT_LIST[i]}"
 
49
  TRAIN_STEPS=$((START_LIST[i]+FINETUNE_STEPS))
50
  GIN_FILE=${GIN_LIST[i]}
51
  MIXTURE_OR_TASK_NAME=${TASK_LIST[i]}
52
  LR=${LR_LIST[i]}
53
+ EXP=${EXP_LIST[i]}
54
+ MODEL_DIR="${MODEL_BUCKET_DIR}${EVAL_PREFIX}_exp${EXP}_${NAME_LIST[i]}"
55
 
56
  command="python3 ../../t5x/t5x/train.py --gin_search_paths=\"./\" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin.LR=${LR} --gin_file=${GIN_FILE} --gin.INITIAL_CHECKPOINT_PATH=\\\"${INITIAL_CHECKPOINT_PATH}\\\" --gin.MIXTURE_OR_TASK_NAME=\\\"${MIXTURE_OR_TASK_NAME}\\\" --gin.MODEL_DIR=\\\"${MODEL_DIR}\\\""
57
  echo "${command}"
58
  # Uncomment the next line to run the command:
59
+ # eval "${command}"
60
  done
 
 
 
 
 
 
 
norbench_sentence_sentiment_bs16.gin ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __gin__ import dynamic_registration
2
+ import tasks
3
+ import seqio
4
+ import optax
5
+
6
+ import __main__ as train_script
7
+ from t5.data import mixtures
8
+ from t5x import models
9
+ from t5x import partitioning
10
+ from t5x import utils
11
+
12
+ include 't5x/examples/t5/mt5/base.gin'
13
+ include "t5x/configs/runs/finetune.gin"
14
+
15
+ MIXTURE_OR_TASK_NAME = %gin.REQUIRED
16
+ TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
17
+ INITIAL_CHECKPOINT_PATH = %gin.REQUIRED
18
+ LR = %gin.REQUIRED
19
+ TRAIN_STEPS = %gin.REQUIRED # 1000000 pre-trained steps + 10000 fine-tuning steps.
20
+ USE_CACHED_TASKS = False
21
+ DROPOUT_RATE = 0.1
22
+ RANDOM_SEED = 0
23
+ BATCH_SIZE = 16
24
+ EVAL_PERIOD = 124
25
+
26
+ #Fixing a small error
27
+ infer_eval/utils.DatasetConfig:
28
+ task_feature_lengths = %TASK_FEATURE_LENGTHS
29
+
30
+ #Saving every 124 steps
31
+ utils.SaveCheckpointConfig:
32
+ period = 124
33
+ keep = 1 # number of checkpoints to keep
34
+
35
+
36
+ #optax.adamw.weight_decay = 0.1
37
+ #OPTIMIZER = @optax.adamw
38
+ #import t5x.optimizers
39
+ #OPTIMIZER = @optax.adamw
40
+ #optax.adamw.learning_rate = %LR
41
+ #optax.adamw.weight_decay = 0.1
42
+
43
+
44
+ utils.create_learning_rate_scheduler:
45
+ factors = 'constant'
46
+ base_learning_rate = %LR
47
+ warmup_steps = 1000
48
+
49
+ # Might have to be changed based on architecture
50
+ # partitioning.PjitPartitioner.num_partitions = 1
51
+
52
+
batch_lrtest_sentence_sentiment_bs64.sh β†’ norbench_sentence_sentiment_bs16.sh RENAMED
@@ -5,7 +5,7 @@ echo "PROJECT_DIR is set to: ${PROJECT_DIR}"
5
 
6
 
7
  FINETUNE_STEPS=5000
8
- EVAL_PREFIX="norbench/evalB_lr_semtiment_bs64"
9
  MODEL_BUCKET_DIR="gs://pere-north-t5x/finetuned/"
10
 
11
  CHECKPOINT_LIST=(
@@ -31,51 +31,30 @@ LR_LIST=(0.0001 0.0002 0.0004 0.0006 0.0008 0.001)
31
 
32
 
33
  GIN_LIST=(
34
- "finetune_mt5_64.gin"
35
- "finetune_mt5_64.gin"
36
- "finetune_mt5_64.gin"
37
- "finetune_mt5_64.gin"
38
- "finetune_mt5_64.gin"
39
- "finetune_mt5_64.gin"
40
- "finetune_mt5_64.gin"
41
  )
42
 
43
  START_LIST=(1500000 1500000 1500000 1500000 1500000 1500000)
44
- EXP_LIST=(907 908 909 910 911 912)
45
 
46
 
47
- VERSION_LIST=("1")
48
-
49
- index=$(($1 + 0))
50
-
51
- if [ $index -lt 1 ] || [ $index -gt ${#CHECKPOINT_LIST[@]} ] || [ $# -ne 1 ]; then
52
- echo "Error: You need to provide the number of the checkpoints below as a parameter."
53
- for i in "${!CHECKPOINT_LIST[@]}"; do
54
- echo "$((i+1)). ${CHECKPOINT_LIST[i]}"
55
- done
56
-
57
- exit 1
58
- fi
59
-
60
- for v in "${VERSION_LIST[@]}"; do
61
- i=($index-1)
62
  INITIAL_CHECKPOINT_PATH="gs://${CHECKPOINT_LIST[i]}"
63
-
64
  TRAIN_STEPS=$((START_LIST[i]+FINETUNE_STEPS))
65
  GIN_FILE=${GIN_LIST[i]}
66
  MIXTURE_OR_TASK_NAME=${TASK_LIST[i]}
67
  LR=${LR_LIST[i]}
68
- MODEL_DIR="${MODEL_BUCKET_DIR}${EVAL_PREFIX}_exp${EXP_LIST[i]}_${NAME_LIST[i]}_v${v}"
 
69
 
70
  command="python3 ../../t5x/t5x/train.py --gin_search_paths=\"./\" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin.LR=${LR} --gin_file=${GIN_FILE} --gin.INITIAL_CHECKPOINT_PATH=\\\"${INITIAL_CHECKPOINT_PATH}\\\" --gin.MIXTURE_OR_TASK_NAME=\\\"${MIXTURE_OR_TASK_NAME}\\\" --gin.MODEL_DIR=\\\"${MODEL_DIR}\\\""
71
  echo "${command}"
72
  # Uncomment the next line to run the command:
73
- eval "${command}"
74
  done
75
-
76
- #python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_base.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_NCC_base_v1\" &&
77
- #python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_base.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_NCC_base_v2\" &&
78
- #python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_base.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_NCC_base_v3\" &&
79
- #python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_base.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_NCC_base_v4\" &&
80
- #python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_base.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_NCC_base_v5\"
81
-
 
5
 
6
 
7
  FINETUNE_STEPS=5000
8
+ EVAL_PREFIX="norbench/norbenc_sentence_sentiment_bs16"
9
  MODEL_BUCKET_DIR="gs://pere-north-t5x/finetuned/"
10
 
11
  CHECKPOINT_LIST=(
 
31
 
32
 
33
  GIN_LIST=(
34
+ "norbench_sentence_sentiment_bs16.gin"
35
+ "norbench_sentence_sentiment_bs16.gin"
36
+ "norbench_sentence_sentiment_bs16.gin"
37
+ "norbench_sentence_sentiment_bs16.gin"
38
+ "norbench_sentence_sentiment_bs16.gin"
39
+ "norbench_sentence_sentiment_bs16.gin"
 
40
  )
41
 
42
  START_LIST=(1500000 1500000 1500000 1500000 1500000 1500000)
43
+ EXP_LIST=(1 2 3 4 5 6)
44
 
45
 
46
+ # Loop through each experiment in EXP_LIST
47
+ for i in "${!EXP_LIST[@]}"; do
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  INITIAL_CHECKPOINT_PATH="gs://${CHECKPOINT_LIST[i]}"
 
49
  TRAIN_STEPS=$((START_LIST[i]+FINETUNE_STEPS))
50
  GIN_FILE=${GIN_LIST[i]}
51
  MIXTURE_OR_TASK_NAME=${TASK_LIST[i]}
52
  LR=${LR_LIST[i]}
53
+ EXP=${EXP_LIST[i]}
54
+ MODEL_DIR="${MODEL_BUCKET_DIR}${EVAL_PREFIX}_exp${EXP}_${NAME_LIST[i]}"
55
 
56
  command="python3 ../../t5x/t5x/train.py --gin_search_paths=\"./\" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin.LR=${LR} --gin_file=${GIN_FILE} --gin.INITIAL_CHECKPOINT_PATH=\\\"${INITIAL_CHECKPOINT_PATH}\\\" --gin.MIXTURE_OR_TASK_NAME=\\\"${MIXTURE_OR_TASK_NAME}\\\" --gin.MODEL_DIR=\\\"${MODEL_DIR}\\\""
57
  echo "${command}"
58
  # Uncomment the next line to run the command:
59
+ # eval "${command}"
60
  done
 
 
 
 
 
 
 
finetune_mt5_32.gin β†’ norbench_sentence_sentiment_bs32.gin RENAMED
@@ -21,7 +21,7 @@ USE_CACHED_TASKS = False
21
  DROPOUT_RATE = 0.1
22
  RANDOM_SEED = 0
23
  BATCH_SIZE = 32
24
- EVAL_PERIOD = 316
25
 
26
  #Fixing a small error
27
  infer_eval/utils.DatasetConfig:
@@ -29,7 +29,7 @@ infer_eval/utils.DatasetConfig:
29
 
30
  #Saving every 500 steps
31
  utils.SaveCheckpointConfig:
32
- period = 316
33
  keep = 1 # number of checkpoints to keep
34
 
35
 
 
21
  DROPOUT_RATE = 0.1
22
  RANDOM_SEED = 0
23
  BATCH_SIZE = 32
24
+ EVAL_PERIOD = 124
25
 
26
  #Fixing a small error
27
  infer_eval/utils.DatasetConfig:
 
29
 
30
  #Saving every 500 steps
31
  utils.SaveCheckpointConfig:
32
+ period = 124
33
  keep = 1 # number of checkpoints to keep
34
 
35
 
batch_lrtest_sentence_sentiment_bs128.sh β†’ norbench_sentence_sentiment_bs32.sh RENAMED
@@ -4,8 +4,8 @@ export PYTHONPATH=${PROJECT_DIR}
4
  echo "PROJECT_DIR is set to: ${PROJECT_DIR}"
5
 
6
 
7
- FINETUNE_STEPS=10000
8
- EVAL_PREFIX="norbench/evalB_lr_semtiment_bs128"
9
  MODEL_BUCKET_DIR="gs://pere-north-t5x/finetuned/"
10
 
11
  CHECKPOINT_LIST=(
@@ -31,51 +31,30 @@ LR_LIST=(0.0001 0.0002 0.0004 0.0006 0.0008 0.001)
31
 
32
 
33
  GIN_LIST=(
34
- "finetune_mt5_128.gin"
35
- "finetune_mt5_128.gin"
36
- "finetune_mt5_128.gin"
37
- "finetune_mt5_128.gin"
38
- "finetune_mt5_128.gin"
39
- "finetune_mt5_128.gin"
40
- "finetune_mt5_128.gin"
41
  )
42
 
43
  START_LIST=(1500000 1500000 1500000 1500000 1500000 1500000)
44
- EXP_LIST=(913 914 915 916 917 918)
45
 
46
 
47
- VERSION_LIST=("1")
48
-
49
- index=$(($1 + 0))
50
-
51
- if [ $index -lt 1 ] || [ $index -gt ${#CHECKPOINT_LIST[@]} ] || [ $# -ne 1 ]; then
52
- echo "Error: You need to provide the number of the checkpoints below as a parameter."
53
- for i in "${!CHECKPOINT_LIST[@]}"; do
54
- echo "$((i+1)). ${CHECKPOINT_LIST[i]}"
55
- done
56
-
57
- exit 1
58
- fi
59
-
60
- for v in "${VERSION_LIST[@]}"; do
61
- i=($index-1)
62
  INITIAL_CHECKPOINT_PATH="gs://${CHECKPOINT_LIST[i]}"
63
-
64
  TRAIN_STEPS=$((START_LIST[i]+FINETUNE_STEPS))
65
  GIN_FILE=${GIN_LIST[i]}
66
  MIXTURE_OR_TASK_NAME=${TASK_LIST[i]}
67
  LR=${LR_LIST[i]}
68
- MODEL_DIR="${MODEL_BUCKET_DIR}${EVAL_PREFIX}_exp${EXP_LIST[i]}_${NAME_LIST[i]}_v${v}"
 
69
 
70
  command="python3 ../../t5x/t5x/train.py --gin_search_paths=\"./\" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin.LR=${LR} --gin_file=${GIN_FILE} --gin.INITIAL_CHECKPOINT_PATH=\\\"${INITIAL_CHECKPOINT_PATH}\\\" --gin.MIXTURE_OR_TASK_NAME=\\\"${MIXTURE_OR_TASK_NAME}\\\" --gin.MODEL_DIR=\\\"${MODEL_DIR}\\\""
71
  echo "${command}"
72
  # Uncomment the next line to run the command:
73
- eval "${command}"
74
  done
75
-
76
- #python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_base.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_NCC_base_v1\" &&
77
- #python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_base.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_NCC_base_v2\" &&
78
- #python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_base.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_NCC_base_v3\" &&
79
- #python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_base.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_NCC_base_v4\" &&
80
- #python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_base.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_NCC_base_v5\"
81
-
 
4
  echo "PROJECT_DIR is set to: ${PROJECT_DIR}"
5
 
6
 
7
+ FINETUNE_STEPS=5000
8
+ EVAL_PREFIX="norbench/norbenc_sentence_sentiment_bs32"
9
  MODEL_BUCKET_DIR="gs://pere-north-t5x/finetuned/"
10
 
11
  CHECKPOINT_LIST=(
 
31
 
32
 
33
  GIN_LIST=(
34
+ "norbench_sentence_sentiment_bs32.gin"
35
+ "norbench_sentence_sentiment_bs32.gin"
36
+ "norbench_sentence_sentiment_bs32.gin"
37
+ "norbench_sentence_sentiment_bs32.gin"
38
+ "norbench_sentence_sentiment_bs32.gin"
39
+ "norbench_sentence_sentiment_bs32.gin"
 
40
  )
41
 
42
  START_LIST=(1500000 1500000 1500000 1500000 1500000 1500000)
43
+ EXP_LIST=(1 2 3 4 5 6)
44
 
45
 
46
+ # Loop through each experiment in EXP_LIST
47
+ for i in "${!EXP_LIST[@]}"; do
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  INITIAL_CHECKPOINT_PATH="gs://${CHECKPOINT_LIST[i]}"
 
49
  TRAIN_STEPS=$((START_LIST[i]+FINETUNE_STEPS))
50
  GIN_FILE=${GIN_LIST[i]}
51
  MIXTURE_OR_TASK_NAME=${TASK_LIST[i]}
52
  LR=${LR_LIST[i]}
53
+ EXP=${EXP_LIST[i]}
54
+ MODEL_DIR="${MODEL_BUCKET_DIR}${EVAL_PREFIX}_exp${EXP}_${NAME_LIST[i]}"
55
 
56
  command="python3 ../../t5x/t5x/train.py --gin_search_paths=\"./\" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin.LR=${LR} --gin_file=${GIN_FILE} --gin.INITIAL_CHECKPOINT_PATH=\\\"${INITIAL_CHECKPOINT_PATH}\\\" --gin.MIXTURE_OR_TASK_NAME=\\\"${MIXTURE_OR_TASK_NAME}\\\" --gin.MODEL_DIR=\\\"${MODEL_DIR}\\\""
57
  echo "${command}"
58
  # Uncomment the next line to run the command:
59
+ # eval "${command}"
60
  done
 
 
 
 
 
 
 
finetune_mt5_64.gin β†’ norbench_sentence_sentiment_bs64.gin RENAMED
@@ -21,7 +21,7 @@ USE_CACHED_TASKS = False
21
  DROPOUT_RATE = 0.1
22
  RANDOM_SEED = 0
23
  BATCH_SIZE = 64
24
- EVAL_PERIOD = 316
25
 
26
  #Fixing a small error
27
  infer_eval/utils.DatasetConfig:
@@ -29,7 +29,7 @@ infer_eval/utils.DatasetConfig:
29
 
30
  #Saving every 500 steps
31
  utils.SaveCheckpointConfig:
32
- period = 316
33
  keep = 1 # number of checkpoints to keep
34
 
35
 
 
21
  DROPOUT_RATE = 0.1
22
  RANDOM_SEED = 0
23
  BATCH_SIZE = 64
24
+ EVAL_PERIOD = 124
25
 
26
  #Fixing a small error
27
  infer_eval/utils.DatasetConfig:
 
29
 
30
  #Saving every 500 steps
31
  utils.SaveCheckpointConfig:
32
+ period = 124
33
  keep = 1 # number of checkpoints to keep
34
 
35
 
norbench_sentence_sentiment_bs64.sh ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Fine-tune a t5x checkpoint on the NorBench sentence-sentiment task with
# batch size 64, sweeping the learning rates in LR_LIST (one experiment per
# entry in EXP_LIST). The script only *prints* each training command; the
# `eval` line at the bottom must be uncommented to actually run them.
PROJECT_DIR=${HOME}"/models/t5-nynorsk-norbench"
export PYTHONPATH=${PROJECT_DIR}
echo "PROJECT_DIR is set to: ${PROJECT_DIR}"


FINETUNE_STEPS=5000
# NOTE(review): "norbenc" looks like a typo for "norbench"; kept as-is because
# it names GCS output paths that may already exist — confirm before renaming.
EVAL_PREFIX="norbench/norbenc_sentence_sentiment_bs64"
MODEL_BUCKET_DIR="gs://pere-north-t5x/finetuned/"

# All experiments start from the same pretrained checkpoint.
CHECKPOINT_LIST=(
"pere-north-t5x/pretrained_models/base/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000"
"pere-north-t5x/pretrained_models/base/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000"
"pere-north-t5x/pretrained_models/base/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000"
"pere-north-t5x/pretrained_models/base/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000"
"pere-north-t5x/pretrained_models/base/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000"
"pere-north-t5x/pretrained_models/base/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000"
)

# Human-readable run names; the learning rate is encoded in the name and must
# stay in sync with LR_LIST below.
NAME_LIST=(
"north_t5_base_NCC_LR_0_0001"
"north_t5_base_NCC_LR_0_0002"
"north_t5_base_NCC_LR_0_0004"
"north_t5_base_NCC_LR_0_0006"
"north_t5_base_NCC_LR_0_0008"
"north_t5_base_NCC_LR_0_001"
)

TASK_LIST=("sentence_sentiment" "sentence_sentiment" "sentence_sentiment" "sentence_sentiment" "sentence_sentiment" "sentence_sentiment")
LR_LIST=(0.0001 0.0002 0.0004 0.0006 0.0008 0.001)


GIN_LIST=(
"norbench_sentence_sentiment_bs64.gin"
"norbench_sentence_sentiment_bs64.gin"
"norbench_sentence_sentiment_bs64.gin"
"norbench_sentence_sentiment_bs64.gin"
"norbench_sentence_sentiment_bs64.gin"
"norbench_sentence_sentiment_bs64.gin"
)

START_LIST=(1500000 1500000 1500000 1500000 1500000 1500000)
EXP_LIST=(1 2 3 4 5 6)


# Loop through each experiment in EXP_LIST
# (original line lost its leading "# Lo", which made bash try to run a
# nonexistent command `op`)
for i in "${!EXP_LIST[@]}"; do
  INITIAL_CHECKPOINT_PATH="gs://${CHECKPOINT_LIST[i]}"
  # Total steps = pretraining steps already in the checkpoint + fine-tune budget.
  TRAIN_STEPS=$((START_LIST[i]+FINETUNE_STEPS))
  GIN_FILE=${GIN_LIST[i]}
  MIXTURE_OR_TASK_NAME=${TASK_LIST[i]}
  LR=${LR_LIST[i]}
  EXP=${EXP_LIST[i]}
  MODEL_DIR="${MODEL_BUCKET_DIR}${EVAL_PREFIX}_exp${EXP}_${NAME_LIST[i]}"

  command="python3 ../../t5x/t5x/train.py --gin_search_paths=\"./\" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin.LR=${LR} --gin_file=${GIN_FILE} --gin.INITIAL_CHECKPOINT_PATH=\\\"${INITIAL_CHECKPOINT_PATH}\\\" --gin.MIXTURE_OR_TASK_NAME=\\\"${MIXTURE_OR_TASK_NAME}\\\" --gin.MODEL_DIR=\\\"${MODEL_DIR}\\\""
  echo "${command}"
  # Uncomment the next line to run the command:
  # eval "${command}"
done
tasks.py CHANGED
@@ -1,12 +1,12 @@
1
  # /home/perk/mymodel/categorisation-mt5x/tasks.py
2
 
3
-
4
  import functools
5
  import seqio
6
  import tensorflow_datasets as tfds
7
  from t5.evaluation import metrics
8
  from t5.data import preprocessors
9
  #import my_preprocessors
 
10
  import t5
11
  import tensorflow.compat.v1 as tf
12
 
@@ -193,7 +193,7 @@ seqio.TaskRegistry.add(
193
  categorise_preprocessor,
194
  seqio.preprocessors.tokenize_and_append_eos,
195
  ],
196
- metric_fns=[metrics.accuracy,metrics.mean_multiclass_f1],
197
  output_features=MT5_OUTPUT_FEATURES,
198
  )
199
 
 
1
  # /home/perk/mymodel/categorisation-mt5x/tasks.py
2
 
 
3
  import functools
4
  import seqio
5
  import tensorflow_datasets as tfds
6
  from t5.evaluation import metrics
7
  from t5.data import preprocessors
8
  #import my_preprocessors
9
+ import my_metrics
10
  import t5
11
  import tensorflow.compat.v1 as tf
12
 
 
193
  categorise_preprocessor,
194
  seqio.preprocessors.tokenize_and_append_eos,
195
  ],
196
+ metric_fns=[metrics.accuracy,my_metrics.f1_macro],
197
  output_features=MT5_OUTPUT_FEATURES,
198
  )
199