pere committed on
Commit
08a2592
1 Parent(s): 2b404b0

updated batch run

Browse files
eval_base.sh CHANGED
@@ -1,13 +1,13 @@
1
  #PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
2
  #T5X_DIR="../../t5x" # directory where the t5x is cloned.
3
- CHECKPOINT_PATH="gs://nb-t5x/eval_norwegian_NCC_2_000_000/checkpoint_2005000"
4
  #export PYTHONPATH=${PROJECT_DIR}
5
 
6
  python3 eval.py \
7
  --gin_search_paths="./" \
8
  --gin_file="eval_categorisation_base.gin" \
9
  --gin.SPLIT=\"validation\" \
10
- --gin.CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/finetuned/norwegian_NCC_pluss_english_1_500_000/checkpoint_1505000\" \
11
 
12
  #"gs://nb-t5x/eval_norwegian_NCC_2_000_000/checkpoint_2005000" \
13
  #--gin.SPLIT="validation" \
 
1
  #PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
2
  #T5X_DIR="../../t5x" # directory where the t5x is cloned.
3
+ #CHECKPOINT_PATH="gs://nb-t5x-us-central2/finetuned/norwegian_NCC_pluss_english_1_500_000/checkpoint_1510000"
4
  #export PYTHONPATH=${PROJECT_DIR}
5
 
6
  python3 eval.py \
7
  --gin_search_paths="./" \
8
  --gin_file="eval_categorisation_base.gin" \
9
  --gin.SPLIT=\"validation\" \
10
+ --gin.CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/finetuned/norwegian_NCC_plus_English_t5x_base_1_500_000_parliament/checkpoint_1510000\" \
11
 
12
  #"gs://nb-t5x/eval_norwegian_NCC_2_000_000/checkpoint_2005000" \
13
  #--gin.SPLIT="validation" \
eval_categorisation_base.gin CHANGED
@@ -12,7 +12,7 @@ CHECKPOINT_PATH = %gin.REQUIRED # passed via commandline
12
  SPLIT = %gin.REQUIRED # passed via commandline
13
  EVAL_OUTPUT_DIR = "./log/"
14
  DROPOUT_RATE = 0.0 # unused boilerplate
15
- MIXTURE_OR_TASK_NAME = "categorise"
16
 
17
  eval_script.evaluate:
18
  model = %MODEL # imported from separate gin file
 
12
  SPLIT = %gin.REQUIRED # passed via commandline
13
  EVAL_OUTPUT_DIR = "./log/"
14
  DROPOUT_RATE = 0.0 # unused boilerplate
15
+ MIXTURE_OR_TASK_NAME = "parliament"
16
 
17
  eval_script.evaluate:
18
  model = %MODEL # imported from separate gin file
finetune_base.sh CHANGED
@@ -1,11 +1,10 @@
1
  PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
2
- T5X_DIR="../../t5x" # directory where the t5x is cloned.
3
- #Needs to be updated when moving to tpu-v4 it should then be in another zone
4
- MODEL_DIR="gs://nb-t5x-us-central2/finetuned/norwegian_NCC_plus_English_t5x_base_1_500_000_sentiment"
5
  export PYTHONPATH=${PROJECT_DIR}
6
 
7
- python3 ${T5X_DIR}/t5x/train.py \
8
- --gin_search_paths=${PROJECT_DIR} \
9
  --gin_file="finetune_categorisation_base.gin" \
10
- --gin.MODEL_DIR="'${MODEL_DIR}'"
 
 
11
 
 
1
  PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
 
 
 
2
  export PYTHONPATH=${PROJECT_DIR}
3
 
4
+ python3 ../../t5x/t5x/train.py \
5
+ --gin_search_paths="./" \
6
  --gin_file="finetune_categorisation_base.gin" \
7
+ --gin.MIXTURE_OR_TASK_NAME=\"parliament\" \
8
+ --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v1_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" \
9
+
10
 
finetune_batch_base.sh ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
2
+ export PYTHONPATH=${PROJECT_DIR}
3
+
4
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin_file="finetune_categorisation_base.gin" --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v1_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" &&
5
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin_file="finetune_categorisation_base.gin" --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v2_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" &&
6
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin_file="finetune_categorisation_base.gin" --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v3_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" &&
7
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin_file="finetune_categorisation_base.gin" --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v4_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" &&
8
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin_file="finetune_categorisation_base.gin" --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v5_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" &&
9
+
10
+
finetune_categorisation_base.gin CHANGED
@@ -10,8 +10,9 @@ from t5x import utils
10
  include "t5x/examples/t5/mt5/base.gin"
11
  include "t5x/configs/runs/finetune.gin"
12
 
13
- MIXTURE_OR_TASK_NAME = "sentiment"
14
  TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 2}
 
15
  TRAIN_STEPS = 1_510_000 # 1000000 pre-trained steps + 10000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.1
@@ -25,7 +26,6 @@ infer_eval/utils.DatasetConfig:
25
  utils.SaveCheckpointConfig:
26
  period = 1000
27
 
28
- INITIAL_CHECKPOINT_PATH = "gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000"
29
 
30
  # Pere: Only necessary if we load a t5 model. We can start with an t5x model here
31
  # `LOSS_NORMALIZING_FACTOR`: When fine-tuning a model that was pre-trained
@@ -34,6 +34,6 @@ INITIAL_CHECKPOINT_PATH = "gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5
34
  # `2048 * 114`. For mT5: `1024 * 229`. For ByT5: `1024 * 189`.
35
  # LOSS_NORMALIZING_FACTOR = 234496
36
 
37
- # Might have to ba chaned based on architecture
38
  # partitioning.PjitPartitioner.num_partitions = 1
39
 
 
10
  include "t5x/examples/t5/mt5/base.gin"
11
  include "t5x/configs/runs/finetune.gin"
12
 
13
+ MIXTURE_OR_TASK_NAME = %gin.REQUIRED
14
  TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 2}
15
+ INITIAL_CHECKPOINT_PATH = "gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000"
16
  TRAIN_STEPS = 1_510_000 # 1000000 pre-trained steps + 10000 fine-tuning steps.
17
  USE_CACHED_TASKS = False
18
  DROPOUT_RATE = 0.1
 
26
  utils.SaveCheckpointConfig:
27
  period = 1000
28
 
 
29
 
30
  # Pere: Only necessary if we load a t5 model. We can start with an t5x model here
31
  # `LOSS_NORMALIZING_FACTOR`: When fine-tuning a model that was pre-trained
 
34
  # `2048 * 114`. For mT5: `1024 * 229`. For ByT5: `1024 * 189`.
35
  # LOSS_NORMALIZING_FACTOR = 234496
36
 
37
+ # Might have to be changed based on architecture
38
  # partitioning.PjitPartitioner.num_partitions = 1
39
 
log/config.gin CHANGED
@@ -12,12 +12,12 @@ import tasks
12
  # Macros:
13
  # ==============================================================================
14
  CHECKPOINT_PATH = \
15
- 'gs://nb-t5x-us-central2/finetuned/norwegian_NCC_pluss_english_1_500_000/checkpoint_1505000'
16
  DROPOUT_RATE = 0.0
17
  EVAL_OUTPUT_DIR = './log/'
18
  LABEL_SMOOTHING = 0.0
19
  LOSS_NORMALIZING_FACTOR = None
20
- MIXTURE_OR_TASK_NAME = 'categorise'
21
  MODEL = @models.EncoderDecoderModel()
22
  OPTIMIZER = @adafactor.Adafactor()
23
  SPLIT = 'validation'
 
12
  # Macros:
13
  # ==============================================================================
14
  CHECKPOINT_PATH = \
15
+ 'gs://nb-t5x-us-central2/finetuned/norwegian_NCC_plus_English_t5x_base_1_500_000_parliament/checkpoint_1510000'
16
  DROPOUT_RATE = 0.0
17
  EVAL_OUTPUT_DIR = './log/'
18
  LABEL_SMOOTHING = 0.0
19
  LOSS_NORMALIZING_FACTOR = None
20
+ MIXTURE_OR_TASK_NAME = 'parliament'
21
  MODEL = @models.EncoderDecoderModel()
22
  OPTIMIZER = @adafactor.Adafactor()
23
  SPLIT = 'validation'
log/eval_results_t1v-n-b482f629-w-0.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {"model": "gs://nb-t5x-us-central2/finetuned/test_dd_norwegian_NCC_pluss_english_1_500_000/checkpoint_1510000", "task": "parliament", "eval_date": "12-04-2022 15:25:26", "split": "validation", "feature_length": {"inputs": 512, "targets": 2}, "eval_batch_size": 16, "result": {"accuracy": 85.58333333333333, "f1_macro": 85.58332332175232}}
2
+ {"model": "gs://nb-t5x-us-central2/finetuned/test_dd_norwegian_NCC_pluss_english_1_500_000/checkpoint_1510000", "task": "parliament", "eval_date": "12-04-2022 15:34:57", "split": "validation", "feature_length": {"inputs": 512, "targets": 2}, "eval_batch_size": 16, "result": {"accuracy": 85.58333333333333, "f1_macro": 85.58332332175232}}
3
+ {"model": "gs://nb-t5x-us-central2/finetuned/norwegian_NCC_plus_English_t5x_base_1_500_000_parliament/checkpoint_1510000", "task": "parliament", "eval_date": "12-04-2022 16:04:05", "split": "validation", "feature_length": {"inputs": 512, "targets": 2}, "eval_batch_size": 16, "result": {"accuracy": 82.0, "f1_macro": 81.98193188169247}}