pere committed on
Commit
08a2592
1 Parent(s): 2b404b0

updated batch run

Browse files
eval_base.sh CHANGED
@@ -1,13 +1,13 @@
1
  #PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
2
  #T5X_DIR="../../t5x" # directory where the t5x is cloned.
3
- CHECKPOINT_PATH="gs://nb-t5x/eval_norwegian_NCC_2_000_000/checkpoint_2005000"
4
  #export PYTHONPATH=${PROJECT_DIR}
5
 
6
  python3 eval.py \
7
  --gin_search_paths="./" \
8
  --gin_file="eval_categorisation_base.gin" \
9
  --gin.SPLIT=\"validation\" \
10
- --gin.CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/finetuned/norwegian_NCC_pluss_english_1_500_000/checkpoint_1505000\" \
11
 
12
  #"gs://nb-t5x/eval_norwegian_NCC_2_000_000/checkpoint_2005000" \
13
  #--gin.SPLIT="validation" \
 
1
  #PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
2
  #T5X_DIR="../../t5x" # directory where the t5x is cloned.
3
+ #CHECKPOINT_PATH="gs://nb-t5x-us-central2/finetuned/norwegian_NCC_pluss_english_1_500_000/checkpoint_1510000"
4
  #export PYTHONPATH=${PROJECT_DIR}
5
 
6
  python3 eval.py \
7
  --gin_search_paths="./" \
8
  --gin_file="eval_categorisation_base.gin" \
9
  --gin.SPLIT=\"validation\" \
10
+ --gin.CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/finetuned/norwegian_NCC_plus_English_t5x_base_1_500_000_parliament/checkpoint_1510000\" \
11
 
12
  #"gs://nb-t5x/eval_norwegian_NCC_2_000_000/checkpoint_2005000" \
13
  #--gin.SPLIT="validation" \
eval_categorisation_base.gin CHANGED
@@ -12,7 +12,7 @@ CHECKPOINT_PATH = %gin.REQUIRED # passed via commandline
12
  SPLIT = %gin.REQUIRED # passed via commandline
13
  EVAL_OUTPUT_DIR = "./log/"
14
  DROPOUT_RATE = 0.0 # unused boilerplate
15
- MIXTURE_OR_TASK_NAME = "categorise"
16
 
17
  eval_script.evaluate:
18
  model = %MODEL # imported from separate gin file
 
12
  SPLIT = %gin.REQUIRED # passed via commandline
13
  EVAL_OUTPUT_DIR = "./log/"
14
  DROPOUT_RATE = 0.0 # unused boilerplate
15
+ MIXTURE_OR_TASK_NAME = "parliament"
16
 
17
  eval_script.evaluate:
18
  model = %MODEL # imported from separate gin file
finetune_base.sh CHANGED
@@ -1,11 +1,10 @@
1
  PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
2
- T5X_DIR="../../t5x" # directory where the t5x is cloned.
3
- #Needs to be updated when moving to tpu-v4 it should then be in another zone
4
- MODEL_DIR="gs://nb-t5x-us-central2/finetuned/norwegian_NCC_plus_English_t5x_base_1_500_000_sentiment"
5
  export PYTHONPATH=${PROJECT_DIR}
6
 
7
- python3 ${T5X_DIR}/t5x/train.py \
8
- --gin_search_paths=${PROJECT_DIR} \
9
  --gin_file="finetune_categorisation_base.gin" \
10
- --gin.MODEL_DIR="'${MODEL_DIR}'"
 
 
11
 
 
1
  PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
 
 
 
2
  export PYTHONPATH=${PROJECT_DIR}
3
 
4
+ python3 ../../t5x/t5x/train.py \
5
+ --gin_search_paths="./" \
6
  --gin_file="finetune_categorisation_base.gin" \
7
+ --gin.MIXTURE_OR_TASK_NAME=\"parliament\" \
8
+ --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v1_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" \
9
+
10
 
finetune_batch_base.sh ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
2
+ export PYTHONPATH=${PROJECT_DIR}
3
+
4
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin_file="finetune_categorisation_base.gin" --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v1_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" &&
5
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin_file="finetune_categorisation_base.gin" --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v2_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" &&
6
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin_file="finetune_categorisation_base.gin" --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v3_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" &&
7
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin_file="finetune_categorisation_base.gin" --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v4_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" &&
8
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin_file="finetune_categorisation_base.gin" --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v5_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" &&
9
+
10
+
finetune_categorisation_base.gin CHANGED
@@ -10,8 +10,9 @@ from t5x import utils
10
  include "t5x/examples/t5/mt5/base.gin"
11
  include "t5x/configs/runs/finetune.gin"
12
 
13
- MIXTURE_OR_TASK_NAME = "sentiment"
14
  TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 2}
 
15
  TRAIN_STEPS = 1_510_000 # 1000000 pre-trained steps + 10000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.1
@@ -25,7 +26,6 @@ infer_eval/utils.DatasetConfig:
25
  utils.SaveCheckpointConfig:
26
  period = 1000
27
 
28
- INITIAL_CHECKPOINT_PATH = "gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000"
29
 
30
  # Pere: Only necessary if we load a t5 model. We can start with an t5x model here
31
  # `LOSS_NORMALIZING_FACTOR`: When fine-tuning a model that was pre-trained
@@ -34,6 +34,6 @@ INITIAL_CHECKPOINT_PATH = "gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5
34
  # `2048 * 114`. For mT5: `1024 * 229`. For ByT5: `1024 * 189`.
35
  # LOSS_NORMALIZING_FACTOR = 234496
36
 
37
- # Might have to ba chaned based on architecture
38
  # partitioning.PjitPartitioner.num_partitions = 1
39
 
 
10
  include "t5x/examples/t5/mt5/base.gin"
11
  include "t5x/configs/runs/finetune.gin"
12
 
13
+ MIXTURE_OR_TASK_NAME = %gin.REQUIRED
14
  TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 2}
15
+ INITIAL_CHECKPOINT_PATH = "gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000"
16
  TRAIN_STEPS = 1_510_000 # 1000000 pre-trained steps + 10000 fine-tuning steps.
17
  USE_CACHED_TASKS = False
18
  DROPOUT_RATE = 0.1
 
26
  utils.SaveCheckpointConfig:
27
  period = 1000
28
 
 
29
 
30
  # Pere: Only necessary if we load a t5 model. We can start with an t5x model here
31
  # `LOSS_NORMALIZING_FACTOR`: When fine-tuning a model that was pre-trained
 
34
  # `2048 * 114`. For mT5: `1024 * 229`. For ByT5: `1024 * 189`.
35
  # LOSS_NORMALIZING_FACTOR = 234496
36
 
37
+ # Might have to be changed based on architecture
38
  # partitioning.PjitPartitioner.num_partitions = 1
39
 
log/config.gin CHANGED
@@ -12,12 +12,12 @@ import tasks
12
  # Macros:
13
  # ==============================================================================
14
  CHECKPOINT_PATH = \
15
- 'gs://nb-t5x-us-central2/finetuned/norwegian_NCC_pluss_english_1_500_000/checkpoint_1505000'
16
  DROPOUT_RATE = 0.0
17
  EVAL_OUTPUT_DIR = './log/'
18
  LABEL_SMOOTHING = 0.0
19
  LOSS_NORMALIZING_FACTOR = None
20
- MIXTURE_OR_TASK_NAME = 'categorise'
21
  MODEL = @models.EncoderDecoderModel()
22
  OPTIMIZER = @adafactor.Adafactor()
23
  SPLIT = 'validation'
 
12
  # Macros:
13
  # ==============================================================================
14
  CHECKPOINT_PATH = \
15
+ 'gs://nb-t5x-us-central2/finetuned/norwegian_NCC_plus_English_t5x_base_1_500_000_parliament/checkpoint_1510000'
16
  DROPOUT_RATE = 0.0
17
  EVAL_OUTPUT_DIR = './log/'
18
  LABEL_SMOOTHING = 0.0
19
  LOSS_NORMALIZING_FACTOR = None
20
+ MIXTURE_OR_TASK_NAME = 'parliament'
21
  MODEL = @models.EncoderDecoderModel()
22
  OPTIMIZER = @adafactor.Adafactor()
23
  SPLIT = 'validation'
log/eval_results_t1v-n-b482f629-w-0.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {"model": "gs://nb-t5x-us-central2/finetuned/test_dd_norwegian_NCC_pluss_english_1_500_000/checkpoint_1510000", "task": "parliament", "eval_date": "12-04-2022 15:25:26", "split": "validation", "feature_length": {"inputs": 512, "targets": 2}, "eval_batch_size": 16, "result": {"accuracy": 85.58333333333333, "f1_macro": 85.58332332175232}}
2
+ {"model": "gs://nb-t5x-us-central2/finetuned/test_dd_norwegian_NCC_pluss_english_1_500_000/checkpoint_1510000", "task": "parliament", "eval_date": "12-04-2022 15:34:57", "split": "validation", "feature_length": {"inputs": 512, "targets": 2}, "eval_batch_size": 16, "result": {"accuracy": 85.58333333333333, "f1_macro": 85.58332332175232}}
3
+ {"model": "gs://nb-t5x-us-central2/finetuned/norwegian_NCC_plus_English_t5x_base_1_500_000_parliament/checkpoint_1510000", "task": "parliament", "eval_date": "12-04-2022 16:04:05", "split": "validation", "feature_length": {"inputs": 512, "targets": 2}, "eval_batch_size": 16, "result": {"accuracy": 82.0, "f1_macro": 81.98193188169247}}