updated batch run
Browse files- eval_base.sh +2 -2
- eval_categorisation_base.gin +1 -1
- finetune_base.sh +5 -6
- finetune_batch_base.sh +10 -0
- finetune_categorisation_base.gin +3 -3
- log/config.gin +2 -2
- log/eval_results_t1v-n-b482f629-w-0.jsonl +3 -0
eval_base.sh
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
#PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
|
2 |
#T5X_DIR="../../t5x" # directory where the t5x is cloned.
|
3 |
-
CHECKPOINT_PATH="gs://nb-t5x/
|
4 |
#export PYTHONPATH=${PROJECT_DIR}
|
5 |
|
6 |
python3 eval.py \
|
7 |
--gin_search_paths="./" \
|
8 |
--gin_file="eval_categorisation_base.gin" \
|
9 |
--gin.SPLIT=\"validation\" \
|
10 |
-
--gin.CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/finetuned/
|
11 |
|
12 |
#"gs://nb-t5x/eval_norwegian_NCC_2_000_000/checkpoint_2005000" \
|
13 |
#--gin.SPLIT="validation" \
|
|
|
1 |
#PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
|
2 |
#T5X_DIR="../../t5x" # directory where the t5x is cloned.
|
3 |
+
#CHECKPOINT_PATH="gs://nb-t5x-us-central2/finetuned/norwegian_NCC_pluss_english_1_500_000/checkpoint_1510000"
|
4 |
#export PYTHONPATH=${PROJECT_DIR}
|
5 |
|
6 |
python3 eval.py \
|
7 |
--gin_search_paths="./" \
|
8 |
--gin_file="eval_categorisation_base.gin" \
|
9 |
--gin.SPLIT=\"validation\" \
|
10 |
+
--gin.CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/finetuned/norwegian_NCC_plus_English_t5x_base_1_500_000_parliament/checkpoint_1510000\" \
|
11 |
|
12 |
#"gs://nb-t5x/eval_norwegian_NCC_2_000_000/checkpoint_2005000" \
|
13 |
#--gin.SPLIT="validation" \
|
eval_categorisation_base.gin
CHANGED
@@ -12,7 +12,7 @@ CHECKPOINT_PATH = %gin.REQUIRED # passed via commandline
|
|
12 |
SPLIT = %gin.REQUIRED # passed via commandline
|
13 |
EVAL_OUTPUT_DIR = "./log/"
|
14 |
DROPOUT_RATE = 0.0 # unused boilerplate
|
15 |
-
MIXTURE_OR_TASK_NAME = "
|
16 |
|
17 |
eval_script.evaluate:
|
18 |
model = %MODEL # imported from separate gin file
|
|
|
12 |
SPLIT = %gin.REQUIRED # passed via commandline
|
13 |
EVAL_OUTPUT_DIR = "./log/"
|
14 |
DROPOUT_RATE = 0.0 # unused boilerplate
|
15 |
+
MIXTURE_OR_TASK_NAME = "parliament"
|
16 |
|
17 |
eval_script.evaluate:
|
18 |
model = %MODEL # imported from separate gin file
|
finetune_base.sh
CHANGED
@@ -1,11 +1,10 @@
|
|
1 |
PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
|
2 |
-
T5X_DIR="../../t5x" # directory where the t5x is cloned.
|
3 |
-
#Needs to be updated when moving to tpu-v4 it should then be in another zone
|
4 |
-
MODEL_DIR="gs://nb-t5x-us-central2/finetuned/norwegian_NCC_plus_English_t5x_base_1_500_000_sentiment"
|
5 |
export PYTHONPATH=${PROJECT_DIR}
|
6 |
|
7 |
-
python3
|
8 |
-
--gin_search_paths
|
9 |
--gin_file="finetune_categorisation_base.gin" \
|
10 |
-
--gin.
|
|
|
|
|
11 |
|
|
|
1 |
PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
|
|
|
|
|
|
|
2 |
export PYTHONPATH=${PROJECT_DIR}
|
3 |
|
4 |
+
python3 ../../t5x/t5x/train.py \
|
5 |
+
--gin_search_paths="./" \
|
6 |
--gin_file="finetune_categorisation_base.gin" \
|
7 |
+
--gin.MIXTURE_OR_TASK_NAME=\"parliament\" \
|
8 |
+
--gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v1_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" \
|
9 |
+
|
10 |
|
finetune_batch_base.sh
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
PROJECT_DIR=${HOME}"/models/t5-parliament-categorisation"
|
2 |
+
export PYTHONPATH=${PROJECT_DIR}
|
3 |
+
|
4 |
+
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin_file="finetune_categorisation_base.gin" --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v1_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" &&
|
5 |
+
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin_file="finetune_categorisation_base.gin" --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v2_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" &&
|
6 |
+
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin_file="finetune_categorisation_base.gin" --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v3_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" &&
|
7 |
+
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin_file="finetune_categorisation_base.gin" --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v4_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" &&
|
8 |
+
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin_file="finetune_categorisation_base.gin" --gin.MIXTURE_OR_TASK_NAME=\"parliament\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/v5_norwegian_NCC_plus_English_t5x_base_1_500_000_parliament\" &&
|
9 |
+
|
10 |
+
|
finetune_categorisation_base.gin
CHANGED
@@ -10,8 +10,9 @@ from t5x import utils
|
|
10 |
include "t5x/examples/t5/mt5/base.gin"
|
11 |
include "t5x/configs/runs/finetune.gin"
|
12 |
|
13 |
-
MIXTURE_OR_TASK_NAME =
|
14 |
TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 2}
|
|
|
15 |
TRAIN_STEPS = 1_510_000 # 1500000 pre-trained steps + 10000 fine-tuning steps.
|
16 |
USE_CACHED_TASKS = False
|
17 |
DROPOUT_RATE = 0.1
|
@@ -25,7 +26,6 @@ infer_eval/utils.DatasetConfig:
|
|
25 |
utils.SaveCheckpointConfig:
|
26 |
period = 1000
|
27 |
|
28 |
-
INITIAL_CHECKPOINT_PATH = "gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000"
|
29 |
|
30 |
# Pere: Only necessary if we load a t5 model. We can start with a t5x model here
|
31 |
# `LOSS_NORMALIZING_FACTOR`: When fine-tuning a model that was pre-trained
|
@@ -34,6 +34,6 @@ INITIAL_CHECKPOINT_PATH = "gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5
|
|
34 |
# `2048 * 114`. For mT5: `1024 * 229`. For ByT5: `1024 * 189`.
|
35 |
# LOSS_NORMALIZING_FACTOR = 234496
|
36 |
|
37 |
-
# Might have to ba
|
38 |
# partitioning.PjitPartitioner.num_partitions = 1
|
39 |
|
|
|
10 |
include "t5x/examples/t5/mt5/base.gin"
|
11 |
include "t5x/configs/runs/finetune.gin"
|
12 |
|
13 |
+
MIXTURE_OR_TASK_NAME = %gin.REQUIRED
|
14 |
TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 2}
|
15 |
+
INITIAL_CHECKPOINT_PATH = "gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000"
|
16 |
TRAIN_STEPS = 1_510_000 # 1500000 pre-trained steps + 10000 fine-tuning steps.
|
17 |
USE_CACHED_TASKS = False
|
18 |
DROPOUT_RATE = 0.1
|
|
|
26 |
utils.SaveCheckpointConfig:
|
27 |
period = 1000
|
28 |
|
|
|
29 |
|
30 |
# Pere: Only necessary if we load a t5 model. We can start with a t5x model here
|
31 |
# `LOSS_NORMALIZING_FACTOR`: When fine-tuning a model that was pre-trained
|
|
|
34 |
# `2048 * 114`. For mT5: `1024 * 229`. For ByT5: `1024 * 189`.
|
35 |
# LOSS_NORMALIZING_FACTOR = 234496
|
36 |
|
37 |
+
# Might have to be changed based on architecture
|
38 |
# partitioning.PjitPartitioner.num_partitions = 1
|
39 |
|
log/config.gin
CHANGED
@@ -12,12 +12,12 @@ import tasks
|
|
12 |
# Macros:
|
13 |
# ==============================================================================
|
14 |
CHECKPOINT_PATH = \
|
15 |
-
'gs://nb-t5x-us-central2/finetuned/
|
16 |
DROPOUT_RATE = 0.0
|
17 |
EVAL_OUTPUT_DIR = './log/'
|
18 |
LABEL_SMOOTHING = 0.0
|
19 |
LOSS_NORMALIZING_FACTOR = None
|
20 |
-
MIXTURE_OR_TASK_NAME = '
|
21 |
MODEL = @models.EncoderDecoderModel()
|
22 |
OPTIMIZER = @adafactor.Adafactor()
|
23 |
SPLIT = 'validation'
|
|
|
12 |
# Macros:
|
13 |
# ==============================================================================
|
14 |
CHECKPOINT_PATH = \
|
15 |
+
'gs://nb-t5x-us-central2/finetuned/norwegian_NCC_plus_English_t5x_base_1_500_000_parliament/checkpoint_1510000'
|
16 |
DROPOUT_RATE = 0.0
|
17 |
EVAL_OUTPUT_DIR = './log/'
|
18 |
LABEL_SMOOTHING = 0.0
|
19 |
LOSS_NORMALIZING_FACTOR = None
|
20 |
+
MIXTURE_OR_TASK_NAME = 'parliament'
|
21 |
MODEL = @models.EncoderDecoderModel()
|
22 |
OPTIMIZER = @adafactor.Adafactor()
|
23 |
SPLIT = 'validation'
|
log/eval_results_t1v-n-b482f629-w-0.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{"model": "gs://nb-t5x-us-central2/finetuned/test_dd_norwegian_NCC_pluss_english_1_500_000/checkpoint_1510000", "task": "parliament", "eval_date": "12-04-2022 15:25:26", "split": "validation", "feature_length": {"inputs": 512, "targets": 2}, "eval_batch_size": 16, "result": {"accuracy": 85.58333333333333, "f1_macro": 85.58332332175232}}
|
2 |
+
{"model": "gs://nb-t5x-us-central2/finetuned/test_dd_norwegian_NCC_pluss_english_1_500_000/checkpoint_1510000", "task": "parliament", "eval_date": "12-04-2022 15:34:57", "split": "validation", "feature_length": {"inputs": 512, "targets": 2}, "eval_batch_size": 16, "result": {"accuracy": 85.58333333333333, "f1_macro": 85.58332332175232}}
|
3 |
+
{"model": "gs://nb-t5x-us-central2/finetuned/norwegian_NCC_plus_English_t5x_base_1_500_000_parliament/checkpoint_1510000", "task": "parliament", "eval_date": "12-04-2022 16:04:05", "split": "validation", "feature_length": {"inputs": 512, "targets": 2}, "eval_batch_size": 16, "result": {"accuracy": 82.0, "f1_macro": 81.98193188169247}}
|