test
Browse files
batch_nynorsk_NCC_base.sh → batch_political_base.sh
RENAMED
@@ -1,5 +1,5 @@
|
|
1 |
#!/bin/bash
|
2 |
-
PROJECT_DIR=${HOME}"/models/t5-
|
3 |
export PYTHONPATH=${PROJECT_DIR}
|
4 |
#INITIAL_CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000\"
|
5 |
#TRAIN_STEPS=1505000
|
@@ -7,7 +7,7 @@ export PYTHONPATH=${PROJECT_DIR}
|
|
7 |
|
8 |
FINETUNE_STEPS=5000
|
9 |
MODEL_BUCKET_DIR="gs://nb-t5x-us-central2/finetuned/"
|
10 |
-
EVAL_PREFIX="ul2test/
|
11 |
CHECKPOINT_BUCKET_DIR="gs://nb-t5x-us-central2/"
|
12 |
CHECKPOINT_LIST=("exp1-t5-base-ul2-engvoc" "exp2-t5-base-ul2-scandvoc" "exp3-t5-base-span-engvoc" "exp4-t5-base-span-scandvoc" "exp5-t5-base-ul2-scandvoc-full" "exp6-t5-base-span-scandvoc-full" "exp7-t5-base-ul2-511-scandvoc" "exp8-t5-base-span-511-scandvoc" "exp9-t5-base-ul2-mt5voc" "exp10-t5-base-span-mt5voc")
|
13 |
|
@@ -32,11 +32,11 @@ for v in "${VERSION_LIST[@]}"; do
|
|
32 |
INITIAL_CHECKPOINT_PATH="${CHECKPOINT_BUCKET_DIR}${CHECKPOINT_LIST[$((index-1))]}/checkpoint_${s}"
|
33 |
TRAIN_STEPS=$((s+FINETUNE_STEPS))
|
34 |
if [[ "$INITIAL_CHECKPOINT_PATH" == *"engvoc"* ]]; then
|
35 |
-
GIN_FILE="
|
36 |
-
MIXTURE_OR_TASK_NAME="
|
37 |
else
|
38 |
-
GIN_FILE="
|
39 |
-
MIXTURE_OR_TASK_NAME="
|
40 |
fi
|
41 |
|
42 |
MODEL_DIR="${MODEL_BUCKET_DIR}${EVAL_PREFIX}v${v}_${CHECKPOINT_LIST[$((index-1))]}_${s}"
|
|
|
1 |
#!/bin/bash
|
2 |
+
PROJECT_DIR=${HOME}"/models/t5-political-evaluator"
|
3 |
export PYTHONPATH=${PROJECT_DIR}
|
4 |
#INITIAL_CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000\"
|
5 |
#TRAIN_STEPS=1505000
|
|
|
7 |
|
8 |
FINETUNE_STEPS=5000
|
9 |
MODEL_BUCKET_DIR="gs://nb-t5x-us-central2/finetuned/"
|
10 |
+
EVAL_PREFIX="ul2test/eval_political_"
|
11 |
CHECKPOINT_BUCKET_DIR="gs://nb-t5x-us-central2/"
|
12 |
CHECKPOINT_LIST=("exp1-t5-base-ul2-engvoc" "exp2-t5-base-ul2-scandvoc" "exp3-t5-base-span-engvoc" "exp4-t5-base-span-scandvoc" "exp5-t5-base-ul2-scandvoc-full" "exp6-t5-base-span-scandvoc-full" "exp7-t5-base-ul2-511-scandvoc" "exp8-t5-base-span-511-scandvoc" "exp9-t5-base-ul2-mt5voc" "exp10-t5-base-span-mt5voc")
|
13 |
|
|
|
32 |
INITIAL_CHECKPOINT_PATH="${CHECKPOINT_BUCKET_DIR}${CHECKPOINT_LIST[$((index-1))]}/checkpoint_${s}"
|
33 |
TRAIN_STEPS=$((s+FINETUNE_STEPS))
|
34 |
if [[ "$INITIAL_CHECKPOINT_PATH" == *"engvoc"* ]]; then
|
35 |
+
GIN_FILE="finetune_classification_base.gin"
|
36 |
+
MIXTURE_OR_TASK_NAME="parliament_max300"
|
37 |
else
|
38 |
+
GIN_FILE="finetune_classification_base_scand.gin"
|
39 |
+
MIXTURE_OR_TASK_NAME="parliament_max300_scand"
|
40 |
fi
|
41 |
|
42 |
MODEL_DIR="${MODEL_BUCKET_DIR}${EVAL_PREFIX}v${v}_${CHECKPOINT_LIST[$((index-1))]}_${s}"
|
finetune_classification_base_scand.git → finetune_classification_base_scand.gin
RENAMED
File without changes
|
finetune_translate_base_scand.gin
DELETED
@@ -1,37 +0,0 @@
|
|
1 |
-
from __gin__ import dynamic_registration
|
2 |
-
import tasks
|
3 |
-
import seqio
|
4 |
-
|
5 |
-
import __main__ as train_script
|
6 |
-
from t5.data import mixtures
|
7 |
-
from t5x import models
|
8 |
-
from t5x import partitioning
|
9 |
-
from t5x import utils
|
10 |
-
|
11 |
-
include "t5x/examples/t5/t5_1_1/base.gin"
|
12 |
-
include "t5x/configs/runs/finetune.gin"
|
13 |
-
|
14 |
-
MIXTURE_OR_TASK_NAME = %gin.REQUIRED
|
15 |
-
TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 512}
|
16 |
-
INITIAL_CHECKPOINT_PATH = %gin.REQUIRED
|
17 |
-
TRAIN_STEPS = %gin.REQUIRED # 1000000 pre-trained steps + 10000 fine-tuning steps.
|
18 |
-
USE_CACHED_TASKS = False
|
19 |
-
DROPOUT_RATE = 0.1
|
20 |
-
RANDOM_SEED = 0
|
21 |
-
|
22 |
-
#Fixing a small error
|
23 |
-
infer_eval/utils.DatasetConfig:
|
24 |
-
task_feature_lengths = %TASK_FEATURE_LENGTHS
|
25 |
-
|
26 |
-
#Saving every 1000 steps
|
27 |
-
utils.SaveCheckpointConfig:
|
28 |
-
period = 1000
|
29 |
-
keep = 1 # number of checkpoints to keep
|
30 |
-
|
31 |
-
# Might have to be changed based on architecture
|
32 |
-
# partitioning.PjitPartitioner.num_partitions = 1
|
33 |
-
|
34 |
-
VOCABULARY = @seqio.SentencePieceVocabulary()
|
35 |
-
seqio.SentencePieceVocabulary.sentencepiece_model_file = "gs://nb-t5/t5/vocabs/wikipedia/no-da-en-sv-nn-is_32000_unigram.sp.model"
|
36 |
-
seqio.SentencePieceVocabulary.extra_ids = 100
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|