pere committed on
Commit
1eca025
•
1 Parent(s): 5c54fc2
batch_nynorsk_NCC_base.sh → batch_political_base.sh RENAMED
@@ -1,5 +1,5 @@
1
  #!/bin/bash
2
- PROJECT_DIR=${HOME}"/models/t5-nynorsk-evaluator"
3
  export PYTHONPATH=${PROJECT_DIR}
4
  #INITIAL_CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000\"
5
  #TRAIN_STEPS=1505000
@@ -7,7 +7,7 @@ export PYTHONPATH=${PROJECT_DIR}
7
 
8
  FINETUNE_STEPS=5000
9
  MODEL_BUCKET_DIR="gs://nb-t5x-us-central2/finetuned/"
10
- EVAL_PREFIX="ul2test/eval_nynorsk_"
11
  CHECKPOINT_BUCKET_DIR="gs://nb-t5x-us-central2/"
12
  CHECKPOINT_LIST=("exp1-t5-base-ul2-engvoc" "exp2-t5-base-ul2-scandvoc" "exp3-t5-base-span-engvoc" "exp4-t5-base-span-scandvoc" "exp5-t5-base-ul2-scandvoc-full" "exp6-t5-base-span-scandvoc-full" "exp7-t5-base-ul2-511-scandvoc" "exp8-t5-base-span-511-scandvoc" "exp9-t5-base-ul2-mt5voc" "exp10-t5-base-span-mt5voc")
13
 
@@ -32,11 +32,11 @@ for v in "${VERSION_LIST[@]}"; do
32
  INITIAL_CHECKPOINT_PATH="${CHECKPOINT_BUCKET_DIR}${CHECKPOINT_LIST[$((index-1))]}/checkpoint_${s}"
33
  TRAIN_STEPS=$((s+FINETUNE_STEPS))
34
  if [[ "$INITIAL_CHECKPOINT_PATH" == *"engvoc"* ]]; then
35
- GIN_FILE="finetune_translate_base.gin"
36
- MIXTURE_OR_TASK_NAME="translate_long"
37
  else
38
- GIN_FILE="finetune_translate_base_scand.gin"
39
- MIXTURE_OR_TASK_NAME="translate_long_scand"
40
  fi
41
 
42
  MODEL_DIR="${MODEL_BUCKET_DIR}${EVAL_PREFIX}v${v}_${CHECKPOINT_LIST[$((index-1))]}_${s}"
 
1
  #!/bin/bash
2
+ PROJECT_DIR=${HOME}"/models/t5-political-evaluator"
3
  export PYTHONPATH=${PROJECT_DIR}
4
  #INITIAL_CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/norwegian_NCC_plus_English_t5x_base/checkpoint_1500000\"
5
  #TRAIN_STEPS=1505000
 
7
 
8
  FINETUNE_STEPS=5000
9
  MODEL_BUCKET_DIR="gs://nb-t5x-us-central2/finetuned/"
10
+ EVAL_PREFIX="ul2test/eval_political_"
11
  CHECKPOINT_BUCKET_DIR="gs://nb-t5x-us-central2/"
12
  CHECKPOINT_LIST=("exp1-t5-base-ul2-engvoc" "exp2-t5-base-ul2-scandvoc" "exp3-t5-base-span-engvoc" "exp4-t5-base-span-scandvoc" "exp5-t5-base-ul2-scandvoc-full" "exp6-t5-base-span-scandvoc-full" "exp7-t5-base-ul2-511-scandvoc" "exp8-t5-base-span-511-scandvoc" "exp9-t5-base-ul2-mt5voc" "exp10-t5-base-span-mt5voc")
13
 
 
32
  INITIAL_CHECKPOINT_PATH="${CHECKPOINT_BUCKET_DIR}${CHECKPOINT_LIST[$((index-1))]}/checkpoint_${s}"
33
  TRAIN_STEPS=$((s+FINETUNE_STEPS))
34
  if [[ "$INITIAL_CHECKPOINT_PATH" == *"engvoc"* ]]; then
35
+ GIN_FILE="finetune_classification_base.gin"
36
+ MIXTURE_OR_TASK_NAME="parliament_max300"
37
  else
38
+ GIN_FILE="finetune_classification_base_scand.gin"
39
+ MIXTURE_OR_TASK_NAME="parliament_max300_scand"
40
  fi
41
 
42
  MODEL_DIR="${MODEL_BUCKET_DIR}${EVAL_PREFIX}v${v}_${CHECKPOINT_LIST[$((index-1))]}_${s}"
finetune_classification_base_scand.git → finetune_classification_base_scand.gin RENAMED
File without changes
finetune_translate_base_scand.gin DELETED
@@ -1,37 +0,0 @@
1
- from __gin__ import dynamic_registration
2
- import tasks
3
- import seqio
4
-
5
- import __main__ as train_script
6
- from t5.data import mixtures
7
- from t5x import models
8
- from t5x import partitioning
9
- from t5x import utils
10
-
11
- include "t5x/examples/t5/t5_1_1/base.gin"
12
- include "t5x/configs/runs/finetune.gin"
13
-
14
- MIXTURE_OR_TASK_NAME = %gin.REQUIRED
15
- TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 512}
16
- INITIAL_CHECKPOINT_PATH = %gin.REQUIRED
17
- TRAIN_STEPS = %gin.REQUIRED # 1000000 pre-trained steps + 10000 fine-tuning steps.
18
- USE_CACHED_TASKS = False
19
- DROPOUT_RATE = 0.1
20
- RANDOM_SEED = 0
21
-
22
- #Fixing a small error
23
- infer_eval/utils.DatasetConfig:
24
- task_feature_lengths = %TASK_FEATURE_LENGTHS
25
-
26
- #Saving every 1000 steps
27
- utils.SaveCheckpointConfig:
28
- period = 1000
29
- keep = 1 # number of checkpoints to keep
30
-
31
- # Might have to be changed based on architecture
32
- # partitioning.PjitPartitioner.num_partitions = 1
33
-
34
- VOCABULARY = @seqio.SentencePieceVocabulary()
35
- seqio.SentencePieceVocabulary.sentencepiece_model_file = "gs://nb-t5/t5/vocabs/wikipedia/no-da-en-sv-nn-is_32000_unigram.sp.model"
36
- seqio.SentencePieceVocabulary.extra_ids = 100
37
-