pere committed on
Commit
819dece
1 Parent(s): cfa2632

updated training tasks for big corpus

Browse files
__pycache__/tasks_v4.cpython-38.pyc CHANGED
Binary files a/__pycache__/tasks_v4.cpython-38.pyc and b/__pycache__/tasks_v4.cpython-38.pyc differ
 
finetune_large_mt5_sentencefix_v4_16.gin CHANGED
@@ -12,7 +12,7 @@ include "t5x/configs/runs/finetune.gin"
12
 
13
  MIXTURE_OR_TASK_NAME = "sentencefix"
14
  TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
15
- TRAIN_STEPS = 1_300_000 # 1000000 pre-trained steps + 20000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.0
18
  RANDOM_SEED = 0
 
12
 
13
  MIXTURE_OR_TASK_NAME = "sentencefix"
14
  TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
15
+ TRAIN_STEPS = 1_200_000 # 1000000 pre-trained steps + 200000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.0
18
  RANDOM_SEED = 0
tasks.py CHANGED
@@ -9,9 +9,9 @@ import t5
9
  import tensorflow.compat.v1 as tf
10
 
11
  tsv_path = {
12
- "train": "gs://nb-t5x/corpus/train/train.tsv",
13
- "validation": "gs://nb-t5x/corpus/eval/eval.tsv",
14
- "test": "gs://nb-t5x/corpus/test/test.tsv"
15
  }
16
 
17
  vocabulary = seqio.SentencePieceVocabulary(
 
9
  import tensorflow.compat.v1 as tf
10
 
11
  tsv_path = {
12
+ "train": "gs://nb-t5x-us-central2/corpus_big/train.tsv",
13
+ "validation": "gs://nb-t5x-us-central2/corpus_big/eval.tsv",
14
+ "test": "gs://nb-t5x-us-central2/corpus_big/test.tsv"
15
  }
16
 
17
  vocabulary = seqio.SentencePieceVocabulary(
train_large_v4_16.sh CHANGED
@@ -1,6 +1,6 @@
1
  PROJECT_DIR=${HOME}"/models/multi-sentencefix-mt5"
2
  T5X_DIR="../../t5x" # directory where the t5x is cloned.
3
- MODEL_DIR="gs://nb-t5x-us-central2/model_mT5X_large_16_d"
4
  export PYTHONPATH=${PROJECT_DIR}
5
 
6
  python3 ${T5X_DIR}/t5x/train.py \
 
1
  PROJECT_DIR=${HOME}"/models/multi-sentencefix-mt5"
2
  T5X_DIR="../../t5x" # directory where the t5x is cloned.
3
+ MODEL_DIR="gs://nb-t5x-us-central2/model_mT5X_large_16_e"
4
  export PYTHONPATH=${PROJECT_DIR}
5
 
6
  python3 ${T5X_DIR}/t5x/train.py \