pere committed on
Commit
016a37d
1 Parent(s): 055d439

multiple fixes

Browse files
__pycache__/tasks_v4.cpython-38.pyc CHANGED
Binary files a/__pycache__/tasks_v4.cpython-38.pyc and b/__pycache__/tasks_v4.cpython-38.pyc differ
 
finetune_large_mt5_sentencefix.gin CHANGED
@@ -7,12 +7,12 @@ from t5x import models
7
  from t5x import partitioning
8
  from t5x import utils
9
 
10
- include "t5x/examples/t5/mt5/small.gin"
11
  include "t5x/configs/runs/finetune.gin"
12
 
13
  MIXTURE_OR_TASK_NAME = "sentencefix"
14
  TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
15
- TRAIN_STEPS = 1_100_000 # 1000000 pre-trained steps + 20000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.0
18
  RANDOM_SEED = 0
 
7
  from t5x import partitioning
8
  from t5x import utils
9
 
10
+ include "t5x/examples/t5/mt5/large.gin"
11
  include "t5x/configs/runs/finetune.gin"
12
 
13
  MIXTURE_OR_TASK_NAME = "sentencefix"
14
  TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
15
+ TRAIN_STEPS = 1_200_000 # 1000000 pre-trained steps + 200000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.0
18
  RANDOM_SEED = 0
finetune_large_mt5_sentencefix_v4.gin CHANGED
@@ -7,12 +7,12 @@ from t5x import models
7
  from t5x import partitioning
8
  from t5x import utils
9
 
10
- include "t5x/examples/t5/mt5/small.gin"
11
  include "t5x/configs/runs/finetune.gin"
12
 
13
  MIXTURE_OR_TASK_NAME = "sentencefix"
14
  TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
15
- TRAIN_STEPS = 1_100_000 # 1000000 pre-trained steps + 20000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.0
18
  RANDOM_SEED = 0
 
7
  from t5x import partitioning
8
  from t5x import utils
9
 
10
+ include "t5x/examples/t5/mt5/large.gin"
11
  include "t5x/configs/runs/finetune.gin"
12
 
13
  MIXTURE_OR_TASK_NAME = "sentencefix"
14
  TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
15
+ TRAIN_STEPS = 1_200_000 # 1000000 pre-trained steps + 200000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.0
18
  RANDOM_SEED = 0
finetune_mt5_sentencefix.gin CHANGED
@@ -12,7 +12,7 @@ include "t5x/configs/runs/finetune.gin"
12
 
13
  MIXTURE_OR_TASK_NAME = "sentencefix"
14
  TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
15
- TRAIN_STEPS = 1_100_050 # 1000000 pre-trained steps + 20000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.0
18
  RANDOM_SEED = 0
 
12
 
13
  MIXTURE_OR_TASK_NAME = "sentencefix"
14
  TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
15
+ TRAIN_STEPS = 1_200_000 # 1000000 pre-trained steps + 200000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.0
18
  RANDOM_SEED = 0
finetune_mt5_sentencefix_v4.gin CHANGED
@@ -12,7 +12,7 @@ include "t5x/configs/runs/finetune.gin"
12
 
13
  MIXTURE_OR_TASK_NAME = "sentencefix"
14
  TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
15
- TRAIN_STEPS = 1_100_050 # 1000000 pre-trained steps + 20000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.0
18
  RANDOM_SEED = 0
 
12
 
13
  MIXTURE_OR_TASK_NAME = "sentencefix"
14
  TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
15
+ TRAIN_STEPS = 1_200_000 # 1000000 pre-trained steps + 200000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.0
18
  RANDOM_SEED = 0
finetune_small_mt5_sentencefix.gin CHANGED
@@ -12,7 +12,7 @@ include "t5x/configs/runs/finetune.gin"
12
 
13
  MIXTURE_OR_TASK_NAME = "sentencefix"
14
  TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
15
- TRAIN_STEPS = 1_100_000 # 1000000 pre-trained steps + 20000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.0
18
  RANDOM_SEED = 0
 
12
 
13
  MIXTURE_OR_TASK_NAME = "sentencefix"
14
  TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
15
+ TRAIN_STEPS = 1_200_000 # 1000000 pre-trained steps + 200000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.0
18
  RANDOM_SEED = 0
finetune_small_mt5_sentencefix_v4.gin CHANGED
@@ -12,7 +12,7 @@ include "t5x/configs/runs/finetune.gin"
12
 
13
  MIXTURE_OR_TASK_NAME = "sentencefix"
14
  TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
15
- TRAIN_STEPS = 1_100_000 # 1000000 pre-trained steps + 20000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.0
18
  RANDOM_SEED = 0
 
12
 
13
  MIXTURE_OR_TASK_NAME = "sentencefix"
14
  TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
15
+ TRAIN_STEPS = 1_200_000 # 1000000 pre-trained steps + 200000 fine-tuning steps.
16
  USE_CACHED_TASKS = False
17
  DROPOUT_RATE = 0.0
18
  RANDOM_SEED = 0
tasks_v4.py CHANGED
@@ -59,7 +59,7 @@ seqio.TaskRegistry.add(
59
  sentencefix_preprocessor,
60
  seqio.preprocessors.tokenize_and_append_eos,
61
  ],
62
- #metric_fns=[t5.evaluation.metrics.bleu],
63
  output_features=DEFAULT_OUTPUT_FEATURES,
64
  )
65
 
 
59
  sentencefix_preprocessor,
60
  seqio.preprocessors.tokenize_and_append_eos,
61
  ],
62
+ metric_fns=[metrics.bleu],
63
  output_features=DEFAULT_OUTPUT_FEATURES,
64
  )
65
 
train_large_v4.sh CHANGED
@@ -1,6 +1,5 @@
1
  PROJECT_DIR=${HOME}"/models/multi-sentencefix-mt5"
2
  T5X_DIR="../../t5x" # directory where the t5x is cloned.
3
- TFDS_DATA_DIR="gs://nb-t5x-us-central2/corpus_multi_sentencefix_mt5"
4
  MODEL_DIR="gs://nb-t5x-us-central2/large_model_multi_sentencefix_mt5"
5
  export PYTHONPATH=${PROJECT_DIR}
6
 
 
1
  PROJECT_DIR=${HOME}"/models/multi-sentencefix-mt5"
2
  T5X_DIR="../../t5x" # directory where the t5x is cloned.
 
3
  MODEL_DIR="gs://nb-t5x-us-central2/large_model_multi_sentencefix_mt5"
4
  export PYTHONPATH=${PROJECT_DIR}
5
 
train_small_v4.sh ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PROJECT_DIR=${HOME}"/models/multi-sentencefix-mt5"
2
+ T5X_DIR="../../t5x" # directory where the t5x is cloned.
3
+ MODEL_DIR="gs://nb-t5x-us-central2/small_model_multi_sentencefix_mt5"
4
+ export PYTHONPATH=${PROJECT_DIR}
5
+
6
+ python3 ${T5X_DIR}/t5x/train.py \
7
+ --gin_search_paths=${PROJECT_DIR} \
8
+ --gin_file="finetune_small_mt5_sentencefix_v4.gin" \
9
+ --gin.MODEL_DIR="'${MODEL_DIR}'" \
10
+ --tfds_data_dir=${TFDS_DATA_DIR}
11
+
train_v4.sh CHANGED
@@ -1,6 +1,5 @@
1
  PROJECT_DIR=${HOME}"/models/multi-sentencefix-mt5"
2
  T5X_DIR="../../t5x" # directory where the t5x is cloned.
3
- TFDS_DATA_DIR="gs://nb-t5x-us-central2/corpus_multi_sentencefix_mt5"
4
  MODEL_DIR="gs://nb-t5x-us-central2/model_multi_sentencefix_mt5"
5
  export PYTHONPATH=${PROJECT_DIR}
6
 
 
1
  PROJECT_DIR=${HOME}"/models/multi-sentencefix-mt5"
2
  T5X_DIR="../../t5x" # directory where the t5x is cloned.
 
3
  MODEL_DIR="gs://nb-t5x-us-central2/model_multi_sentencefix_mt5"
4
  export PYTHONPATH=${PROJECT_DIR}
5