long
Browse files
- batch_nynorsk_balanced_small_long.sh +12 -0
- tasks.py +23 -0
batch_nynorsk_balanced_small_long.sh
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Project directory under $HOME; exported below as PYTHONPATH.
PROJECT_DIR=${HOME}"/models/t5-nynorsk-oversetter"
|
2 |
+
# Put the project directory on Python's module search path
# (presumably so train.py can import the project's tasks.py — confirm).
export PYTHONPATH=${PROJECT_DIR}
|
3 |
+
# The backslash-escaped quotes stay in the value as literal quote characters
# (presumably so gin parses the binding as a string literal — confirm).
INITIAL_CHECKPOINT_PATH=\"gs://nb-t5x-us-central2/norwegian_NCC_plus_English_pluss200k_balanced_bokmaal_nynorsk_t5x_small/checkpoint_1700000\"
|
4 |
+
# 1705000 is 5000 above the checkpoint_1700000 starting point
# (presumably an absolute step count, i.e. 5000 fine-tuning steps — confirm).
TRAIN_STEPS=1705000
|
5 |
+
|
6 |
+
# Run t5x train.py with finetune_translate_small.gin on the "translate_long" task, starting from INITIAL_CHECKPOINT_PATH; MODEL_DIR is the ..._small_long_v1 bucket path.
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_small.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_balanced_small_long_v1\"
|
7 |
+
|
8 |
+
#python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_small.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_balanced_small_long_v2\" &&
|
9 |
+
#python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_small.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_balanced_small_long_v3\" &&
|
10 |
+
#python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_small.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_balanced_small_long_v4\" &&
|
11 |
+
#python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_translate_small.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"translate_long\" --gin.MODEL_DIR=\"gs://nb-t5x-us-central2/finetuned/nynorsk_balanced_small_long_v5\"
|
12 |
+
|
tasks.py
CHANGED
@@ -25,6 +25,12 @@ tsv_translate_path = {
|
|
25 |
"test": "gs://nb-t5x-us-central2/corpus_bokmal_nynorsk/test.tsv"
|
26 |
}
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
tsv_sentiment_path = {
|
29 |
"train": "gs://notram-public/finetune_datasets/norec_sentiment/train.tsv",
|
30 |
"validation": "gs://notram-public/finetune_datasets/norec_sentiment/dev.tsv",
|
@@ -182,3 +188,20 @@ seqio.TaskRegistry.add(
|
|
182 |
output_features=DEFAULT_OUTPUT_FEATURES,
|
183 |
)
|
184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
"test": "gs://nb-t5x-us-central2/corpus_bokmal_nynorsk/test.tsv"
|
26 |
}
|
27 |
|
28 |
+
# Split name -> TSV file path; passed as split_to_filepattern to the
# "translate_long" task's TextLineDataSource.
tsv_translate_long_path = {
|
29 |
+
"train": "gs://nb-t5x-us-central2/corpus_bokmal_nynorsk/train_long.tsv",
|
30 |
+
"validation": "gs://nb-t5x-us-central2/corpus_bokmal_nynorsk/dev_long.tsv",
|
31 |
+
"test": "gs://nb-t5x-us-central2/corpus_bokmal_nynorsk/test_long.tsv"
|
32 |
+
}
|
33 |
+
|
34 |
tsv_sentiment_path = {
|
35 |
"train": "gs://notram-public/finetune_datasets/norec_sentiment/train.tsv",
|
36 |
"validation": "gs://notram-public/finetune_datasets/norec_sentiment/dev.tsv",
|
|
|
188 |
output_features=DEFAULT_OUTPUT_FEATURES,
|
189 |
)
|
190 |
|
191 |
+
# Register the "translate_long" seqio task. It reads the files in
# tsv_translate_long_path as text lines, parses each line as TSV into
# "source" and "target" fields, applies categorise_preprocessor, then
# tokenizes and appends EOS.
# NOTE(review): accuracy and f1_macro are listed alongside BLEU; confirm they
# are meaningful for this task's targets.
seqio.TaskRegistry.add(
|
192 |
+
"translate_long",
|
193 |
+
source=seqio.TextLineDataSource(
|
194 |
+
split_to_filepattern=tsv_translate_long_path,
|
195 |
+
#num_input_examples=num_nq_examples
|
196 |
+
),
|
197 |
+
preprocessors=[
|
198 |
+
functools.partial(
|
199 |
+
t5.data.preprocessors.parse_tsv,
|
200 |
+
field_names=["source","target"]),
|
201 |
+
categorise_preprocessor,
|
202 |
+
seqio.preprocessors.tokenize_and_append_eos,
|
203 |
+
],
|
204 |
+
metric_fns=[metrics.accuracy,my_metrics.f1_macro,metrics.bleu],
|
205 |
+
output_features=DEFAULT_OUTPUT_FEATURES,
|
206 |
+
)
|
207 |
+
|