Training on extra pretrained models
Browse files
- batch_finetune_eu_jav_10M_large.sh +11 -0
- batch_finetune_eu_jav_500k_large.sh +11 -0
- tasks.py +1 -1
batch_finetune_eu_jav_10M_large.sh
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
PROJECT_DIR=${HOME}"/models/eu-jav-categorisation"
|
2 |
+
export PYTHONPATH=${PROJECT_DIR}
|
3 |
+
INITIAL_CHECKPOINT_PATH=\"gs://eu-jav-t5x/pretrained_models/italian_tweets_10M_t5x_large/checkpoint_1100000\"
|
4 |
+
TRAIN_STEPS=1102000
|
5 |
+
|
6 |
+
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_10M_v1\" &&
|
7 |
+
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_10M_v2\" &&
|
8 |
+
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_10M_v3\" &&
|
9 |
+
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_10M_v4\" &&
|
10 |
+
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_10M_v5\"
|
11 |
+
|
batch_finetune_eu_jav_500k_large.sh
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
PROJECT_DIR=${HOME}"/models/eu-jav-categorisation"
|
2 |
+
export PYTHONPATH=${PROJECT_DIR}
|
3 |
+
INITIAL_CHECKPOINT_PATH=\"gs://eu-jav-t5x/pretrained_models/italian_tweets_500k_t5x_large/checkpoint_1100000\"
|
4 |
+
TRAIN_STEPS=1102000
|
5 |
+
|
6 |
+
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_500k_v1\" &&
|
7 |
+
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_500k_v2\" &&
|
8 |
+
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_500k_v3\" &&
|
9 |
+
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_500k_v4\" &&
|
10 |
+
python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_500k_v5\"
|
11 |
+
|
tasks.py
CHANGED
@@ -12,7 +12,7 @@ import tensorflow.compat.v1 as tf
|
|
12 |
tsv_path = {
|
13 |
"train": "gs://eu-jav-t5x/corpus/labeled/datasetA_train_3categories.tsv",
|
14 |
"validation": "gs://eu-jav-t5x/corpus/labeled/datasetA_dev_3categories.tsv",
|
15 |
-
"test": "gs://eu-jav-t5x/corpus/labeled/
|
16 |
}
|
17 |
|
18 |
vocabulary = seqio.SentencePieceVocabulary(
|
|
|
12 |
tsv_path = {
|
13 |
"train": "gs://eu-jav-t5x/corpus/labeled/datasetA_train_3categories.tsv",
|
14 |
"validation": "gs://eu-jav-t5x/corpus/labeled/datasetA_dev_3categories.tsv",
|
15 |
+
"test": "gs://eu-jav-t5x/corpus/labeled/datasetA_test_3categories.tsv"
|
16 |
}
|
17 |
|
18 |
vocabulary = seqio.SentencePieceVocabulary(
|