pere commited on
Commit
dd741fa
1 Parent(s): 9db40a9

training on extra pretrain models

Browse files
batch_finetune_eu_jav_10M_large.sh ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PROJECT_DIR=${HOME}"/models/eu-jav-categorisation"
2
+ export PYTHONPATH=${PROJECT_DIR}
3
+ INITIAL_CHECKPOINT_PATH=\"gs://eu-jav-t5x/pretrained_models/italian_tweets_10M_t5x_large/checkpoint_1100000\"
4
+ TRAIN_STEPS=1102000
5
+
6
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_10M_v1\" &&
7
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_10M_v2\" &&
8
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_10M_v3\" &&
9
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_10M_v4\" &&
10
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_10M_v5\"
11
+
batch_finetune_eu_jav_500k_large.sh ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PROJECT_DIR=${HOME}"/models/eu-jav-categorisation"
2
+ export PYTHONPATH=${PROJECT_DIR}
3
+ INITIAL_CHECKPOINT_PATH=\"gs://eu-jav-t5x/pretrained_models/italian_tweets_500k_t5x_large/checkpoint_1100000\"
4
+ TRAIN_STEPS=1102000
5
+
6
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_500k_v1\" &&
7
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_500k_v2\" &&
8
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_500k_v3\" &&
9
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_500k_v4\" &&
10
+ python3 ../../t5x/t5x/train.py --gin_search_paths="./" --gin.TRAIN_STEPS=${TRAIN_STEPS} --gin_file="finetune_classification_large.gin" --gin.INITIAL_CHECKPOINT_PATH=${INITIAL_CHECKPOINT_PATH} --gin.MIXTURE_OR_TASK_NAME=\"classify_tweets\" --gin.MODEL_DIR=\"gs://eu-jav-t5x/finetuned/italian_tweets/classify_tweets_large_500k_v5\"
11
+
tasks.py CHANGED
@@ -12,7 +12,7 @@ import tensorflow.compat.v1 as tf
12
  tsv_path = {
13
  "train": "gs://eu-jav-t5x/corpus/labeled/datasetA_train_3categories.tsv",
14
  "validation": "gs://eu-jav-t5x/corpus/labeled/datasetA_dev_3categories.tsv",
15
- "test": "gs://eu-jav-t5x/corpus/labeled/ datasetA_test_3categories.tsv"
16
  }
17
 
18
  vocabulary = seqio.SentencePieceVocabulary(
 
12
  tsv_path = {
13
  "train": "gs://eu-jav-t5x/corpus/labeled/datasetA_train_3categories.tsv",
14
  "validation": "gs://eu-jav-t5x/corpus/labeled/datasetA_dev_3categories.tsv",
15
+ "test": "gs://eu-jav-t5x/corpus/labeled/datasetA_test_3categories.tsv"
16
  }
17
 
18
  vocabulary = seqio.SentencePieceVocabulary(