aapot committed
Commit: 8fe8e96
1 Parent(s): 2670ecc

Saving weights and logs of step 10000

events.out.tfevents.1629913928.t1v-n-1ae8dadb-w-0.220173.0.v2 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ba48449025afa6e28d08421224bf053c3a32ec691cfc7318dec5bd9a48fb65c3
-size 3246967

events.out.tfevents.1629925854.t1v-n-1ae8dadb-w-0.258038.0.v2 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d3ba93cc73524ee91a324c9c60c9b1944661982351b153371e82ee189b547e08
-size 8166703

events.out.tfevents.1629954669.t1v-n-1ae8dadb-w-0.348909.0.v2 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c2754b0718aaa184806c065e66c1e4cd5de84a84f740d764fa0aa7b57e056c74
-size 1176479

events.out.tfevents.1629902662.t1v-n-1ae8dadb-w-0.181842.0.v2 → events.out.tfevents.1630151615.t1v-n-1ae8dadb-w-0.8890.0.v2 RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c3b9568e0532131b6d5232d4f2f0183bf62a9e2c1a98eab3d137bf52e2c21bb
-size 3097897
+oid sha256:dc6c70fe6cb7046c1a855abe8c2efa9b465bf15a70e00985b508e34a79ef7e1a
+size 1470757
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7cf30f16ac72e048be2b0ad47ce76fdf2efcb13b5346dcf8a7d20d633848f7ac
+oid sha256:4adcd65be12523ba6f7b50c340edac6ac4bbfc8b58f38b74c49d4a0a52bdc323
 size 1421662309
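
Each three-line block above is a Git LFS pointer: the repository tracks only the spec version, the SHA-256 of the real file, and its byte size, while the payload lives in LFS storage. As a minimal sketch (not part of this repo), the pointer text for a local file can be recomputed like this:

```python
import hashlib
import os

def lfs_pointer(path: str) -> str:
    """Build the Git LFS pointer text (spec v1) for a local file."""
    sha = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks so large checkpoints don't load into memory at once.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return (
        "version https://git-lfs.github.com/spec/v1\n"
        f"oid sha256:{sha.hexdigest()}\n"
        f"size {os.path.getsize(path)}\n"
    )

# e.g. the model weights updated in this commit
print(lfs_pointer("flax_model.msgpack"), end="")
```

Matching the printed oid and size against the pointer in the diff confirms the uploaded binary is intact.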
start_train.sh CHANGED
@@ -2,7 +2,7 @@
 unset LD_PRELOAD
 python3 run_mlm_flax.py \
     --output_dir="./" \
-    --model_type="roberta" \
+    --model_name_or_path="./" \
    --config_name="./" \
     --tokenizer_name="./" \
     --dataset_filepath="/researchdisk1/data/training_data_full" \
@@ -10,20 +10,20 @@ python3 run_mlm_flax.py \
     --max_seq_length="128" \
     --pad_to_max_length \
     --preprocessing_num_workers="96" \
-    --per_device_train_batch_size="64" \
-    --per_device_eval_batch_size="64" \
+    --per_device_train_batch_size="32" \
+    --per_device_eval_batch_size="32" \
     --adam_beta1="0.9" \
     --adam_beta2="0.98" \
     --adam_epsilon="1e-6" \
     --learning_rate="2e-4" \
-    # --weight_decay="0.01" \
     --warmup_steps="1500" \
     --overwrite_output_dir \
-    --num_train_epochs="3" \
+    --num_train_epochs="1" \
     --save_strategy="steps" \
     --save_steps="10000" \
     --save_total_limit="5" \
     --eval_steps="10000" \
     --logging_steps="1000" \
     --dtype="bfloat16" \
-    --push_to_hub
+    --push_to_hub \
+    --adafactor
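
The functional change here resumes training from the saved checkpoint (--model_name_or_path="./" instead of initializing a fresh --model_type="roberta" model), halves the per-device batch size, and switches the optimizer to Adafactor. Below is a minimal sketch of what an --adafactor flag typically selects inside a Flax MLM training script, assuming it builds its optimizer with optax in the style of the upstream Hugging Face example; the schedule values mirror the flags above, and num_train_steps is a hypothetical placeholder (the script would derive it from the dataset):

```python
import optax

# Values taken from the flags in start_train.sh above.
learning_rate = 2e-4
warmup_steps = 1500
num_train_steps = 100_000  # hypothetical total step count

# Linear warmup to the peak LR, then linear decay to zero.
warmup_fn = optax.linear_schedule(
    init_value=0.0, end_value=learning_rate, transition_steps=warmup_steps
)
decay_fn = optax.linear_schedule(
    init_value=learning_rate,
    end_value=0.0,
    transition_steps=num_train_steps - warmup_steps,
)
schedule_fn = optax.join_schedules(
    schedules=[warmup_fn, decay_fn], boundaries=[warmup_steps]
)

use_adafactor = True  # corresponds to the new --adafactor flag

if use_adafactor:
    # Adafactor keeps factored second-moment estimates, so its optimizer
    # state is much smaller than AdamW's full per-parameter moments.
    optimizer = optax.adafactor(learning_rate=schedule_fn)
else:
    # The previous configuration: AdamW with the beta/epsilon flags above.
    optimizer = optax.adamw(
        learning_rate=schedule_fn, b1=0.9, b2=0.98, eps=1e-6
    )
```

Trading AdamW's per-parameter moment buffers for Adafactor's factored statistics is a common way to free accelerator memory when training a checkpoint of this size (flax_model.msgpack above is about 1.4 GB).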