Saving weights and logs of step 1000

Files changed (3) hide show

events.out.tfevents.1635864466.t1v-n-f6f5b6cc-w-0.884976.0.v2 → events.out.tfevents.1635866181.t1v-n-f6f5b6cc-w-0.888850.0.v2 RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca21a736d18acf0a3a5501d797b4fea6ed2e032ddf9b7b4f639d3a97f621b3cc
-size 736327

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8cb4edb30324f1ddab8b558e491c1fc7a05e2984c7d1954cde1fba10ad1e4b1
+size 147207

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e05d13e73546c8da9361a1b0805d8dff0efddbe17437799702ee4bc77903cfb2
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f350771be091da3adaf4c409cbe82b51bf611cf6f5829f474116347f8d6e20f
 size 497764120

run.sh CHANGED Viewed

@@ -9,9 +9,9 @@ python run_clm_flax.py \
     --cache_dir="/mnt/disks/flaxdisk/cache/" \
     --do_train --do_eval \
     --block_size="512" \
-    --per_device_train_batch_size="16" \
-    --per_device_eval_batch_size="16" \
-    --learning_rate="1e-4" \
     --warmup_steps="100" \
     --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
     --overwrite_output_dir \

     --cache_dir="/mnt/disks/flaxdisk/cache/" \
     --do_train --do_eval \
     --block_size="512" \
+    --per_device_train_batch_size="8" \
+    --per_device_eval_batch_size="8" \
+    --learning_rate="5e-5" \
     --warmup_steps="100" \
     --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
     --overwrite_output_dir \