Saving weights and logs of step 500
Browse files
events.out.tfevents.1631216744.t1v-n-1a0a7c50-w-0.1083508.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c451c48c3b7b5fef3c9c190f79ea3daaee814784edf7dd87901b38d6ebfe076
|
3 |
+
size 40
|
events.out.tfevents.1631217826.t1v-n-1a0a7c50-w-0.1102830.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dbc8e1adb6803ae07acbae62936b11724dec25c7fd4e1e4336a3cbe65945f281
|
3 |
+
size 40
|
events.out.tfevents.1631270664.t1v-n-1a0a7c50-w-0.1438451.0.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1daad6304a496c40d0f25a275a2d7d4c38b01d62204b6b95a10c56767d7d5d1
|
3 |
+
size 40
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5262371934
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93dddc3e0a75161cc9c8ca31b5175a6faf380aa20a4bab6eef572ec3f4dd692c
|
3 |
size 5262371934
|
run.sh
CHANGED
@@ -6,14 +6,15 @@ python run_clm_mp.py \
|
|
6 |
--do_train \
|
7 |
--do_eval \
|
8 |
--block_size 1024 \
|
9 |
-
--num_train_epochs
|
10 |
--learning_rate 4e-6 \
|
11 |
-
--per_device_train_batch_size
|
12 |
-
--per_device_eval_batch_size
|
13 |
--overwrite_output_dir \
|
14 |
-
--output_dir
|
15 |
--cache_dir /mnt/disks/flaxdisk/cache/ \
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
19 |
--push_to_hub
|
|
|
6 |
--do_train \
|
7 |
--do_eval \
|
8 |
--block_size 1024 \
|
9 |
+
--num_train_epochs 5 \
|
10 |
--learning_rate 4e-6 \
|
11 |
+
--per_device_train_batch_size 2 \
|
12 |
+
--per_device_eval_batch_size 2 \
|
13 |
--overwrite_output_dir \
|
14 |
+
--output_dir "./" \
|
15 |
--cache_dir /mnt/disks/flaxdisk/cache/ \
|
16 |
+
--preprocessing_num_workers 96 \
|
17 |
+
--dtype bfloat16 \
|
18 |
+
--logging_steps 5000 \
|
19 |
+
--eval_steps 5000 \
|
20 |
--push_to_hub
|