aapot committed
Commit: 8fe8e96
1 Parent(s): 2670ecc
Saving weights and logs of step 10000
Files changed:
- events.out.tfevents.1629913928.t1v-n-1ae8dadb-w-0.220173.0.v2 +0 -3
- events.out.tfevents.1629925854.t1v-n-1ae8dadb-w-0.258038.0.v2 +0 -3
- events.out.tfevents.1629954669.t1v-n-1ae8dadb-w-0.348909.0.v2 +0 -3
- events.out.tfevents.1629902662.t1v-n-1ae8dadb-w-0.181842.0.v2 → events.out.tfevents.1630151615.t1v-n-1ae8dadb-w-0.8890.0.v2 +2 -2
- flax_model.msgpack +1 -1
- start_train.sh +6 -6
events.out.tfevents.1629913928.t1v-n-1ae8dadb-w-0.220173.0.v2
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ba48449025afa6e28d08421224bf053c3a32ec691cfc7318dec5bd9a48fb65c3
-size 3246967
events.out.tfevents.1629925854.t1v-n-1ae8dadb-w-0.258038.0.v2
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d3ba93cc73524ee91a324c9c60c9b1944661982351b153371e82ee189b547e08
-size 8166703
events.out.tfevents.1629954669.t1v-n-1ae8dadb-w-0.348909.0.v2
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c2754b0718aaa184806c065e66c1e4cd5de84a84f740d764fa0aa7b57e056c74
-size 1176479
events.out.tfevents.1629902662.t1v-n-1ae8dadb-w-0.181842.0.v2 → events.out.tfevents.1630151615.t1v-n-1ae8dadb-w-0.8890.0.v2
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:dc6c70fe6cb7046c1a855abe8c2efa9b465bf15a70e00985b508e34a79ef7e1a
+size 1470757
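The event files above are stored as Git LFS pointers, so only the small pointer text lives in the repository. A minimal sketch of materialising the retained log and browsing it locally (assumes Git LFS and TensorBoard are installed in a local clone; neither command is part of this commit):

# Fetch the actual event file behind the LFS pointer kept by this commit.
git lfs pull --include="events.out.tfevents.1630151615.t1v-n-1ae8dadb-w-0.8890.0.v2"
# Browse the logged training metrics (logging every 1000 steps, eval every 10000 steps per start_train.sh).
tensorboard --logdir .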
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4adcd65be12523ba6f7b50c340edac6ac4bbfc8b58f38b74c49d4a0a52bdc323
 size 1421662309
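The new weights keep the same size (1,421,662,309 bytes) but a new content hash. A short sketch for checking a locally pulled copy against the updated LFS pointer (assumes GNU coreutils; expected values are taken from the '+' side above):

sha256sum flax_model.msgpack    # expected: 4adcd65be12523ba6f7b50c340edac6ac4bbfc8b58f38b74c49d4a0a52bdc323
stat -c %s flax_model.msgpack   # expected: 1421662309 bytes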
start_train.sh
CHANGED
@@ -2,7 +2,7 @@
 unset LD_PRELOAD
 python3 run_mlm_flax.py \
     --output_dir="./" \
-    --
+    --model_name_or_path="./" \
     --config_name="./" \
     --tokenizer_name="./" \
     --dataset_filepath="/researchdisk1/data/training_data_full" \
@@ -10,20 +10,20 @@ python3 run_mlm_flax.py \
     --max_seq_length="128" \
     --pad_to_max_length \
     --preprocessing_num_workers="96" \
-    --per_device_train_batch_size="
-    --per_device_eval_batch_size="
+    --per_device_train_batch_size="32" \
+    --per_device_eval_batch_size="32" \
     --adam_beta1="0.9" \
     --adam_beta2="0.98" \
     --adam_epsilon="1e-6" \
     --learning_rate="2e-4" \
-    # --weight_decay="0.01" \
     --warmup_steps="1500" \
     --overwrite_output_dir \
-    --num_train_epochs="
+    --num_train_epochs="1" \
     --save_strategy="steps" \
     --save_steps="10000" \
     --save_total_limit="5" \
     --eval_steps="10000" \
     --logging_steps="1000" \
     --dtype="bfloat16" \
-    --push_to_hub
+    --push_to_hub \
+    --adafactor
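For readability, here is the training command as it reads after this commit, assembled from the '+' side of the diff above. Line 1 and line 9 of start_train.sh fall outside the shown hunks and are omitted, so this is a sketch of the visible portion rather than the full file:

unset LD_PRELOAD
python3 run_mlm_flax.py \
    --output_dir="./" \
    --model_name_or_path="./" \
    --config_name="./" \
    --tokenizer_name="./" \
    --dataset_filepath="/researchdisk1/data/training_data_full" \
    --max_seq_length="128" \
    --pad_to_max_length \
    --preprocessing_num_workers="96" \
    --per_device_train_batch_size="32" \
    --per_device_eval_batch_size="32" \
    --adam_beta1="0.9" \
    --adam_beta2="0.98" \
    --adam_epsilon="1e-6" \
    --learning_rate="2e-4" \
    --warmup_steps="1500" \
    --overwrite_output_dir \
    --num_train_epochs="1" \
    --save_strategy="steps" \
    --save_steps="10000" \
    --save_total_limit="5" \
    --eval_steps="10000" \
    --logging_steps="1000" \
    --dtype="bfloat16" \
    --push_to_hub \
    --adafactor

Net effect of the script change: training now resumes from the checkpoint saved in the repository root (--model_name_or_path="./"), sets the per-device train and eval batch sizes to 32, runs for one epoch, switches the optimizer to Adafactor, and continues pushing checkpoints to the Hub at every 10,000-step save.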