Saving weights and logs of step 10000
Browse files
- config.json +2 -2
- events.out.tfevents.1640267893.t1v-n-ccbf3e94-w-0.1815882.3.v2 +3 -0
- events.out.tfevents.1640306668.t1v-n-ccbf3e94-w-0.1858138.3.v2 +3 -0
- events.out.tfevents.1640342709.t1v-n-ccbf3e94-w-0.1891938.3.v2 +3 -0
- events.out.tfevents.1640584052.t1v-n-ccbf3e94-w-0.2048063.3.v2 +3 -0
- events.out.tfevents.1640604030.t1v-n-ccbf3e94-w-0.2117285.3.v2 +3 -0
- events.out.tfevents.1640614846.t1v-n-ccbf3e94-w-0.2129895.3.v2 +3 -0
- events.out.tfevents.1640973870.t1v-n-ccbf3e94-w-0.135363.3.v2 +3 -0
- flax_model.msgpack +3 -0
- run_mlm_flax.py +1 -1
- run_step1.sh +7 -5
config.json
CHANGED
@@ -20,8 +20,8 @@
|
|
20 |
"num_hidden_layers": 12,
|
21 |
"pad_token_id": 1,
|
22 |
"position_embedding_type": "absolute",
|
23 |
-
"torch_dtype": "
|
24 |
-
"transformers_version": "4.
|
25 |
"type_vocab_size": 1,
|
26 |
"use_cache": true,
|
27 |
"vocab_size": 50265
|
|
|
20 |
"num_hidden_layers": 12,
|
21 |
"pad_token_id": 1,
|
22 |
"position_embedding_type": "absolute",
|
23 |
+
"torch_dtype": "bfloat16",
|
24 |
+
"transformers_version": "4.15.0.dev0",
|
25 |
"type_vocab_size": 1,
|
26 |
"use_cache": true,
|
27 |
"vocab_size": 50265
|
events.out.tfevents.1640267893.t1v-n-ccbf3e94-w-0.1815882.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2402ba2035cc5336fb1a525316962406a94006a1949d4f3a9abc23687f18b6d7
|
3 |
+
size 40
|
events.out.tfevents.1640306668.t1v-n-ccbf3e94-w-0.1858138.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5042739f45e5ff277b7816530227712dbc44d14f1ecb4e087857508c6aec056
|
3 |
+
size 40
|
events.out.tfevents.1640342709.t1v-n-ccbf3e94-w-0.1891938.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4706a8941689a1d8e657aaa7e746f5cf087039377bcdf6727b0cdbd0bb99afb0
|
3 |
+
size 40
|
events.out.tfevents.1640584052.t1v-n-ccbf3e94-w-0.2048063.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f36c3f39db8a456e90edbc8efa0b2ca1aac1fdf9b1c2952289d87c703ff98b7b
|
3 |
+
size 40
|
events.out.tfevents.1640604030.t1v-n-ccbf3e94-w-0.2117285.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fcf63d7a4b4b2bf4dcb10bd3f7a292a719a6060663d1fd1454cd2a6fdf42d1e6
|
3 |
+
size 40
|
events.out.tfevents.1640614846.t1v-n-ccbf3e94-w-0.2129895.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdf83876709cfc72a855f8adb08a79225eb1a8fa3f3dd0545e9596948a94a2cc
|
3 |
+
size 40
|
events.out.tfevents.1640973870.t1v-n-ccbf3e94-w-0.135363.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e35f3e489425792028defb716d72f7d8744224524fa57d8dd819b8b6247fd9f3
|
3 |
+
size 1470136
|
flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f8a01e29c7428d66b11693f80574203b31ab8f28bb1bd23313d222eb521c3e6
|
3 |
+
size 498796983
|
run_mlm_flax.py
CHANGED
@@ -508,7 +508,7 @@ if __name__ == "__main__":
|
|
508 |
init_value=0.0, end_value=training_args.learning_rate, transition_steps=training_args.warmup_steps
|
509 |
)
|
510 |
|
511 |
-
if
|
512 |
end_lr_value = training_args.learning_rate
|
513 |
else:
|
514 |
end_lr_value = 0
|
|
|
508 |
init_value=0.0, end_value=training_args.learning_rate, transition_steps=training_args.warmup_steps
|
509 |
)
|
510 |
|
511 |
+
if data_args.static_learning_rate:
|
512 |
end_lr_value = training_args.learning_rate
|
513 |
else:
|
514 |
end_lr_value = 0
|
run_step1.sh
CHANGED
@@ -3,16 +3,17 @@
|
|
3 |
--model_type="roberta" \
|
4 |
--config_name="./" \
|
5 |
--tokenizer_name="./" \
|
6 |
-
--
|
|
|
7 |
--cache_dir="/mnt/disks/flaxdisk/cache/" \
|
8 |
--max_seq_length="512" \
|
9 |
--weight_decay="0.01" \
|
10 |
-
--per_device_train_batch_size="
|
11 |
-
--per_device_eval_batch_size="
|
12 |
-
--learning_rate="
|
13 |
--warmup_steps="10000" \
|
14 |
--overwrite_output_dir \
|
15 |
-
--num_train_epochs="
|
16 |
--adam_beta1="0.9" \
|
17 |
--adam_beta2="0.98" \
|
18 |
--adam_epsilon="1e-6" \
|
@@ -22,4 +23,5 @@
|
|
22 |
--preprocessing_num_workers="64" \
|
23 |
--auth_token="True" \
|
24 |
--static_learning_rate="True" \
|
|
|
25 |
--push_to_hub
|
|
|
3 |
--model_type="roberta" \
|
4 |
--config_name="./" \
|
5 |
--tokenizer_name="./" \
|
6 |
+
--train_file /mnt/disks/flaxdisk/corpus/train_1_4.json \
|
7 |
+
--validation_file /mnt/disks/flaxdisk/corpus/validation.json \
|
8 |
--cache_dir="/mnt/disks/flaxdisk/cache/" \
|
9 |
--max_seq_length="512" \
|
10 |
--weight_decay="0.01" \
|
11 |
+
--per_device_train_batch_size="40" \
|
12 |
+
--per_device_eval_batch_size="40" \
|
13 |
+
--learning_rate="2e-4" \
|
14 |
--warmup_steps="10000" \
|
15 |
--overwrite_output_dir \
|
16 |
+
--num_train_epochs="2" \
|
17 |
--adam_beta1="0.9" \
|
18 |
--adam_beta2="0.98" \
|
19 |
--adam_epsilon="1e-6" \
|
|
|
23 |
--preprocessing_num_workers="64" \
|
24 |
--auth_token="True" \
|
25 |
--static_learning_rate="True" \
|
26 |
+
--dtype="bfloat16" \
|
27 |
--push_to_hub
|