Saving weights and logs of step 10000
Files changed:
- config.json +2 -1
- events.out.tfevents.1638128224.t1v-n-8eba1090-w-0.1649448.0.v2 +3 -0
- flax_model.msgpack +1 -1
- start_train.sh +5 -4
config.json
CHANGED
@@ -7,6 +7,7 @@
   "bos_token_id": 0,
   "classifier_dropout": null,
   "eos_token_id": 2,
+  "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 1024,
@@ -20,7 +21,7 @@
   "pad_token_id": 1,
   "position_embedding_type": "absolute",
   "torch_dtype": "float32",
-  "transformers_version": "4.
+  "transformers_version": "4.10.3",
   "type_vocab_size": 1,
   "use_cache": true,
   "vocab_size": 50265
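The config change adds an explicit "gradient_checkpointing": false key and pins the transformers version to 4.10.3. A minimal sketch of inspecting these fields with the transformers library, assuming it is installed and the repo is cloned locally (the "./" path mirrors the convention in start_train.sh and is illustrative):

# Sketch: load the updated config and read the two changed fields.
# Assumes `pip install transformers` and a local clone of this repo.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("./")  # reads ./config.json
# getattr used defensively: newer transformers versions may relocate this key
print(getattr(config, "gradient_checkpointing", None))  # False, per the added key
print(config.transformers_version)                      # "4.10.3", per the updated key
print(config.hidden_size, config.vocab_size)            # 1024, 50265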
events.out.tfevents.1638128224.t1v-n-8eba1090-w-0.1649448.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb25bd08dd3f4745e12c4538e65ab164e2c38587885b7daac4782b14fa6f0141
+size 1470757
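The added file is a TensorBoard event log (the training "logs" from the commit message), stored via Git LFS, which is why the diff shows only the three-line pointer. A minimal sketch of reading scalars from it with TensorBoard's EventAccumulator, assuming `pip install tensorboard` and that the real file (not the pointer) has been fetched, e.g. with `git lfs pull`; the scalar tag name below is an assumption, since the actual tags depend on run_mlm_flax.py:

# Sketch: list scalar tags recorded in the event file.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("events.out.tfevents.1638128224.t1v-n-8eba1090-w-0.1649448.0.v2")
acc.Reload()
print(acc.Tags()["scalars"])  # available scalar tags
# for event in acc.Scalars("train_loss"):  # tag name is an assumption
#     print(event.step, event.value)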
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0d9373d6e238915c1e24dcfb133a085828d33d2d935863497a7d2546b63c360c
 size 1421662309
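Like the event log, the model weights are stored with Git LFS: the tracked file is a three-line pointer (spec version, SHA-256 oid, byte size), and only the oid changes when new weights are pushed. A minimal standard-library sketch for verifying a fetched file against the new pointer fields above:

# Sketch: check that a downloaded artifact matches its LFS pointer.
# Assumes the real file was fetched with `git lfs pull`.
import hashlib
import os

path = "flax_model.msgpack"
expected_oid = "0d9373d6e238915c1e24dcfb133a085828d33d2d935863497a7d2546b63c360c"
expected_size = 1421662309

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("pointer matches file")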
start_train.sh
CHANGED
@@ -8,11 +8,11 @@ python3 run_mlm_flax.py \
     --config_name="./" \
     --tokenizer_name="./" \
     --dataset_filepath="/researchdisk/training_dataset_full" \
-    --max_seq_length="
+    --max_seq_length="512" \
     --pad_to_max_length \
     --preprocessing_num_workers="96" \
-    --per_device_train_batch_size="
-    --per_device_eval_batch_size="
+    --per_device_train_batch_size="8" \
+    --per_device_eval_batch_size="8" \
     --adam_beta1="0.9" \
     --adam_beta2="0.98" \
     --adam_epsilon="1e-6" \
@@ -26,5 +26,6 @@ python3 run_mlm_flax.py \
     --eval_steps="10000" \
     --logging_steps="1000" \
     --dtype="bfloat16" \
+    --adafactor \
     --push_to_hub \
-    --hub_model_id="Finnish-NLP/roberta-large-finnish-v2"
+    # --hub_model_id="Finnish-NLP/roberta-large-finnish-v2"
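The script changes set max_seq_length to 512, set both per-device batch sizes to 8, enable the Adafactor optimizer via --adafactor, and comment out the explicit hub model id. As a rough arithmetic sketch of what the new flags imply: assuming an 8-device TPU v3-8 host (an assumption, though consistent with the t1v-* hostname in the event file), the global batch comes out as below; jax.device_count() reports the real number on the training machine.

# Sketch: effective batch size under the new flags. The 8-device count
# is an assumption (TPU v3-8); this is not part of the commit itself.
import jax

per_device_train_batch_size = 8   # --per_device_train_batch_size="8"
max_seq_length = 512              # --max_seq_length="512"

devices = jax.device_count()      # 8 on a v3-8
global_batch = per_device_train_batch_size * devices
tokens_per_step = global_batch * max_seq_length  # fixed-length with --pad_to_max_length
print(devices, global_batch, tokens_per_step)    # e.g. 8, 64, 32768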