aapot committed on
Commit
bca30c6
1 Parent(s): e952e6f

Saving weights and logs of step 10000

Browse files
config.json CHANGED
@@ -7,6 +7,7 @@
7
  "bos_token_id": 0,
8
  "classifier_dropout": null,
9
  "eos_token_id": 2,
 
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
  "hidden_size": 1024,
@@ -20,7 +21,7 @@
20
  "pad_token_id": 1,
21
  "position_embedding_type": "absolute",
22
  "torch_dtype": "float32",
23
- "transformers_version": "4.11.0",
24
  "type_vocab_size": 1,
25
  "use_cache": true,
26
  "vocab_size": 50265
7
  "bos_token_id": 0,
8
  "classifier_dropout": null,
9
  "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
  "hidden_act": "gelu",
12
  "hidden_dropout_prob": 0.1,
13
  "hidden_size": 1024,
21
  "pad_token_id": 1,
22
  "position_embedding_type": "absolute",
23
  "torch_dtype": "float32",
24
+ "transformers_version": "4.10.3",
25
  "type_vocab_size": 1,
26
  "use_cache": true,
27
  "vocab_size": 50265
events.out.tfevents.1638128224.t1v-n-8eba1090-w-0.1649448.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb25bd08dd3f4745e12c4538e65ab164e2c38587885b7daac4782b14fa6f0141
3
+ size 1470757
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a878489306599e17d93ae80b2eed555b4c6b13fbd306d4fd88c06b1c2b8e7a0
3
  size 1421662309
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d9373d6e238915c1e24dcfb133a085828d33d2d935863497a7d2546b63c360c
3
  size 1421662309
start_train.sh CHANGED
@@ -8,11 +8,11 @@ python3 run_mlm_flax.py \
8
  --config_name="./" \
9
  --tokenizer_name="./" \
10
  --dataset_filepath="/researchdisk/training_dataset_full" \
11
- --max_seq_length="128" \
12
  --pad_to_max_length \
13
  --preprocessing_num_workers="96" \
14
- --per_device_train_batch_size="64" \
15
- --per_device_eval_batch_size="64" \
16
  --adam_beta1="0.9" \
17
  --adam_beta2="0.98" \
18
  --adam_epsilon="1e-6" \
@@ -26,5 +26,6 @@ python3 run_mlm_flax.py \
26
  --eval_steps="10000" \
27
  --logging_steps="1000" \
28
  --dtype="bfloat16" \
 
29
  --push_to_hub \
30
- --hub_model_id="Finnish-NLP/roberta-large-finnish-v2"
8
  --config_name="./" \
9
  --tokenizer_name="./" \
10
  --dataset_filepath="/researchdisk/training_dataset_full" \
11
+ --max_seq_length="512" \
12
  --pad_to_max_length \
13
  --preprocessing_num_workers="96" \
14
+ --per_device_train_batch_size="8" \
15
+ --per_device_eval_batch_size="8" \
16
  --adam_beta1="0.9" \
17
  --adam_beta2="0.98" \
18
  --adam_epsilon="1e-6" \
26
  --eval_steps="10000" \
27
  --logging_steps="1000" \
28
  --dtype="bfloat16" \
29
+ --adafactor \
30
  --push_to_hub \
31
+ # --hub_model_id="Finnish-NLP/roberta-large-finnish-v2"