pere committed on
Commit 9afb9b2
1 Parent(s): 46233bb

Saving weights and logs of step 10000

config.json CHANGED
@@ -20,8 +20,8 @@
   "num_hidden_layers": 12,
   "pad_token_id": 1,
   "position_embedding_type": "absolute",
-  "torch_dtype": "float32",
-  "transformers_version": "4.14.0.dev0",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.15.0.dev0",
   "type_vocab_size": 1,
   "use_cache": true,
   "vocab_size": 50265
events.out.tfevents.1640267893.t1v-n-ccbf3e94-w-0.1815882.3.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2402ba2035cc5336fb1a525316962406a94006a1949d4f3a9abc23687f18b6d7
+ size 40
events.out.tfevents.1640306668.t1v-n-ccbf3e94-w-0.1858138.3.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f5042739f45e5ff277b7816530227712dbc44d14f1ecb4e087857508c6aec056
+ size 40
events.out.tfevents.1640342709.t1v-n-ccbf3e94-w-0.1891938.3.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4706a8941689a1d8e657aaa7e746f5cf087039377bcdf6727b0cdbd0bb99afb0
+ size 40
events.out.tfevents.1640584052.t1v-n-ccbf3e94-w-0.2048063.3.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f36c3f39db8a456e90edbc8efa0b2ca1aac1fdf9b1c2952289d87c703ff98b7b
+ size 40
events.out.tfevents.1640604030.t1v-n-ccbf3e94-w-0.2117285.3.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fcf63d7a4b4b2bf4dcb10bd3f7a292a719a6060663d1fd1454cd2a6fdf42d1e6
+ size 40
events.out.tfevents.1640614846.t1v-n-ccbf3e94-w-0.2129895.3.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bdf83876709cfc72a855f8adb08a79225eb1a8fa3f3dd0545e9596948a94a2cc
+ size 40
events.out.tfevents.1640973870.t1v-n-ccbf3e94-w-0.135363.3.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e35f3e489425792028defb716d72f7d8744224524fa57d8dd819b8b6247fd9f3
+ size 1470136
flax_model.msgpack ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f8a01e29c7428d66b11693f80574203b31ab8f28bb1bd23313d222eb521c3e6
+ size 498796983
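All of the files added above are Git LFS pointers rather than the payloads themselves: the repository stores three "key value" lines (version, oid, size), and `git lfs pull` fetches the real content. The 40-byte event files are effectively empty logs from short-lived runs, while flax_model.msgpack weighs in at roughly 499 MB of actual weights. A small illustrative parser for the pointer format (a hypothetical helper, not part of this repo):

def parse_lfs_pointer(path):
    # A pointer file is exactly three "key value" lines, e.g.
    #   version https://git-lfs.github.com/spec/v1
    #   oid sha256:7f8a01e2...
    #   size 498796983
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

info = parse_lfs_pointer("flax_model.msgpack")
print(info["oid"], info["size"])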
run_mlm_flax.py CHANGED
@@ -508,7 +508,7 @@ if __name__ == "__main__":
         init_value=0.0, end_value=training_args.learning_rate, transition_steps=training_args.warmup_steps
     )
 
-    if data_argsdata_argtatic_learning_rate:
+    if data_args.static_learning_rate:
         end_lr_value = training_args.learning_rate
     else:
         end_lr_value = 0
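The fix replaces a mangled identifier that would raise a NameError, and the branch it guards decides whether the learning rate decays after warmup: with --static_learning_rate="True", the post-warmup segment ends at the peak rate, i.e. it stays flat. A sketch of how such a two-phase schedule is typically composed with optax (modelled on the upstream run_mlm_flax.py example; the function name and exact wiring here are illustrative):

import optax

def build_lr_schedule(learning_rate, warmup_steps, total_steps, static_learning_rate):
    # Phase 1: linear warmup from 0 to the peak rate, matching the hunk above.
    warmup_fn = optax.linear_schedule(
        init_value=0.0, end_value=learning_rate, transition_steps=warmup_steps
    )
    # Phase 2: flat if static_learning_rate is set, otherwise linear decay to 0.
    end_lr_value = learning_rate if static_learning_rate else 0.0
    decay_fn = optax.linear_schedule(
        init_value=learning_rate,
        end_value=end_lr_value,
        transition_steps=total_steps - warmup_steps,
    )
    return optax.join_schedules([warmup_fn, decay_fn], boundaries=[warmup_steps])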
run_step1.sh CHANGED
@@ -3,16 +3,17 @@
  --model_type="roberta" \
  --config_name="./" \
  --tokenizer_name="./" \
- --dataset_name="NbAiLab/nbailab_extended" \
+ --train_file /mnt/disks/flaxdisk/corpus/train_1_4.json \
+ --validation_file /mnt/disks/flaxdisk/corpus/validation.json \
  --cache_dir="/mnt/disks/flaxdisk/cache/" \
  --max_seq_length="512" \
  --weight_decay="0.01" \
- --per_device_train_batch_size="48" \
- --per_device_eval_batch_size="48" \
- --learning_rate="4e-4" \
+ --per_device_train_batch_size="40" \
+ --per_device_eval_batch_size="40" \
+ --learning_rate="2e-4" \
  --warmup_steps="10000" \
  --overwrite_output_dir \
- --num_train_epochs="1000" \
+ --num_train_epochs="2" \
  --adam_beta1="0.9" \
  --adam_beta2="0.98" \
  --adam_epsilon="1e-6" \
@@ -22,4 +23,5 @@
  --preprocessing_num_workers="64" \
  --auth_token="True" \
  --static_learning_rate="True" \
+ --dtype="bfloat16" \
  --push_to_hub
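The hyperparameter changes move together: the script switches from the NbAiLab/nbailab_extended dataset to local JSON splits, drops the per-device batch size from 48 to 40, halves the peak learning rate from 4e-4 to 2e-4, cuts the epoch count from 1000 to 2, and trains in bfloat16. A quick check of the resulting global batch size (assuming an 8-device TPU VM host, consistent with the t1v-* names of the event files above):

import jax

per_device_batch = 40                 # --per_device_train_batch_size after this commit
n_devices = jax.device_count()        # 8 on a v2-8/v3-8 TPU VM
print(per_device_batch * n_devices)   # 320 examples per step (previously 48 * 8 = 384)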