sakares committed on
Commit
de940b9
1 Parent(s): 89c14b0

use proper tokenizer.json and change to batch_size=256

Browse files
events.out.tfevents.1626444652.t1v-n-bf8aeee7-w-0.7169.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28995d9c2e8ef74fe2f9bb8d05d9f6eacef6a2b734557bcf1d66ef3c32756805
3
+ size 40
run.sh CHANGED
@@ -9,8 +9,8 @@ python3 run_mlm_flax.py \
9
  --max_seq_length="128" \
10
  --weight_decay="0.01" \
11
  --preprocessing_num_workers="64" \
12
- --per_device_train_batch_size="128" \
13
- --per_device_eval_batch_size="128" \
14
  --learning_rate="3e-5" \
15
  --warmup_steps="5000" \
16
  --overwrite_output_dir \
@@ -18,7 +18,8 @@ python3 run_mlm_flax.py \
18
  --num_train_epochs="8" \
19
  --adam_beta1="0.9" \
20
  --adam_beta2="0.98" \
21
- --logging_steps="500" \
22
- --save_steps="5000" \
23
- --eval_steps="20000" \
 
24
  --push_to_hub
 
9
  --max_seq_length="128" \
10
  --weight_decay="0.01" \
11
  --preprocessing_num_workers="64" \
12
+ --per_device_train_batch_size="256" \
13
+ --per_device_eval_batch_size="256" \
14
  --learning_rate="3e-5" \
15
  --warmup_steps="5000" \
16
  --overwrite_output_dir \
 
18
  --num_train_epochs="8" \
19
  --adam_beta1="0.9" \
20
  --adam_beta2="0.98" \
21
+ --logging_steps="62" \
22
+ --save_steps="625" \
23
+ --eval_steps="2500" \
24
+ --dtype="bfloat16" \
25
  --push_to_hub
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff