sakares committed on
Commit
b0729e9
1 Parent(s): bfd5d73

Lower the learning rate to address exploding gradients; also use a new seed number.

Browse files
Files changed (1) hide show
  1. run.sh +5 -4
run.sh CHANGED
@@ -12,14 +12,15 @@ python3 run_mlm_flax.py \
12
  --preprocessing_num_workers="64" \
13
  --per_device_train_batch_size="32" \
14
  --per_device_eval_batch_size="32" \
15
- --learning_rate="3e-4" \
16
- --warmup_steps="1000" \
17
  --overwrite_output_dir \
18
- --num_train_epochs="10" \
 
19
  --adam_beta1="0.9" \
20
  --adam_beta2="0.98" \
21
  --logging_steps="500" \
22
  --save_steps="10000" \
23
- --eval_steps="10000" \
24
  --dtype="bfloat16" \
25
  --push_to_hub
 
12
  --preprocessing_num_workers="64" \
13
  --per_device_train_batch_size="32" \
14
  --per_device_eval_batch_size="32" \
15
+ --learning_rate="3e-5" \
16
+ --warmup_steps="2000" \
17
  --overwrite_output_dir \
18
+ --seed="19" \
19
+ --num_train_epochs="8" \
20
  --adam_beta1="0.9" \
21
  --adam_beta2="0.98" \
22
  --logging_steps="500" \
23
  --save_steps="10000" \
24
+ --eval_steps="20000" \
25
  --dtype="bfloat16" \
26
  --push_to_hub