versae committed on
Commit
934b127
1 Parent(s): 182f272

Train options

Browse files
Files changed (1) hide show
  1. train.128.sh +4 -3
train.128.sh CHANGED
@@ -5,7 +5,7 @@ python run_mlm_flax.py \
5
  --tokenizer_name="NbAiLab/nb-roberta-base" \
6
  --dataset_name="NbAiLab/NCC" \
7
  --max_seq_length="128" \
8
- --weight_decay="0.01" \
9
  --per_device_train_batch_size="232" \
10
  --per_device_eval_batch_size="232" \
11
  --pad_to_max_length \
@@ -13,9 +13,10 @@ python run_mlm_flax.py \
13
  --warmup_steps="10000" \
14
  --overwrite_output_dir \
15
  --num_train_epochs="3" \
 
16
  --adam_beta1="0.9" \
17
- --adam_beta2="0.98" \
18
- --adam_epsilon="1e-6" \
19
  --logging_steps="1000" \
20
  --save_steps="1000" \
21
  --eval_steps="1000" \
 
5
  --tokenizer_name="NbAiLab/nb-roberta-base" \
6
  --dataset_name="NbAiLab/NCC" \
7
  --max_seq_length="128" \
8
+ --weight_decay="0.0" \
9
  --per_device_train_batch_size="232" \
10
  --per_device_eval_batch_size="232" \
11
  --pad_to_max_length \
 
13
  --warmup_steps="10000" \
14
  --overwrite_output_dir \
15
  --num_train_epochs="3" \
16
+ --distributed_shampoo \
17
  --adam_beta1="0.9" \
18
+ --adam_beta2="0.99" \
19
+ --adam_epsilon="1e-10" \
20
  --logging_steps="1000" \
21
  --save_steps="1000" \
22
  --eval_steps="1000" \