pere committed on
Commit
060854e
1 Parent(s): de8a418

Saving weights and logs of step 1000

Browse files
events.out.tfevents.1635864466.t1v-n-f6f5b6cc-w-0.884976.0.v2 → events.out.tfevents.1635866181.t1v-n-f6f5b6cc-w-0.888850.0.v2 RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca21a736d18acf0a3a5501d797b4fea6ed2e032ddf9b7b4f639d3a97f621b3cc
3
- size 736327
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8cb4edb30324f1ddab8b558e491c1fc7a05e2984c7d1954cde1fba10ad1e4b1
3
+ size 147207
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e05d13e73546c8da9361a1b0805d8dff0efddbe17437799702ee4bc77903cfb2
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f350771be091da3adaf4c409cbe82b51bf611cf6f5829f474116347f8d6e20f
3
  size 497764120
run.sh CHANGED
@@ -9,9 +9,9 @@ python run_clm_flax.py \
9
  --cache_dir="/mnt/disks/flaxdisk/cache/" \
10
  --do_train --do_eval \
11
  --block_size="512" \
12
- --per_device_train_batch_size="16" \
13
- --per_device_eval_batch_size="16" \
14
- --learning_rate="1e-4" \
15
  --warmup_steps="100" \
16
  --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
17
  --overwrite_output_dir \
 
9
  --cache_dir="/mnt/disks/flaxdisk/cache/" \
10
  --do_train --do_eval \
11
  --block_size="512" \
12
+ --per_device_train_batch_size="8" \
13
+ --per_device_eval_batch_size="8" \
14
+ --learning_rate="5e-5" \
15
  --warmup_steps="100" \
16
  --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
17
  --overwrite_output_dir \