pere committed on
Commit
5937dc9
1 Parent(s): 71f287a

Saving weights and logs of step 10000

Browse files
events.out.tfevents.1640956682.t1v-n-358ff5d1-w-0.11974.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b0fe1349611f01cd08d8a11929610ed03a8c20073c1fa070a7b7843955235bf
3
+ size 1470136
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a7c59a989095aa103c94d09f11a9b5cd017f6fc2e3b4603db111c5b9d8bfa42
3
  size 498796983
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc49f760bccb82672d93ae5c122aed1f410add84860109051c07d287d42a5517
3
  size 498796983
run_step1.sh CHANGED
@@ -7,12 +7,12 @@
7
  --cache_dir="/mnt/disks/flaxdisk/cache/" \
8
  --max_seq_length="128" \
9
  --weight_decay="0.01" \
10
- --per_device_train_batch_size="164" \
11
- --per_device_eval_batch_size="164" \
12
- --learning_rate="3e-4" \
13
  --warmup_steps="10000" \
14
  --overwrite_output_dir \
15
- --num_train_epochs="1000" \
16
  --adam_beta1="0.9" \
17
  --adam_beta2="0.98" \
18
  --adam_epsilon="1e-6" \
 
7
  --cache_dir="/mnt/disks/flaxdisk/cache/" \
8
  --max_seq_length="128" \
9
  --weight_decay="0.01" \
10
+ --per_device_train_batch_size="200" \
11
+ --per_device_eval_batch_size="200" \
12
+ --learning_rate="4e-4" \
13
  --warmup_steps="10000" \
14
  --overwrite_output_dir \
15
+ --num_train_epochs="10" \
16
  --adam_beta1="0.9" \
17
  --adam_beta2="0.98" \
18
  --adam_epsilon="1e-6" \