yhavinga committed on
Commit
b1ffc4b
1 Parent(s): ca739a2

Update model

Browse files
eval_results.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "eval_accuracy": 0.6356837749481201,
3
- "eval_loss": 1.9501113891601562
4
  }
 
1
  {
2
+ "eval_accuracy": 0.6502997875213623,
3
+ "eval_loss": 1.9498332738876343
4
  }
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00bb5b987a1bba701e3d03578ceed23e976c4b9e22db5d095c12495abd5e3a09
3
  size 307750439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35274b0f8d7d45dba0dec9b4043244035f8ee3d3c11465cb8828f53c433a50c8
3
  size 307750439
run_longt5-local-small-mc4.sh CHANGED
@@ -26,21 +26,22 @@ python ../train/run_t5_mlm_flax_pmap.py \
26
  --dataset_name="${DATASET}" \
27
  --dataset_config_name="${DATASET_CONFIG}" \
28
  --max_seq_length="1024" \
29
- --per_device_train_batch_size="32" \
30
- --per_device_eval_batch_size="32" \
31
- --gradient_accumulation_steps="4" \
32
  --mean_noise_span_length="3" \
 
33
  --dtype="bfloat16" \
34
  --z_loss="1e-4" \
35
  --optim="adafactor" \
36
  --learning_rate="0.005" \
37
- --lr_decay="linear" \
38
  --overwrite_output_dir \
39
  --num_train_epochs="6" \
40
- --logging_steps="20" \
41
- --save_steps="1000" \
42
- --eval_steps="1000" \
43
- --warmup_steps="300" \
44
  --wandb_project="long-t5-local-small" \
45
  --wandb_job_type="pmap"
46
 
 
26
  --dataset_name="${DATASET}" \
27
  --dataset_config_name="${DATASET_CONFIG}" \
28
  --max_seq_length="1024" \
29
+ --per_device_train_batch_size="64" \
30
+ --per_device_eval_batch_size="64" \
31
+ --gradient_accumulation_steps="1" \
32
  --mean_noise_span_length="3" \
33
+ --gradient_checkpointing="false" \
34
  --dtype="bfloat16" \
35
  --z_loss="1e-4" \
36
  --optim="adafactor" \
37
  --learning_rate="0.005" \
38
+ --lr_scheduler_type="linear" \
39
  --overwrite_output_dir \
40
  --num_train_epochs="6" \
41
+ --logging_steps="80" \
42
+ --save_steps="4000" \
43
+ --eval_steps="4000" \
44
+ --warmup_steps="1200" \
45
  --wandb_project="long-t5-local-small" \
46
  --wandb_job_type="pmap"
47
 
runs/{events.out.tfevents.1660513900.t1v-n-40c2b24e-w-0.1025593.0.v2 → Aug15_21-16-54_t1v-n-40c2b24e-w-0/events.out.tfevents.1660598268.t1v-n-40c2b24e-w-0.1128999.0.v2} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1863a6bfac6afc7708d3bb6e53d4f1b1eaa7d05a3adeabb256ed2e600622e52
3
- size 1086215
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2edc48086840f9785d0b3426ead313735f851c7705e4c146b669b1b9fa321282
3
+ size 544983
training_state.json CHANGED
@@ -1 +1 @@
1
- {"step": 168001}
 
1
+ {"step": 84001}