yhavinga committed on
Commit
5ba1456
1 Parent(s): 4d72585

Update model

Browse files
README.md CHANGED
@@ -1 +1 @@
1
- See logs at https://wandb.ai/yepster/long-t5-local-small/runs/3c5gvwod?workspace=user-yepster
 
1
+ See logs at https://wandb.ai/yepster/long-t5-local-small/runs/37o59uad?workspace=user-yepster
eval_results.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "eval_accuracy": 0.6216245889663696,
3
- "eval_loss": 1.904692530632019
4
  }
 
1
  {
2
+ "eval_accuracy": 0.6356837749481201,
3
+ "eval_loss": 1.9501113891601562
4
  }
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95364c438eca009d45f05cf04a79d78d949c17c6855db8545983940388f205aa
3
  size 307750439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00bb5b987a1bba701e3d03578ceed23e976c4b9e22db5d095c12495abd5e3a09
3
  size 307750439
run_longt5-local-small-mc4.sh CHANGED
@@ -30,17 +30,17 @@ python ../train/run_t5_mlm_flax_pmap.py \
30
  --per_device_eval_batch_size="32" \
31
  --gradient_accumulation_steps="4" \
32
  --mean_noise_span_length="3" \
33
- --dtype="float32" \
 
34
  --optim="adafactor" \
35
  --learning_rate="0.005" \
36
  --lr_decay="linear" \
37
  --overwrite_output_dir \
38
- --num_train_epochs="4" \
39
  --logging_steps="20" \
40
  --save_steps="1000" \
41
  --eval_steps="1000" \
42
  --warmup_steps="300" \
43
- --validation_split_count="15000" \
44
  --wandb_project="long-t5-local-small" \
45
  --wandb_job_type="pmap"
46
 
 
30
  --per_device_eval_batch_size="32" \
31
  --gradient_accumulation_steps="4" \
32
  --mean_noise_span_length="3" \
33
+ --dtype="bfloat16" \
34
+ --z_loss="1e-4" \
35
  --optim="adafactor" \
36
  --learning_rate="0.005" \
37
  --lr_decay="linear" \
38
  --overwrite_output_dir \
39
+ --num_train_epochs="6" \
40
  --logging_steps="20" \
41
  --save_steps="1000" \
42
  --eval_steps="1000" \
43
  --warmup_steps="300" \
 
44
  --wandb_project="long-t5-local-small" \
45
  --wandb_job_type="pmap"
46
 
opt_state.msgpack → runs/Aug14_21-50-45_t1v-n-40c2b24e-w-0/events.out.tfevents.1660513900.t1v-n-40c2b24e-w-0.1025593.0.v2 RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d175742112303b753a95793344147207e107480407df90ecb849e89cc28ace2
3
- size 308759699
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1863a6bfac6afc7708d3bb6e53d4f1b1eaa7d05a3adeabb256ed2e600622e52
3
+ size 1086215
training_state.json CHANGED
@@ -1 +1 @@
1
- {"step": 112001}
 
1
+ {"step": 168001}