dat committed on
Commit b681451
1 Parent(s): cba305b

Saving weights and logs of step 90000

checkpoint_90000 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c62d4bdd9a95f5039224ba4d25a6b9d4c04488dfbd282b54718cf5562822965a
+ size 1530270447
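
Each LFS-tracked file is stored in git as a three-line pointer (spec version, sha256 oid, byte size); the ~1.5 GB payload itself lives in LFS storage and is resolved on download. A minimal sketch of fetching the resolved checkpoint through the Hub API; the repo id below is a hypothetical stand-in, since this commit view does not show the repository name:

```python
# Sketch only: fetch the LFS-backed checkpoint via huggingface_hub.
# "dat/example-model" is a hypothetical repo id -- the commit view above
# does not name the repository.
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="dat/example-model",
    filename="checkpoint_90000",
    revision="b681451",  # the commit shown above
)
print(local_path)  # cached local copy of the 1530270447-byte payload
```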
events.out.tfevents.1626318666.t1v-n-f5c06ea1-w-0.655476.3.v2 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c96fa1e6828a98c51c01d533b269b18d5b997aab8e26c595124e60a983f3a911
- size 8991258
+ oid sha256:87d384252cfc2c9551426c799e7901b66a0c7e2351dc2b43d1d2634656ab26a9
+ size 13503400
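
The TensorBoard event log grew from 8991258 to 13503400 bytes as the events for the additional ~30k training steps were appended. A quick way to inspect it (a sketch, not part of the repo; for TF2-style .v2 event files some scalars may surface under tensor tags rather than scalar tags):

```python
# Sketch: inspect the updated event file with TensorBoard's reader.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator("events.out.tfevents.1626318666.t1v-n-f5c06ea1-w-0.655476.3.v2")
ea.Reload()       # parse all appended events
print(ea.Tags())  # available scalar/tensor tags, e.g. the train loss curve
```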
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5938f5a18f14894542ca37eeda9f7d5780b628c6b67cf598633785806d1ead78
+ oid sha256:e9dd04b40d879736851e06d01940a5427c1086eabab0e17bdc29ee5016e1f264
  size 510090043
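
Note the byte size is unchanged (510090043): the architecture and parameter count are the same, only the weight values moved. A minimal loading sketch, assuming flax is installed and the file has been resolved out of LFS (i.e. it is the full payload, not the three-line pointer):

```python
# Sketch: restore the raw Flax parameter pytree from the msgpack payload.
from flax import serialization

with open("flax_model.msgpack", "rb") as f:
    params = serialization.msgpack_restore(f.read())
print(list(params.keys()))  # top-level parameter groups
```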
run.sh CHANGED
@@ -15,17 +15,17 @@ python ./run_mlm_flax_no_accum.py \
  --adam_beta1="0.9" \
  --adam_beta2="0.98" \
  --logging_steps="50" \
- --eval_steps="20000" \
- --num_train_epochs="5" \
+ --eval_steps="5000" \
+ --num_train_epochs="2" \
  --preprocessing_num_workers="96" \
- --save_steps="30000" \
+ --save_steps="10000" \
  --learning_rate="3e-5" \
  --per_device_train_batch_size="1" \
  --per_device_eval_batch_size="1" \
  --save_total_limit="5" \
- --max_eval_samples="500" \
+ --max_eval_samples="5000" \
+ --resume_from_checkpoint="./" \
  #--gradient_accumulation_steps="4" \
- #--resume_from_checkpoint="./" \
  #--adafactor \
  #--dtype="bfloat16" \
 
wandb/run-20210715_031107-69jkygz3/files/output.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20210715_031107-69jkygz3/files/wandb-summary.json CHANGED
@@ -1 +1 @@
- {"training_step": 60000, "learning_rate": 2.914472861448303e-05, "train_loss": 3.118450880050659, "_runtime": 24758, "_timestamp": 1626343425, "_step": 1202, "eval_step": 60000, "eval_accuracy": 0.4923423230648041, "eval_loss": 2.8991198539733887}
+ {"training_step": 90000, "learning_rate": 2.863156441890169e-05, "train_loss": 2.512800693511963, "_runtime": 36892, "_timestamp": 1626355559, "_step": 1803, "eval_step": 80000, "eval_accuracy": 0.533167839050293, "eval_loss": 2.5656967163085938}
wandb/run-20210715_031107-69jkygz3/logs/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20210715_031107-69jkygz3/run-69jkygz3.wandb CHANGED
Binary files a/wandb/run-20210715_031107-69jkygz3/run-69jkygz3.wandb and b/wandb/run-20210715_031107-69jkygz3/run-69jkygz3.wandb differ