UltimoUno commited on
Commit
ed239d5
1 Parent(s): 04eb43f

Uploaded checkpoint-25000

Browse files
Files changed (5) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +2 -2
  4. scheduler.pt +1 -1
  5. trainer_state.json +3511 -3
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f25348f631a7afff0ccbe2020c61db0f08c8827f34e53f5b9dcdcbef37584b11
3
  size 2836579040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d15715937ea965725264e52ec82a27c5a59d3e2767e72ea004c92451c7c82a2
3
  size 2836579040
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b904676d1025d219018d948b9f3cfe47993db4ed0d878f37b9b04923c19fc541
3
  size 5673376169
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf292371fb20f79bd804dd72ad097ecec4ebbe12ed591d8a91a7c7a700c86e3b
3
  size 5673376169
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9de204c0067b7b80c3b397a5621bcbaed6c4c0e59bfc1ac45b0bfde553b2fcfa
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46e65e950ab7222c5262501cd7b7e711ba567e3a80af84a6ae9728e309c1152a
3
+ size 14308
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ba7ec89473516394564e372c38d1919dcb8b87defe36067c8d0703ba252ef6a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b027b2afefb0b47380a0742dc93c54ef79a5f7b66b6bd192151dcb4007d1b136
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5,
5
  "eval_steps": 5000,
6
- "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -14039,6 +14039,3514 @@
14039
  "eval_samples_per_second": 14.62,
14040
  "eval_steps_per_second": 14.62,
14041
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14042
  }
14043
  ],
14044
  "logging_steps": 10,
@@ -14046,7 +17554,7 @@
14046
  "num_input_tokens_seen": 0,
14047
  "num_train_epochs": 1,
14048
  "save_steps": 5000,
14049
- "total_flos": 3.227844083712e+17,
14050
  "train_batch_size": 1,
14051
  "trial_name": null,
14052
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.625,
5
  "eval_steps": 5000,
6
+ "global_step": 25000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
14039
  "eval_samples_per_second": 14.62,
14040
  "eval_steps_per_second": 14.62,
14041
  "step": 20000
14042
+ },
14043
+ {
14044
+ "epoch": 0.5,
14045
+ "grad_norm": 69.5,
14046
+ "learning_rate": 3.3864406779661014e-07,
14047
+ "loss": 1.5056,
14048
+ "step": 20010
14049
+ },
14050
+ {
14051
+ "epoch": 0.5,
14052
+ "grad_norm": 66.5,
14053
+ "learning_rate": 3.383050847457627e-07,
14054
+ "loss": 1.5009,
14055
+ "step": 20020
14056
+ },
14057
+ {
14058
+ "epoch": 0.5,
14059
+ "grad_norm": 69.5,
14060
+ "learning_rate": 3.3796610169491525e-07,
14061
+ "loss": 1.4073,
14062
+ "step": 20030
14063
+ },
14064
+ {
14065
+ "epoch": 0.5,
14066
+ "grad_norm": 63.25,
14067
+ "learning_rate": 3.376271186440678e-07,
14068
+ "loss": 1.4391,
14069
+ "step": 20040
14070
+ },
14071
+ {
14072
+ "epoch": 0.5,
14073
+ "grad_norm": 68.0,
14074
+ "learning_rate": 3.3728813559322035e-07,
14075
+ "loss": 1.4502,
14076
+ "step": 20050
14077
+ },
14078
+ {
14079
+ "epoch": 0.5,
14080
+ "grad_norm": 64.5,
14081
+ "learning_rate": 3.369491525423729e-07,
14082
+ "loss": 1.4458,
14083
+ "step": 20060
14084
+ },
14085
+ {
14086
+ "epoch": 0.5,
14087
+ "grad_norm": 68.0,
14088
+ "learning_rate": 3.366101694915254e-07,
14089
+ "loss": 1.4603,
14090
+ "step": 20070
14091
+ },
14092
+ {
14093
+ "epoch": 0.5,
14094
+ "grad_norm": 66.0,
14095
+ "learning_rate": 3.36271186440678e-07,
14096
+ "loss": 1.4461,
14097
+ "step": 20080
14098
+ },
14099
+ {
14100
+ "epoch": 0.5,
14101
+ "grad_norm": 67.0,
14102
+ "learning_rate": 3.3593220338983046e-07,
14103
+ "loss": 1.4921,
14104
+ "step": 20090
14105
+ },
14106
+ {
14107
+ "epoch": 0.5,
14108
+ "grad_norm": 64.0,
14109
+ "learning_rate": 3.35593220338983e-07,
14110
+ "loss": 1.4483,
14111
+ "step": 20100
14112
+ },
14113
+ {
14114
+ "epoch": 0.5,
14115
+ "grad_norm": 69.0,
14116
+ "learning_rate": 3.3525423728813557e-07,
14117
+ "loss": 1.4376,
14118
+ "step": 20110
14119
+ },
14120
+ {
14121
+ "epoch": 0.5,
14122
+ "grad_norm": 66.0,
14123
+ "learning_rate": 3.349152542372881e-07,
14124
+ "loss": 1.5029,
14125
+ "step": 20120
14126
+ },
14127
+ {
14128
+ "epoch": 0.5,
14129
+ "grad_norm": 67.5,
14130
+ "learning_rate": 3.345762711864407e-07,
14131
+ "loss": 1.4496,
14132
+ "step": 20130
14133
+ },
14134
+ {
14135
+ "epoch": 0.5,
14136
+ "grad_norm": 66.0,
14137
+ "learning_rate": 3.342372881355932e-07,
14138
+ "loss": 1.4555,
14139
+ "step": 20140
14140
+ },
14141
+ {
14142
+ "epoch": 0.5,
14143
+ "grad_norm": 65.0,
14144
+ "learning_rate": 3.3389830508474574e-07,
14145
+ "loss": 1.4612,
14146
+ "step": 20150
14147
+ },
14148
+ {
14149
+ "epoch": 0.5,
14150
+ "grad_norm": 64.5,
14151
+ "learning_rate": 3.335593220338983e-07,
14152
+ "loss": 1.4043,
14153
+ "step": 20160
14154
+ },
14155
+ {
14156
+ "epoch": 0.5,
14157
+ "grad_norm": 66.0,
14158
+ "learning_rate": 3.3322033898305085e-07,
14159
+ "loss": 1.4803,
14160
+ "step": 20170
14161
+ },
14162
+ {
14163
+ "epoch": 0.5,
14164
+ "grad_norm": 64.0,
14165
+ "learning_rate": 3.328813559322034e-07,
14166
+ "loss": 1.4877,
14167
+ "step": 20180
14168
+ },
14169
+ {
14170
+ "epoch": 0.5,
14171
+ "grad_norm": 63.5,
14172
+ "learning_rate": 3.325423728813559e-07,
14173
+ "loss": 1.4582,
14174
+ "step": 20190
14175
+ },
14176
+ {
14177
+ "epoch": 0.51,
14178
+ "grad_norm": 67.5,
14179
+ "learning_rate": 3.3220338983050843e-07,
14180
+ "loss": 1.4408,
14181
+ "step": 20200
14182
+ },
14183
+ {
14184
+ "epoch": 0.51,
14185
+ "grad_norm": 65.0,
14186
+ "learning_rate": 3.31864406779661e-07,
14187
+ "loss": 1.4368,
14188
+ "step": 20210
14189
+ },
14190
+ {
14191
+ "epoch": 0.51,
14192
+ "grad_norm": 66.5,
14193
+ "learning_rate": 3.3152542372881354e-07,
14194
+ "loss": 1.4513,
14195
+ "step": 20220
14196
+ },
14197
+ {
14198
+ "epoch": 0.51,
14199
+ "grad_norm": 65.0,
14200
+ "learning_rate": 3.3118644067796606e-07,
14201
+ "loss": 1.4667,
14202
+ "step": 20230
14203
+ },
14204
+ {
14205
+ "epoch": 0.51,
14206
+ "grad_norm": 70.5,
14207
+ "learning_rate": 3.3084745762711864e-07,
14208
+ "loss": 1.4526,
14209
+ "step": 20240
14210
+ },
14211
+ {
14212
+ "epoch": 0.51,
14213
+ "grad_norm": 67.5,
14214
+ "learning_rate": 3.3050847457627117e-07,
14215
+ "loss": 1.4661,
14216
+ "step": 20250
14217
+ },
14218
+ {
14219
+ "epoch": 0.51,
14220
+ "grad_norm": 67.0,
14221
+ "learning_rate": 3.3016949152542375e-07,
14222
+ "loss": 1.4909,
14223
+ "step": 20260
14224
+ },
14225
+ {
14226
+ "epoch": 0.51,
14227
+ "grad_norm": 65.5,
14228
+ "learning_rate": 3.298305084745763e-07,
14229
+ "loss": 1.4234,
14230
+ "step": 20270
14231
+ },
14232
+ {
14233
+ "epoch": 0.51,
14234
+ "grad_norm": 69.0,
14235
+ "learning_rate": 3.294915254237288e-07,
14236
+ "loss": 1.482,
14237
+ "step": 20280
14238
+ },
14239
+ {
14240
+ "epoch": 0.51,
14241
+ "grad_norm": 71.0,
14242
+ "learning_rate": 3.2915254237288134e-07,
14243
+ "loss": 1.4114,
14244
+ "step": 20290
14245
+ },
14246
+ {
14247
+ "epoch": 0.51,
14248
+ "grad_norm": 70.5,
14249
+ "learning_rate": 3.2881355932203386e-07,
14250
+ "loss": 1.4808,
14251
+ "step": 20300
14252
+ },
14253
+ {
14254
+ "epoch": 0.51,
14255
+ "grad_norm": 67.5,
14256
+ "learning_rate": 3.284745762711864e-07,
14257
+ "loss": 1.4945,
14258
+ "step": 20310
14259
+ },
14260
+ {
14261
+ "epoch": 0.51,
14262
+ "grad_norm": 66.5,
14263
+ "learning_rate": 3.2813559322033897e-07,
14264
+ "loss": 1.4487,
14265
+ "step": 20320
14266
+ },
14267
+ {
14268
+ "epoch": 0.51,
14269
+ "grad_norm": 65.0,
14270
+ "learning_rate": 3.277966101694915e-07,
14271
+ "loss": 1.4687,
14272
+ "step": 20330
14273
+ },
14274
+ {
14275
+ "epoch": 0.51,
14276
+ "grad_norm": 61.0,
14277
+ "learning_rate": 3.2745762711864403e-07,
14278
+ "loss": 1.4407,
14279
+ "step": 20340
14280
+ },
14281
+ {
14282
+ "epoch": 0.51,
14283
+ "grad_norm": 65.0,
14284
+ "learning_rate": 3.271186440677966e-07,
14285
+ "loss": 1.4604,
14286
+ "step": 20350
14287
+ },
14288
+ {
14289
+ "epoch": 0.51,
14290
+ "grad_norm": 65.0,
14291
+ "learning_rate": 3.2677966101694914e-07,
14292
+ "loss": 1.4424,
14293
+ "step": 20360
14294
+ },
14295
+ {
14296
+ "epoch": 0.51,
14297
+ "grad_norm": 67.5,
14298
+ "learning_rate": 3.264406779661017e-07,
14299
+ "loss": 1.4465,
14300
+ "step": 20370
14301
+ },
14302
+ {
14303
+ "epoch": 0.51,
14304
+ "grad_norm": 66.0,
14305
+ "learning_rate": 3.2610169491525424e-07,
14306
+ "loss": 1.4652,
14307
+ "step": 20380
14308
+ },
14309
+ {
14310
+ "epoch": 0.51,
14311
+ "grad_norm": 65.5,
14312
+ "learning_rate": 3.2576271186440677e-07,
14313
+ "loss": 1.436,
14314
+ "step": 20390
14315
+ },
14316
+ {
14317
+ "epoch": 0.51,
14318
+ "grad_norm": 67.5,
14319
+ "learning_rate": 3.254237288135593e-07,
14320
+ "loss": 1.4117,
14321
+ "step": 20400
14322
+ },
14323
+ {
14324
+ "epoch": 0.51,
14325
+ "grad_norm": 65.5,
14326
+ "learning_rate": 3.2508474576271183e-07,
14327
+ "loss": 1.4606,
14328
+ "step": 20410
14329
+ },
14330
+ {
14331
+ "epoch": 0.51,
14332
+ "grad_norm": 63.75,
14333
+ "learning_rate": 3.2474576271186435e-07,
14334
+ "loss": 1.4409,
14335
+ "step": 20420
14336
+ },
14337
+ {
14338
+ "epoch": 0.51,
14339
+ "grad_norm": 64.5,
14340
+ "learning_rate": 3.2440677966101694e-07,
14341
+ "loss": 1.4309,
14342
+ "step": 20430
14343
+ },
14344
+ {
14345
+ "epoch": 0.51,
14346
+ "grad_norm": 66.0,
14347
+ "learning_rate": 3.2406779661016946e-07,
14348
+ "loss": 1.4505,
14349
+ "step": 20440
14350
+ },
14351
+ {
14352
+ "epoch": 0.51,
14353
+ "grad_norm": 67.0,
14354
+ "learning_rate": 3.2372881355932204e-07,
14355
+ "loss": 1.4847,
14356
+ "step": 20450
14357
+ },
14358
+ {
14359
+ "epoch": 0.51,
14360
+ "grad_norm": 62.5,
14361
+ "learning_rate": 3.2338983050847457e-07,
14362
+ "loss": 1.4916,
14363
+ "step": 20460
14364
+ },
14365
+ {
14366
+ "epoch": 0.51,
14367
+ "grad_norm": 66.0,
14368
+ "learning_rate": 3.230508474576271e-07,
14369
+ "loss": 1.4501,
14370
+ "step": 20470
14371
+ },
14372
+ {
14373
+ "epoch": 0.51,
14374
+ "grad_norm": 66.5,
14375
+ "learning_rate": 3.227118644067797e-07,
14376
+ "loss": 1.4273,
14377
+ "step": 20480
14378
+ },
14379
+ {
14380
+ "epoch": 0.51,
14381
+ "grad_norm": 69.0,
14382
+ "learning_rate": 3.223728813559322e-07,
14383
+ "loss": 1.4616,
14384
+ "step": 20490
14385
+ },
14386
+ {
14387
+ "epoch": 0.51,
14388
+ "grad_norm": 67.5,
14389
+ "learning_rate": 3.220338983050847e-07,
14390
+ "loss": 1.4818,
14391
+ "step": 20500
14392
+ },
14393
+ {
14394
+ "epoch": 0.51,
14395
+ "grad_norm": 67.5,
14396
+ "learning_rate": 3.2169491525423726e-07,
14397
+ "loss": 1.468,
14398
+ "step": 20510
14399
+ },
14400
+ {
14401
+ "epoch": 0.51,
14402
+ "grad_norm": 66.0,
14403
+ "learning_rate": 3.213559322033898e-07,
14404
+ "loss": 1.5071,
14405
+ "step": 20520
14406
+ },
14407
+ {
14408
+ "epoch": 0.51,
14409
+ "grad_norm": 66.5,
14410
+ "learning_rate": 3.2101694915254237e-07,
14411
+ "loss": 1.4516,
14412
+ "step": 20530
14413
+ },
14414
+ {
14415
+ "epoch": 0.51,
14416
+ "grad_norm": 67.0,
14417
+ "learning_rate": 3.206779661016949e-07,
14418
+ "loss": 1.4563,
14419
+ "step": 20540
14420
+ },
14421
+ {
14422
+ "epoch": 0.51,
14423
+ "grad_norm": 70.5,
14424
+ "learning_rate": 3.203389830508474e-07,
14425
+ "loss": 1.4821,
14426
+ "step": 20550
14427
+ },
14428
+ {
14429
+ "epoch": 0.51,
14430
+ "grad_norm": 67.5,
14431
+ "learning_rate": 3.2e-07,
14432
+ "loss": 1.4666,
14433
+ "step": 20560
14434
+ },
14435
+ {
14436
+ "epoch": 0.51,
14437
+ "grad_norm": 65.5,
14438
+ "learning_rate": 3.1966101694915253e-07,
14439
+ "loss": 1.4145,
14440
+ "step": 20570
14441
+ },
14442
+ {
14443
+ "epoch": 0.51,
14444
+ "grad_norm": 62.75,
14445
+ "learning_rate": 3.1932203389830506e-07,
14446
+ "loss": 1.4483,
14447
+ "step": 20580
14448
+ },
14449
+ {
14450
+ "epoch": 0.51,
14451
+ "grad_norm": 67.5,
14452
+ "learning_rate": 3.1898305084745764e-07,
14453
+ "loss": 1.4599,
14454
+ "step": 20590
14455
+ },
14456
+ {
14457
+ "epoch": 0.52,
14458
+ "grad_norm": 66.0,
14459
+ "learning_rate": 3.186440677966101e-07,
14460
+ "loss": 1.4915,
14461
+ "step": 20600
14462
+ },
14463
+ {
14464
+ "epoch": 0.52,
14465
+ "grad_norm": 67.0,
14466
+ "learning_rate": 3.183050847457627e-07,
14467
+ "loss": 1.4934,
14468
+ "step": 20610
14469
+ },
14470
+ {
14471
+ "epoch": 0.52,
14472
+ "grad_norm": 70.0,
14473
+ "learning_rate": 3.179661016949152e-07,
14474
+ "loss": 1.4333,
14475
+ "step": 20620
14476
+ },
14477
+ {
14478
+ "epoch": 0.52,
14479
+ "grad_norm": 64.5,
14480
+ "learning_rate": 3.1762711864406775e-07,
14481
+ "loss": 1.5158,
14482
+ "step": 20630
14483
+ },
14484
+ {
14485
+ "epoch": 0.52,
14486
+ "grad_norm": 68.5,
14487
+ "learning_rate": 3.1728813559322033e-07,
14488
+ "loss": 1.426,
14489
+ "step": 20640
14490
+ },
14491
+ {
14492
+ "epoch": 0.52,
14493
+ "grad_norm": 69.5,
14494
+ "learning_rate": 3.1694915254237286e-07,
14495
+ "loss": 1.448,
14496
+ "step": 20650
14497
+ },
14498
+ {
14499
+ "epoch": 0.52,
14500
+ "grad_norm": 64.5,
14501
+ "learning_rate": 3.166101694915254e-07,
14502
+ "loss": 1.4542,
14503
+ "step": 20660
14504
+ },
14505
+ {
14506
+ "epoch": 0.52,
14507
+ "grad_norm": 65.5,
14508
+ "learning_rate": 3.1627118644067797e-07,
14509
+ "loss": 1.4202,
14510
+ "step": 20670
14511
+ },
14512
+ {
14513
+ "epoch": 0.52,
14514
+ "grad_norm": 67.0,
14515
+ "learning_rate": 3.159322033898305e-07,
14516
+ "loss": 1.4718,
14517
+ "step": 20680
14518
+ },
14519
+ {
14520
+ "epoch": 0.52,
14521
+ "grad_norm": 71.0,
14522
+ "learning_rate": 3.155932203389831e-07,
14523
+ "loss": 1.5071,
14524
+ "step": 20690
14525
+ },
14526
+ {
14527
+ "epoch": 0.52,
14528
+ "grad_norm": 68.0,
14529
+ "learning_rate": 3.152542372881356e-07,
14530
+ "loss": 1.4972,
14531
+ "step": 20700
14532
+ },
14533
+ {
14534
+ "epoch": 0.52,
14535
+ "grad_norm": 67.0,
14536
+ "learning_rate": 3.149152542372881e-07,
14537
+ "loss": 1.4734,
14538
+ "step": 20710
14539
+ },
14540
+ {
14541
+ "epoch": 0.52,
14542
+ "grad_norm": 67.5,
14543
+ "learning_rate": 3.1457627118644066e-07,
14544
+ "loss": 1.5047,
14545
+ "step": 20720
14546
+ },
14547
+ {
14548
+ "epoch": 0.52,
14549
+ "grad_norm": 70.0,
14550
+ "learning_rate": 3.142372881355932e-07,
14551
+ "loss": 1.5067,
14552
+ "step": 20730
14553
+ },
14554
+ {
14555
+ "epoch": 0.52,
14556
+ "grad_norm": 67.0,
14557
+ "learning_rate": 3.138983050847457e-07,
14558
+ "loss": 1.4898,
14559
+ "step": 20740
14560
+ },
14561
+ {
14562
+ "epoch": 0.52,
14563
+ "grad_norm": 65.5,
14564
+ "learning_rate": 3.135593220338983e-07,
14565
+ "loss": 1.499,
14566
+ "step": 20750
14567
+ },
14568
+ {
14569
+ "epoch": 0.52,
14570
+ "grad_norm": 74.0,
14571
+ "learning_rate": 3.132203389830508e-07,
14572
+ "loss": 1.4741,
14573
+ "step": 20760
14574
+ },
14575
+ {
14576
+ "epoch": 0.52,
14577
+ "grad_norm": 66.5,
14578
+ "learning_rate": 3.128813559322034e-07,
14579
+ "loss": 1.4807,
14580
+ "step": 20770
14581
+ },
14582
+ {
14583
+ "epoch": 0.52,
14584
+ "grad_norm": 68.0,
14585
+ "learning_rate": 3.1254237288135593e-07,
14586
+ "loss": 1.439,
14587
+ "step": 20780
14588
+ },
14589
+ {
14590
+ "epoch": 0.52,
14591
+ "grad_norm": 67.0,
14592
+ "learning_rate": 3.1220338983050846e-07,
14593
+ "loss": 1.4547,
14594
+ "step": 20790
14595
+ },
14596
+ {
14597
+ "epoch": 0.52,
14598
+ "grad_norm": 66.0,
14599
+ "learning_rate": 3.1186440677966104e-07,
14600
+ "loss": 1.5078,
14601
+ "step": 20800
14602
+ },
14603
+ {
14604
+ "epoch": 0.52,
14605
+ "grad_norm": 67.0,
14606
+ "learning_rate": 3.115254237288135e-07,
14607
+ "loss": 1.4719,
14608
+ "step": 20810
14609
+ },
14610
+ {
14611
+ "epoch": 0.52,
14612
+ "grad_norm": 65.5,
14613
+ "learning_rate": 3.1118644067796604e-07,
14614
+ "loss": 1.3959,
14615
+ "step": 20820
14616
+ },
14617
+ {
14618
+ "epoch": 0.52,
14619
+ "grad_norm": 63.0,
14620
+ "learning_rate": 3.108474576271186e-07,
14621
+ "loss": 1.467,
14622
+ "step": 20830
14623
+ },
14624
+ {
14625
+ "epoch": 0.52,
14626
+ "grad_norm": 67.5,
14627
+ "learning_rate": 3.1050847457627115e-07,
14628
+ "loss": 1.4718,
14629
+ "step": 20840
14630
+ },
14631
+ {
14632
+ "epoch": 0.52,
14633
+ "grad_norm": 69.0,
14634
+ "learning_rate": 3.1016949152542373e-07,
14635
+ "loss": 1.4672,
14636
+ "step": 20850
14637
+ },
14638
+ {
14639
+ "epoch": 0.52,
14640
+ "grad_norm": 66.5,
14641
+ "learning_rate": 3.0983050847457626e-07,
14642
+ "loss": 1.5616,
14643
+ "step": 20860
14644
+ },
14645
+ {
14646
+ "epoch": 0.52,
14647
+ "grad_norm": 69.5,
14648
+ "learning_rate": 3.094915254237288e-07,
14649
+ "loss": 1.494,
14650
+ "step": 20870
14651
+ },
14652
+ {
14653
+ "epoch": 0.52,
14654
+ "grad_norm": 67.0,
14655
+ "learning_rate": 3.0915254237288137e-07,
14656
+ "loss": 1.5186,
14657
+ "step": 20880
14658
+ },
14659
+ {
14660
+ "epoch": 0.52,
14661
+ "grad_norm": 70.5,
14662
+ "learning_rate": 3.088135593220339e-07,
14663
+ "loss": 1.488,
14664
+ "step": 20890
14665
+ },
14666
+ {
14667
+ "epoch": 0.52,
14668
+ "grad_norm": 68.0,
14669
+ "learning_rate": 3.084745762711864e-07,
14670
+ "loss": 1.4227,
14671
+ "step": 20900
14672
+ },
14673
+ {
14674
+ "epoch": 0.52,
14675
+ "grad_norm": 63.5,
14676
+ "learning_rate": 3.08135593220339e-07,
14677
+ "loss": 1.432,
14678
+ "step": 20910
14679
+ },
14680
+ {
14681
+ "epoch": 0.52,
14682
+ "grad_norm": 66.5,
14683
+ "learning_rate": 3.077966101694915e-07,
14684
+ "loss": 1.4613,
14685
+ "step": 20920
14686
+ },
14687
+ {
14688
+ "epoch": 0.52,
14689
+ "grad_norm": 68.0,
14690
+ "learning_rate": 3.0745762711864406e-07,
14691
+ "loss": 1.465,
14692
+ "step": 20930
14693
+ },
14694
+ {
14695
+ "epoch": 0.52,
14696
+ "grad_norm": 66.0,
14697
+ "learning_rate": 3.071186440677966e-07,
14698
+ "loss": 1.4968,
14699
+ "step": 20940
14700
+ },
14701
+ {
14702
+ "epoch": 0.52,
14703
+ "grad_norm": 71.0,
14704
+ "learning_rate": 3.067796610169491e-07,
14705
+ "loss": 1.4866,
14706
+ "step": 20950
14707
+ },
14708
+ {
14709
+ "epoch": 0.52,
14710
+ "grad_norm": 65.5,
14711
+ "learning_rate": 3.064406779661017e-07,
14712
+ "loss": 1.5076,
14713
+ "step": 20960
14714
+ },
14715
+ {
14716
+ "epoch": 0.52,
14717
+ "grad_norm": 67.5,
14718
+ "learning_rate": 3.061016949152542e-07,
14719
+ "loss": 1.4341,
14720
+ "step": 20970
14721
+ },
14722
+ {
14723
+ "epoch": 0.52,
14724
+ "grad_norm": 66.5,
14725
+ "learning_rate": 3.0576271186440675e-07,
14726
+ "loss": 1.4862,
14727
+ "step": 20980
14728
+ },
14729
+ {
14730
+ "epoch": 0.52,
14731
+ "grad_norm": 67.0,
14732
+ "learning_rate": 3.0542372881355933e-07,
14733
+ "loss": 1.5154,
14734
+ "step": 20990
14735
+ },
14736
+ {
14737
+ "epoch": 0.53,
14738
+ "grad_norm": 68.0,
14739
+ "learning_rate": 3.0508474576271186e-07,
14740
+ "loss": 1.4475,
14741
+ "step": 21000
14742
+ },
14743
+ {
14744
+ "epoch": 0.53,
14745
+ "grad_norm": 64.5,
14746
+ "learning_rate": 3.0474576271186444e-07,
14747
+ "loss": 1.489,
14748
+ "step": 21010
14749
+ },
14750
+ {
14751
+ "epoch": 0.53,
14752
+ "grad_norm": 73.0,
14753
+ "learning_rate": 3.044067796610169e-07,
14754
+ "loss": 1.4482,
14755
+ "step": 21020
14756
+ },
14757
+ {
14758
+ "epoch": 0.53,
14759
+ "grad_norm": 66.5,
14760
+ "learning_rate": 3.0406779661016944e-07,
14761
+ "loss": 1.5017,
14762
+ "step": 21030
14763
+ },
14764
+ {
14765
+ "epoch": 0.53,
14766
+ "grad_norm": 65.5,
14767
+ "learning_rate": 3.03728813559322e-07,
14768
+ "loss": 1.4573,
14769
+ "step": 21040
14770
+ },
14771
+ {
14772
+ "epoch": 0.53,
14773
+ "grad_norm": 66.5,
14774
+ "learning_rate": 3.0338983050847455e-07,
14775
+ "loss": 1.4509,
14776
+ "step": 21050
14777
+ },
14778
+ {
14779
+ "epoch": 0.53,
14780
+ "grad_norm": 71.0,
14781
+ "learning_rate": 3.030508474576271e-07,
14782
+ "loss": 1.4765,
14783
+ "step": 21060
14784
+ },
14785
+ {
14786
+ "epoch": 0.53,
14787
+ "grad_norm": 65.0,
14788
+ "learning_rate": 3.0271186440677966e-07,
14789
+ "loss": 1.4637,
14790
+ "step": 21070
14791
+ },
14792
+ {
14793
+ "epoch": 0.53,
14794
+ "grad_norm": 71.5,
14795
+ "learning_rate": 3.023728813559322e-07,
14796
+ "loss": 1.4916,
14797
+ "step": 21080
14798
+ },
14799
+ {
14800
+ "epoch": 0.53,
14801
+ "grad_norm": 68.0,
14802
+ "learning_rate": 3.0203389830508477e-07,
14803
+ "loss": 1.5076,
14804
+ "step": 21090
14805
+ },
14806
+ {
14807
+ "epoch": 0.53,
14808
+ "grad_norm": 68.0,
14809
+ "learning_rate": 3.016949152542373e-07,
14810
+ "loss": 1.4416,
14811
+ "step": 21100
14812
+ },
14813
+ {
14814
+ "epoch": 0.53,
14815
+ "grad_norm": 66.0,
14816
+ "learning_rate": 3.013559322033898e-07,
14817
+ "loss": 1.4528,
14818
+ "step": 21110
14819
+ },
14820
+ {
14821
+ "epoch": 0.53,
14822
+ "grad_norm": 67.0,
14823
+ "learning_rate": 3.0101694915254235e-07,
14824
+ "loss": 1.4604,
14825
+ "step": 21120
14826
+ },
14827
+ {
14828
+ "epoch": 0.53,
14829
+ "grad_norm": 67.5,
14830
+ "learning_rate": 3.006779661016949e-07,
14831
+ "loss": 1.5248,
14832
+ "step": 21130
14833
+ },
14834
+ {
14835
+ "epoch": 0.53,
14836
+ "grad_norm": 68.5,
14837
+ "learning_rate": 3.003389830508474e-07,
14838
+ "loss": 1.5132,
14839
+ "step": 21140
14840
+ },
14841
+ {
14842
+ "epoch": 0.53,
14843
+ "grad_norm": 65.5,
14844
+ "learning_rate": 3e-07,
14845
+ "loss": 1.4708,
14846
+ "step": 21150
14847
+ },
14848
+ {
14849
+ "epoch": 0.53,
14850
+ "grad_norm": 65.0,
14851
+ "learning_rate": 2.996610169491525e-07,
14852
+ "loss": 1.4542,
14853
+ "step": 21160
14854
+ },
14855
+ {
14856
+ "epoch": 0.53,
14857
+ "grad_norm": 68.5,
14858
+ "learning_rate": 2.993220338983051e-07,
14859
+ "loss": 1.4819,
14860
+ "step": 21170
14861
+ },
14862
+ {
14863
+ "epoch": 0.53,
14864
+ "grad_norm": 66.0,
14865
+ "learning_rate": 2.989830508474576e-07,
14866
+ "loss": 1.4844,
14867
+ "step": 21180
14868
+ },
14869
+ {
14870
+ "epoch": 0.53,
14871
+ "grad_norm": 69.0,
14872
+ "learning_rate": 2.9864406779661015e-07,
14873
+ "loss": 1.4743,
14874
+ "step": 21190
14875
+ },
14876
+ {
14877
+ "epoch": 0.53,
14878
+ "grad_norm": 65.5,
14879
+ "learning_rate": 2.9830508474576273e-07,
14880
+ "loss": 1.4118,
14881
+ "step": 21200
14882
+ },
14883
+ {
14884
+ "epoch": 0.53,
14885
+ "grad_norm": 68.0,
14886
+ "learning_rate": 2.9796610169491526e-07,
14887
+ "loss": 1.4716,
14888
+ "step": 21210
14889
+ },
14890
+ {
14891
+ "epoch": 0.53,
14892
+ "grad_norm": 64.5,
14893
+ "learning_rate": 2.976271186440678e-07,
14894
+ "loss": 1.469,
14895
+ "step": 21220
14896
+ },
14897
+ {
14898
+ "epoch": 0.53,
14899
+ "grad_norm": 66.5,
14900
+ "learning_rate": 2.972881355932203e-07,
14901
+ "loss": 1.4809,
14902
+ "step": 21230
14903
+ },
14904
+ {
14905
+ "epoch": 0.53,
14906
+ "grad_norm": 64.5,
14907
+ "learning_rate": 2.9694915254237284e-07,
14908
+ "loss": 1.4595,
14909
+ "step": 21240
14910
+ },
14911
+ {
14912
+ "epoch": 0.53,
14913
+ "grad_norm": 73.0,
14914
+ "learning_rate": 2.966101694915254e-07,
14915
+ "loss": 1.4719,
14916
+ "step": 21250
14917
+ },
14918
+ {
14919
+ "epoch": 0.53,
14920
+ "grad_norm": 66.0,
14921
+ "learning_rate": 2.9627118644067795e-07,
14922
+ "loss": 1.4414,
14923
+ "step": 21260
14924
+ },
14925
+ {
14926
+ "epoch": 0.53,
14927
+ "grad_norm": 63.25,
14928
+ "learning_rate": 2.959322033898305e-07,
14929
+ "loss": 1.4531,
14930
+ "step": 21270
14931
+ },
14932
+ {
14933
+ "epoch": 0.53,
14934
+ "grad_norm": 65.0,
14935
+ "learning_rate": 2.9559322033898306e-07,
14936
+ "loss": 1.4457,
14937
+ "step": 21280
14938
+ },
14939
+ {
14940
+ "epoch": 0.53,
14941
+ "grad_norm": 67.5,
14942
+ "learning_rate": 2.952542372881356e-07,
14943
+ "loss": 1.4854,
14944
+ "step": 21290
14945
+ },
14946
+ {
14947
+ "epoch": 0.53,
14948
+ "grad_norm": 67.0,
14949
+ "learning_rate": 2.949152542372881e-07,
14950
+ "loss": 1.4871,
14951
+ "step": 21300
14952
+ },
14953
+ {
14954
+ "epoch": 0.53,
14955
+ "grad_norm": 68.0,
14956
+ "learning_rate": 2.945762711864407e-07,
14957
+ "loss": 1.4342,
14958
+ "step": 21310
14959
+ },
14960
+ {
14961
+ "epoch": 0.53,
14962
+ "grad_norm": 65.0,
14963
+ "learning_rate": 2.942372881355932e-07,
14964
+ "loss": 1.4727,
14965
+ "step": 21320
14966
+ },
14967
+ {
14968
+ "epoch": 0.53,
14969
+ "grad_norm": 71.0,
14970
+ "learning_rate": 2.9389830508474575e-07,
14971
+ "loss": 1.4878,
14972
+ "step": 21330
14973
+ },
14974
+ {
14975
+ "epoch": 0.53,
14976
+ "grad_norm": 69.0,
14977
+ "learning_rate": 2.935593220338983e-07,
14978
+ "loss": 1.4561,
14979
+ "step": 21340
14980
+ },
14981
+ {
14982
+ "epoch": 0.53,
14983
+ "grad_norm": 69.0,
14984
+ "learning_rate": 2.932203389830508e-07,
14985
+ "loss": 1.414,
14986
+ "step": 21350
14987
+ },
14988
+ {
14989
+ "epoch": 0.53,
14990
+ "grad_norm": 66.5,
14991
+ "learning_rate": 2.928813559322034e-07,
14992
+ "loss": 1.436,
14993
+ "step": 21360
14994
+ },
14995
+ {
14996
+ "epoch": 0.53,
14997
+ "grad_norm": 69.5,
14998
+ "learning_rate": 2.925423728813559e-07,
14999
+ "loss": 1.4707,
15000
+ "step": 21370
15001
+ },
15002
+ {
15003
+ "epoch": 0.53,
15004
+ "grad_norm": 66.5,
15005
+ "learning_rate": 2.9220338983050844e-07,
15006
+ "loss": 1.4839,
15007
+ "step": 21380
15008
+ },
15009
+ {
15010
+ "epoch": 0.53,
15011
+ "grad_norm": 70.5,
15012
+ "learning_rate": 2.91864406779661e-07,
15013
+ "loss": 1.4895,
15014
+ "step": 21390
15015
+ },
15016
+ {
15017
+ "epoch": 0.54,
15018
+ "grad_norm": 67.0,
15019
+ "learning_rate": 2.9152542372881355e-07,
15020
+ "loss": 1.4373,
15021
+ "step": 21400
15022
+ },
15023
+ {
15024
+ "epoch": 0.54,
15025
+ "grad_norm": 65.0,
15026
+ "learning_rate": 2.9118644067796613e-07,
15027
+ "loss": 1.3779,
15028
+ "step": 21410
15029
+ },
15030
+ {
15031
+ "epoch": 0.54,
15032
+ "grad_norm": 66.5,
15033
+ "learning_rate": 2.9084745762711866e-07,
15034
+ "loss": 1.4942,
15035
+ "step": 21420
15036
+ },
15037
+ {
15038
+ "epoch": 0.54,
15039
+ "grad_norm": 67.0,
15040
+ "learning_rate": 2.905084745762712e-07,
15041
+ "loss": 1.4563,
15042
+ "step": 21430
15043
+ },
15044
+ {
15045
+ "epoch": 0.54,
15046
+ "grad_norm": 63.25,
15047
+ "learning_rate": 2.901694915254237e-07,
15048
+ "loss": 1.4411,
15049
+ "step": 21440
15050
+ },
15051
+ {
15052
+ "epoch": 0.54,
15053
+ "grad_norm": 68.5,
15054
+ "learning_rate": 2.8983050847457624e-07,
15055
+ "loss": 1.4222,
15056
+ "step": 21450
15057
+ },
15058
+ {
15059
+ "epoch": 0.54,
15060
+ "grad_norm": 65.5,
15061
+ "learning_rate": 2.8949152542372877e-07,
15062
+ "loss": 1.4217,
15063
+ "step": 21460
15064
+ },
15065
+ {
15066
+ "epoch": 0.54,
15067
+ "grad_norm": 68.0,
15068
+ "learning_rate": 2.8915254237288135e-07,
15069
+ "loss": 1.4356,
15070
+ "step": 21470
15071
+ },
15072
+ {
15073
+ "epoch": 0.54,
15074
+ "grad_norm": 67.0,
15075
+ "learning_rate": 2.888135593220339e-07,
15076
+ "loss": 1.477,
15077
+ "step": 21480
15078
+ },
15079
+ {
15080
+ "epoch": 0.54,
15081
+ "grad_norm": 62.5,
15082
+ "learning_rate": 2.8847457627118646e-07,
15083
+ "loss": 1.4594,
15084
+ "step": 21490
15085
+ },
15086
+ {
15087
+ "epoch": 0.54,
15088
+ "grad_norm": 65.5,
15089
+ "learning_rate": 2.88135593220339e-07,
15090
+ "loss": 1.4453,
15091
+ "step": 21500
15092
+ },
15093
+ {
15094
+ "epoch": 0.54,
15095
+ "grad_norm": 62.5,
15096
+ "learning_rate": 2.877966101694915e-07,
15097
+ "loss": 1.4637,
15098
+ "step": 21510
15099
+ },
15100
+ {
15101
+ "epoch": 0.54,
15102
+ "grad_norm": 69.5,
15103
+ "learning_rate": 2.874576271186441e-07,
15104
+ "loss": 1.4335,
15105
+ "step": 21520
15106
+ },
15107
+ {
15108
+ "epoch": 0.54,
15109
+ "grad_norm": 68.0,
15110
+ "learning_rate": 2.871186440677966e-07,
15111
+ "loss": 1.4404,
15112
+ "step": 21530
15113
+ },
15114
+ {
15115
+ "epoch": 0.54,
15116
+ "grad_norm": 66.5,
15117
+ "learning_rate": 2.867796610169491e-07,
15118
+ "loss": 1.489,
15119
+ "step": 21540
15120
+ },
15121
+ {
15122
+ "epoch": 0.54,
15123
+ "grad_norm": 64.0,
15124
+ "learning_rate": 2.864406779661017e-07,
15125
+ "loss": 1.4628,
15126
+ "step": 21550
15127
+ },
15128
+ {
15129
+ "epoch": 0.54,
15130
+ "grad_norm": 65.0,
15131
+ "learning_rate": 2.861016949152542e-07,
15132
+ "loss": 1.4929,
15133
+ "step": 21560
15134
+ },
15135
+ {
15136
+ "epoch": 0.54,
15137
+ "grad_norm": 68.5,
15138
+ "learning_rate": 2.857627118644068e-07,
15139
+ "loss": 1.4815,
15140
+ "step": 21570
15141
+ },
15142
+ {
15143
+ "epoch": 0.54,
15144
+ "grad_norm": 66.0,
15145
+ "learning_rate": 2.854237288135593e-07,
15146
+ "loss": 1.4563,
15147
+ "step": 21580
15148
+ },
15149
+ {
15150
+ "epoch": 0.54,
15151
+ "grad_norm": 67.5,
15152
+ "learning_rate": 2.8508474576271184e-07,
15153
+ "loss": 1.4754,
15154
+ "step": 21590
15155
+ },
15156
+ {
15157
+ "epoch": 0.54,
15158
+ "grad_norm": 67.0,
15159
+ "learning_rate": 2.847457627118644e-07,
15160
+ "loss": 1.5266,
15161
+ "step": 21600
15162
+ },
15163
+ {
15164
+ "epoch": 0.54,
15165
+ "grad_norm": 65.0,
15166
+ "learning_rate": 2.8440677966101695e-07,
15167
+ "loss": 1.4389,
15168
+ "step": 21610
15169
+ },
15170
+ {
15171
+ "epoch": 0.54,
15172
+ "grad_norm": 66.5,
15173
+ "learning_rate": 2.840677966101695e-07,
15174
+ "loss": 1.4256,
15175
+ "step": 21620
15176
+ },
15177
+ {
15178
+ "epoch": 0.54,
15179
+ "grad_norm": 66.0,
15180
+ "learning_rate": 2.8372881355932206e-07,
15181
+ "loss": 1.4931,
15182
+ "step": 21630
15183
+ },
15184
+ {
15185
+ "epoch": 0.54,
15186
+ "grad_norm": 65.5,
15187
+ "learning_rate": 2.8338983050847453e-07,
15188
+ "loss": 1.5427,
15189
+ "step": 21640
15190
+ },
15191
+ {
15192
+ "epoch": 0.54,
15193
+ "grad_norm": 66.5,
15194
+ "learning_rate": 2.8305084745762706e-07,
15195
+ "loss": 1.4232,
15196
+ "step": 21650
15197
+ },
15198
+ {
15199
+ "epoch": 0.54,
15200
+ "grad_norm": 66.0,
15201
+ "learning_rate": 2.8271186440677964e-07,
15202
+ "loss": 1.484,
15203
+ "step": 21660
15204
+ },
15205
+ {
15206
+ "epoch": 0.54,
15207
+ "grad_norm": 67.0,
15208
+ "learning_rate": 2.8237288135593217e-07,
15209
+ "loss": 1.4328,
15210
+ "step": 21670
15211
+ },
15212
+ {
15213
+ "epoch": 0.54,
15214
+ "grad_norm": 66.5,
15215
+ "learning_rate": 2.8203389830508475e-07,
15216
+ "loss": 1.4981,
15217
+ "step": 21680
15218
+ },
15219
+ {
15220
+ "epoch": 0.54,
15221
+ "grad_norm": 68.5,
15222
+ "learning_rate": 2.816949152542373e-07,
15223
+ "loss": 1.4774,
15224
+ "step": 21690
15225
+ },
15226
+ {
15227
+ "epoch": 0.54,
15228
+ "grad_norm": 66.5,
15229
+ "learning_rate": 2.813559322033898e-07,
15230
+ "loss": 1.4202,
15231
+ "step": 21700
15232
+ },
15233
+ {
15234
+ "epoch": 0.54,
15235
+ "grad_norm": 66.5,
15236
+ "learning_rate": 2.810169491525424e-07,
15237
+ "loss": 1.4588,
15238
+ "step": 21710
15239
+ },
15240
+ {
15241
+ "epoch": 0.54,
15242
+ "grad_norm": 69.5,
15243
+ "learning_rate": 2.806779661016949e-07,
15244
+ "loss": 1.5449,
15245
+ "step": 21720
15246
+ },
15247
+ {
15248
+ "epoch": 0.54,
15249
+ "grad_norm": 70.5,
15250
+ "learning_rate": 2.803389830508475e-07,
15251
+ "loss": 1.5002,
15252
+ "step": 21730
15253
+ },
15254
+ {
15255
+ "epoch": 0.54,
15256
+ "grad_norm": 68.0,
15257
+ "learning_rate": 2.8e-07,
15258
+ "loss": 1.463,
15259
+ "step": 21740
15260
+ },
15261
+ {
15262
+ "epoch": 0.54,
15263
+ "grad_norm": 66.5,
15264
+ "learning_rate": 2.796610169491525e-07,
15265
+ "loss": 1.467,
15266
+ "step": 21750
15267
+ },
15268
+ {
15269
+ "epoch": 0.54,
15270
+ "grad_norm": 69.5,
15271
+ "learning_rate": 2.793220338983051e-07,
15272
+ "loss": 1.4127,
15273
+ "step": 21760
15274
+ },
15275
+ {
15276
+ "epoch": 0.54,
15277
+ "grad_norm": 71.5,
15278
+ "learning_rate": 2.789830508474576e-07,
15279
+ "loss": 1.493,
15280
+ "step": 21770
15281
+ },
15282
+ {
15283
+ "epoch": 0.54,
15284
+ "grad_norm": 64.0,
15285
+ "learning_rate": 2.7864406779661013e-07,
15286
+ "loss": 1.4866,
15287
+ "step": 21780
15288
+ },
15289
+ {
15290
+ "epoch": 0.54,
15291
+ "grad_norm": 69.0,
15292
+ "learning_rate": 2.783050847457627e-07,
15293
+ "loss": 1.5279,
15294
+ "step": 21790
15295
+ },
15296
+ {
15297
+ "epoch": 0.55,
15298
+ "grad_norm": 69.0,
15299
+ "learning_rate": 2.7796610169491524e-07,
15300
+ "loss": 1.5252,
15301
+ "step": 21800
15302
+ },
15303
+ {
15304
+ "epoch": 0.55,
15305
+ "grad_norm": 67.0,
15306
+ "learning_rate": 2.776271186440678e-07,
15307
+ "loss": 1.4794,
15308
+ "step": 21810
15309
+ },
15310
+ {
15311
+ "epoch": 0.55,
15312
+ "grad_norm": 67.5,
15313
+ "learning_rate": 2.7728813559322035e-07,
15314
+ "loss": 1.4078,
15315
+ "step": 21820
15316
+ },
15317
+ {
15318
+ "epoch": 0.55,
15319
+ "grad_norm": 69.0,
15320
+ "learning_rate": 2.769491525423729e-07,
15321
+ "loss": 1.4782,
15322
+ "step": 21830
15323
+ },
15324
+ {
15325
+ "epoch": 0.55,
15326
+ "grad_norm": 70.5,
15327
+ "learning_rate": 2.7661016949152545e-07,
15328
+ "loss": 1.4411,
15329
+ "step": 21840
15330
+ },
15331
+ {
15332
+ "epoch": 0.55,
15333
+ "grad_norm": 63.75,
15334
+ "learning_rate": 2.7627118644067793e-07,
15335
+ "loss": 1.393,
15336
+ "step": 21850
15337
+ },
15338
+ {
15339
+ "epoch": 0.55,
15340
+ "grad_norm": 67.0,
15341
+ "learning_rate": 2.7593220338983046e-07,
15342
+ "loss": 1.4784,
15343
+ "step": 21860
15344
+ },
15345
+ {
15346
+ "epoch": 0.55,
15347
+ "grad_norm": 68.0,
15348
+ "learning_rate": 2.7559322033898304e-07,
15349
+ "loss": 1.4012,
15350
+ "step": 21870
15351
+ },
15352
+ {
15353
+ "epoch": 0.55,
15354
+ "grad_norm": 65.5,
15355
+ "learning_rate": 2.7525423728813557e-07,
15356
+ "loss": 1.477,
15357
+ "step": 21880
15358
+ },
15359
+ {
15360
+ "epoch": 0.55,
15361
+ "grad_norm": 66.5,
15362
+ "learning_rate": 2.749152542372881e-07,
15363
+ "loss": 1.5067,
15364
+ "step": 21890
15365
+ },
15366
+ {
15367
+ "epoch": 0.55,
15368
+ "grad_norm": 67.5,
15369
+ "learning_rate": 2.745762711864407e-07,
15370
+ "loss": 1.4631,
15371
+ "step": 21900
15372
+ },
15373
+ {
15374
+ "epoch": 0.55,
15375
+ "grad_norm": 66.5,
15376
+ "learning_rate": 2.742372881355932e-07,
15377
+ "loss": 1.5257,
15378
+ "step": 21910
15379
+ },
15380
+ {
15381
+ "epoch": 0.55,
15382
+ "grad_norm": 67.5,
15383
+ "learning_rate": 2.738983050847458e-07,
15384
+ "loss": 1.4561,
15385
+ "step": 21920
15386
+ },
15387
+ {
15388
+ "epoch": 0.55,
15389
+ "grad_norm": 65.5,
15390
+ "learning_rate": 2.735593220338983e-07,
15391
+ "loss": 1.4773,
15392
+ "step": 21930
15393
+ },
15394
+ {
15395
+ "epoch": 0.55,
15396
+ "grad_norm": 67.5,
15397
+ "learning_rate": 2.7322033898305084e-07,
15398
+ "loss": 1.4732,
15399
+ "step": 21940
15400
+ },
15401
+ {
15402
+ "epoch": 0.55,
15403
+ "grad_norm": 66.0,
15404
+ "learning_rate": 2.7288135593220336e-07,
15405
+ "loss": 1.4743,
15406
+ "step": 21950
15407
+ },
15408
+ {
15409
+ "epoch": 0.55,
15410
+ "grad_norm": 67.0,
15411
+ "learning_rate": 2.725423728813559e-07,
15412
+ "loss": 1.4624,
15413
+ "step": 21960
15414
+ },
15415
+ {
15416
+ "epoch": 0.55,
15417
+ "grad_norm": 63.5,
15418
+ "learning_rate": 2.722033898305084e-07,
15419
+ "loss": 1.4451,
15420
+ "step": 21970
15421
+ },
15422
+ {
15423
+ "epoch": 0.55,
15424
+ "grad_norm": 64.5,
15425
+ "learning_rate": 2.71864406779661e-07,
15426
+ "loss": 1.4502,
15427
+ "step": 21980
15428
+ },
15429
+ {
15430
+ "epoch": 0.55,
15431
+ "grad_norm": 68.0,
15432
+ "learning_rate": 2.7152542372881353e-07,
15433
+ "loss": 1.4902,
15434
+ "step": 21990
15435
+ },
15436
+ {
15437
+ "epoch": 0.55,
15438
+ "grad_norm": 65.5,
15439
+ "learning_rate": 2.711864406779661e-07,
15440
+ "loss": 1.4215,
15441
+ "step": 22000
15442
+ },
15443
+ {
15444
+ "epoch": 0.55,
15445
+ "grad_norm": 66.0,
15446
+ "learning_rate": 2.7084745762711864e-07,
15447
+ "loss": 1.4646,
15448
+ "step": 22010
15449
+ },
15450
+ {
15451
+ "epoch": 0.55,
15452
+ "grad_norm": 65.5,
15453
+ "learning_rate": 2.7050847457627116e-07,
15454
+ "loss": 1.4843,
15455
+ "step": 22020
15456
+ },
15457
+ {
15458
+ "epoch": 0.55,
15459
+ "grad_norm": 66.5,
15460
+ "learning_rate": 2.7016949152542375e-07,
15461
+ "loss": 1.4881,
15462
+ "step": 22030
15463
+ },
15464
+ {
15465
+ "epoch": 0.55,
15466
+ "grad_norm": 68.5,
15467
+ "learning_rate": 2.6983050847457627e-07,
15468
+ "loss": 1.4136,
15469
+ "step": 22040
15470
+ },
15471
+ {
15472
+ "epoch": 0.55,
15473
+ "grad_norm": 63.25,
15474
+ "learning_rate": 2.6949152542372885e-07,
15475
+ "loss": 1.4735,
15476
+ "step": 22050
15477
+ },
15478
+ {
15479
+ "epoch": 0.55,
15480
+ "grad_norm": 67.5,
15481
+ "learning_rate": 2.6915254237288133e-07,
15482
+ "loss": 1.4365,
15483
+ "step": 22060
15484
+ },
15485
+ {
15486
+ "epoch": 0.55,
15487
+ "grad_norm": 68.0,
15488
+ "learning_rate": 2.6881355932203386e-07,
15489
+ "loss": 1.5033,
15490
+ "step": 22070
15491
+ },
15492
+ {
15493
+ "epoch": 0.55,
15494
+ "grad_norm": 65.5,
15495
+ "learning_rate": 2.6847457627118644e-07,
15496
+ "loss": 1.4978,
15497
+ "step": 22080
15498
+ },
15499
+ {
15500
+ "epoch": 0.55,
15501
+ "grad_norm": 67.0,
15502
+ "learning_rate": 2.6813559322033896e-07,
15503
+ "loss": 1.4561,
15504
+ "step": 22090
15505
+ },
15506
+ {
15507
+ "epoch": 0.55,
15508
+ "grad_norm": 65.5,
15509
+ "learning_rate": 2.677966101694915e-07,
15510
+ "loss": 1.5001,
15511
+ "step": 22100
15512
+ },
15513
+ {
15514
+ "epoch": 0.55,
15515
+ "grad_norm": 66.5,
15516
+ "learning_rate": 2.6745762711864407e-07,
15517
+ "loss": 1.4332,
15518
+ "step": 22110
15519
+ },
15520
+ {
15521
+ "epoch": 0.55,
15522
+ "grad_norm": 67.5,
15523
+ "learning_rate": 2.671186440677966e-07,
15524
+ "loss": 1.4629,
15525
+ "step": 22120
15526
+ },
15527
+ {
15528
+ "epoch": 0.55,
15529
+ "grad_norm": 67.5,
15530
+ "learning_rate": 2.6677966101694913e-07,
15531
+ "loss": 1.5131,
15532
+ "step": 22130
15533
+ },
15534
+ {
15535
+ "epoch": 0.55,
15536
+ "grad_norm": 66.0,
15537
+ "learning_rate": 2.664406779661017e-07,
15538
+ "loss": 1.4893,
15539
+ "step": 22140
15540
+ },
15541
+ {
15542
+ "epoch": 0.55,
15543
+ "grad_norm": 68.0,
15544
+ "learning_rate": 2.6610169491525424e-07,
15545
+ "loss": 1.4378,
15546
+ "step": 22150
15547
+ },
15548
+ {
15549
+ "epoch": 0.55,
15550
+ "grad_norm": 69.5,
15551
+ "learning_rate": 2.6576271186440676e-07,
15552
+ "loss": 1.4219,
15553
+ "step": 22160
15554
+ },
15555
+ {
15556
+ "epoch": 0.55,
15557
+ "grad_norm": 66.5,
15558
+ "learning_rate": 2.654237288135593e-07,
15559
+ "loss": 1.4722,
15560
+ "step": 22170
15561
+ },
15562
+ {
15563
+ "epoch": 0.55,
15564
+ "grad_norm": 68.0,
15565
+ "learning_rate": 2.650847457627118e-07,
15566
+ "loss": 1.509,
15567
+ "step": 22180
15568
+ },
15569
+ {
15570
+ "epoch": 0.55,
15571
+ "grad_norm": 65.5,
15572
+ "learning_rate": 2.647457627118644e-07,
15573
+ "loss": 1.4875,
15574
+ "step": 22190
15575
+ },
15576
+ {
15577
+ "epoch": 0.56,
15578
+ "grad_norm": 72.5,
15579
+ "learning_rate": 2.6440677966101693e-07,
15580
+ "loss": 1.4397,
15581
+ "step": 22200
15582
+ },
15583
+ {
15584
+ "epoch": 0.56,
15585
+ "grad_norm": 67.5,
15586
+ "learning_rate": 2.6406779661016945e-07,
15587
+ "loss": 1.4862,
15588
+ "step": 22210
15589
+ },
15590
+ {
15591
+ "epoch": 0.56,
15592
+ "grad_norm": 70.0,
15593
+ "learning_rate": 2.6372881355932204e-07,
15594
+ "loss": 1.4248,
15595
+ "step": 22220
15596
+ },
15597
+ {
15598
+ "epoch": 0.56,
15599
+ "grad_norm": 67.5,
15600
+ "learning_rate": 2.6338983050847456e-07,
15601
+ "loss": 1.485,
15602
+ "step": 22230
15603
+ },
15604
+ {
15605
+ "epoch": 0.56,
15606
+ "grad_norm": 67.5,
15607
+ "learning_rate": 2.6305084745762714e-07,
15608
+ "loss": 1.4798,
15609
+ "step": 22240
15610
+ },
15611
+ {
15612
+ "epoch": 0.56,
15613
+ "grad_norm": 64.0,
15614
+ "learning_rate": 2.6271186440677967e-07,
15615
+ "loss": 1.4664,
15616
+ "step": 22250
15617
+ },
15618
+ {
15619
+ "epoch": 0.56,
15620
+ "grad_norm": 65.5,
15621
+ "learning_rate": 2.623728813559322e-07,
15622
+ "loss": 1.4435,
15623
+ "step": 22260
15624
+ },
15625
+ {
15626
+ "epoch": 0.56,
15627
+ "grad_norm": 64.0,
15628
+ "learning_rate": 2.6203389830508473e-07,
15629
+ "loss": 1.5132,
15630
+ "step": 22270
15631
+ },
15632
+ {
15633
+ "epoch": 0.56,
15634
+ "grad_norm": 67.0,
15635
+ "learning_rate": 2.6169491525423725e-07,
15636
+ "loss": 1.4555,
15637
+ "step": 22280
15638
+ },
15639
+ {
15640
+ "epoch": 0.56,
15641
+ "grad_norm": 69.5,
15642
+ "learning_rate": 2.613559322033898e-07,
15643
+ "loss": 1.4874,
15644
+ "step": 22290
15645
+ },
15646
+ {
15647
+ "epoch": 0.56,
15648
+ "grad_norm": 67.0,
15649
+ "learning_rate": 2.6101694915254236e-07,
15650
+ "loss": 1.4806,
15651
+ "step": 22300
15652
+ },
15653
+ {
15654
+ "epoch": 0.56,
15655
+ "grad_norm": 69.5,
15656
+ "learning_rate": 2.606779661016949e-07,
15657
+ "loss": 1.4733,
15658
+ "step": 22310
15659
+ },
15660
+ {
15661
+ "epoch": 0.56,
15662
+ "grad_norm": 65.5,
15663
+ "learning_rate": 2.6033898305084747e-07,
15664
+ "loss": 1.4037,
15665
+ "step": 22320
15666
+ },
15667
+ {
15668
+ "epoch": 0.56,
15669
+ "grad_norm": 67.0,
15670
+ "learning_rate": 2.6e-07,
15671
+ "loss": 1.4826,
15672
+ "step": 22330
15673
+ },
15674
+ {
15675
+ "epoch": 0.56,
15676
+ "grad_norm": 69.0,
15677
+ "learning_rate": 2.5966101694915253e-07,
15678
+ "loss": 1.4517,
15679
+ "step": 22340
15680
+ },
15681
+ {
15682
+ "epoch": 0.56,
15683
+ "grad_norm": 68.5,
15684
+ "learning_rate": 2.593220338983051e-07,
15685
+ "loss": 1.4646,
15686
+ "step": 22350
15687
+ },
15688
+ {
15689
+ "epoch": 0.56,
15690
+ "grad_norm": 66.5,
15691
+ "learning_rate": 2.5898305084745763e-07,
15692
+ "loss": 1.463,
15693
+ "step": 22360
15694
+ },
15695
+ {
15696
+ "epoch": 0.56,
15697
+ "grad_norm": 65.0,
15698
+ "learning_rate": 2.586440677966101e-07,
15699
+ "loss": 1.3894,
15700
+ "step": 22370
15701
+ },
15702
+ {
15703
+ "epoch": 0.56,
15704
+ "grad_norm": 66.0,
15705
+ "learning_rate": 2.583050847457627e-07,
15706
+ "loss": 1.4653,
15707
+ "step": 22380
15708
+ },
15709
+ {
15710
+ "epoch": 0.56,
15711
+ "grad_norm": 64.0,
15712
+ "learning_rate": 2.579661016949152e-07,
15713
+ "loss": 1.4551,
15714
+ "step": 22390
15715
+ },
15716
+ {
15717
+ "epoch": 0.56,
15718
+ "grad_norm": 70.5,
15719
+ "learning_rate": 2.576271186440678e-07,
15720
+ "loss": 1.4699,
15721
+ "step": 22400
15722
+ },
15723
+ {
15724
+ "epoch": 0.56,
15725
+ "grad_norm": 68.0,
15726
+ "learning_rate": 2.572881355932203e-07,
15727
+ "loss": 1.4967,
15728
+ "step": 22410
15729
+ },
15730
+ {
15731
+ "epoch": 0.56,
15732
+ "grad_norm": 66.5,
15733
+ "learning_rate": 2.5694915254237285e-07,
15734
+ "loss": 1.4465,
15735
+ "step": 22420
15736
+ },
15737
+ {
15738
+ "epoch": 0.56,
15739
+ "grad_norm": 71.0,
15740
+ "learning_rate": 2.5661016949152543e-07,
15741
+ "loss": 1.5049,
15742
+ "step": 22430
15743
+ },
15744
+ {
15745
+ "epoch": 0.56,
15746
+ "grad_norm": 68.0,
15747
+ "learning_rate": 2.5627118644067796e-07,
15748
+ "loss": 1.459,
15749
+ "step": 22440
15750
+ },
15751
+ {
15752
+ "epoch": 0.56,
15753
+ "grad_norm": 64.5,
15754
+ "learning_rate": 2.559322033898305e-07,
15755
+ "loss": 1.4719,
15756
+ "step": 22450
15757
+ },
15758
+ {
15759
+ "epoch": 0.56,
15760
+ "grad_norm": 68.5,
15761
+ "learning_rate": 2.5559322033898307e-07,
15762
+ "loss": 1.4305,
15763
+ "step": 22460
15764
+ },
15765
+ {
15766
+ "epoch": 0.56,
15767
+ "grad_norm": 68.0,
15768
+ "learning_rate": 2.5525423728813555e-07,
15769
+ "loss": 1.4761,
15770
+ "step": 22470
15771
+ },
15772
+ {
15773
+ "epoch": 0.56,
15774
+ "grad_norm": 66.5,
15775
+ "learning_rate": 2.549152542372881e-07,
15776
+ "loss": 1.4834,
15777
+ "step": 22480
15778
+ },
15779
+ {
15780
+ "epoch": 0.56,
15781
+ "grad_norm": 68.0,
15782
+ "learning_rate": 2.5457627118644065e-07,
15783
+ "loss": 1.4149,
15784
+ "step": 22490
15785
+ },
15786
+ {
15787
+ "epoch": 0.56,
15788
+ "grad_norm": 66.0,
15789
+ "learning_rate": 2.542372881355932e-07,
15790
+ "loss": 1.4605,
15791
+ "step": 22500
15792
+ },
15793
+ {
15794
+ "epoch": 0.56,
15795
+ "grad_norm": 68.5,
15796
+ "learning_rate": 2.5389830508474576e-07,
15797
+ "loss": 1.4435,
15798
+ "step": 22510
15799
+ },
15800
+ {
15801
+ "epoch": 0.56,
15802
+ "grad_norm": 68.5,
15803
+ "learning_rate": 2.535593220338983e-07,
15804
+ "loss": 1.5106,
15805
+ "step": 22520
15806
+ },
15807
+ {
15808
+ "epoch": 0.56,
15809
+ "grad_norm": 66.5,
15810
+ "learning_rate": 2.532203389830508e-07,
15811
+ "loss": 1.4488,
15812
+ "step": 22530
15813
+ },
15814
+ {
15815
+ "epoch": 0.56,
15816
+ "grad_norm": 63.75,
15817
+ "learning_rate": 2.528813559322034e-07,
15818
+ "loss": 1.4492,
15819
+ "step": 22540
15820
+ },
15821
+ {
15822
+ "epoch": 0.56,
15823
+ "grad_norm": 66.5,
15824
+ "learning_rate": 2.525423728813559e-07,
15825
+ "loss": 1.4728,
15826
+ "step": 22550
15827
+ },
15828
+ {
15829
+ "epoch": 0.56,
15830
+ "grad_norm": 68.0,
15831
+ "learning_rate": 2.522033898305085e-07,
15832
+ "loss": 1.5016,
15833
+ "step": 22560
15834
+ },
15835
+ {
15836
+ "epoch": 0.56,
15837
+ "grad_norm": 68.5,
15838
+ "learning_rate": 2.5186440677966103e-07,
15839
+ "loss": 1.4636,
15840
+ "step": 22570
15841
+ },
15842
+ {
15843
+ "epoch": 0.56,
15844
+ "grad_norm": 66.5,
15845
+ "learning_rate": 2.515254237288135e-07,
15846
+ "loss": 1.4976,
15847
+ "step": 22580
15848
+ },
15849
+ {
15850
+ "epoch": 0.56,
15851
+ "grad_norm": 70.0,
15852
+ "learning_rate": 2.511864406779661e-07,
15853
+ "loss": 1.47,
15854
+ "step": 22590
15855
+ },
15856
+ {
15857
+ "epoch": 0.56,
15858
+ "grad_norm": 65.0,
15859
+ "learning_rate": 2.508474576271186e-07,
15860
+ "loss": 1.4302,
15861
+ "step": 22600
15862
+ },
15863
+ {
15864
+ "epoch": 0.57,
15865
+ "grad_norm": 71.5,
15866
+ "learning_rate": 2.5050847457627114e-07,
15867
+ "loss": 1.4743,
15868
+ "step": 22610
15869
+ },
15870
+ {
15871
+ "epoch": 0.57,
15872
+ "grad_norm": 72.0,
15873
+ "learning_rate": 2.501694915254237e-07,
15874
+ "loss": 1.4998,
15875
+ "step": 22620
15876
+ },
15877
+ {
15878
+ "epoch": 0.57,
15879
+ "grad_norm": 66.0,
15880
+ "learning_rate": 2.4983050847457625e-07,
15881
+ "loss": 1.4342,
15882
+ "step": 22630
15883
+ },
15884
+ {
15885
+ "epoch": 0.57,
15886
+ "grad_norm": 66.0,
15887
+ "learning_rate": 2.4949152542372883e-07,
15888
+ "loss": 1.4675,
15889
+ "step": 22640
15890
+ },
15891
+ {
15892
+ "epoch": 0.57,
15893
+ "grad_norm": 66.5,
15894
+ "learning_rate": 2.4915254237288136e-07,
15895
+ "loss": 1.4961,
15896
+ "step": 22650
15897
+ },
15898
+ {
15899
+ "epoch": 0.57,
15900
+ "grad_norm": 67.5,
15901
+ "learning_rate": 2.488135593220339e-07,
15902
+ "loss": 1.4907,
15903
+ "step": 22660
15904
+ },
15905
+ {
15906
+ "epoch": 0.57,
15907
+ "grad_norm": 68.0,
15908
+ "learning_rate": 2.484745762711864e-07,
15909
+ "loss": 1.4894,
15910
+ "step": 22670
15911
+ },
15912
+ {
15913
+ "epoch": 0.57,
15914
+ "grad_norm": 66.0,
15915
+ "learning_rate": 2.48135593220339e-07,
15916
+ "loss": 1.4661,
15917
+ "step": 22680
15918
+ },
15919
+ {
15920
+ "epoch": 0.57,
15921
+ "grad_norm": 65.0,
15922
+ "learning_rate": 2.477966101694915e-07,
15923
+ "loss": 1.4717,
15924
+ "step": 22690
15925
+ },
15926
+ {
15927
+ "epoch": 0.57,
15928
+ "grad_norm": 63.75,
15929
+ "learning_rate": 2.4745762711864405e-07,
15930
+ "loss": 1.4701,
15931
+ "step": 22700
15932
+ },
15933
+ {
15934
+ "epoch": 0.57,
15935
+ "grad_norm": 66.0,
15936
+ "learning_rate": 2.471186440677966e-07,
15937
+ "loss": 1.4747,
15938
+ "step": 22710
15939
+ },
15940
+ {
15941
+ "epoch": 0.57,
15942
+ "grad_norm": 67.0,
15943
+ "learning_rate": 2.4677966101694916e-07,
15944
+ "loss": 1.4711,
15945
+ "step": 22720
15946
+ },
15947
+ {
15948
+ "epoch": 0.57,
15949
+ "grad_norm": 66.5,
15950
+ "learning_rate": 2.464406779661017e-07,
15951
+ "loss": 1.4452,
15952
+ "step": 22730
15953
+ },
15954
+ {
15955
+ "epoch": 0.57,
15956
+ "grad_norm": 67.5,
15957
+ "learning_rate": 2.461016949152542e-07,
15958
+ "loss": 1.4918,
15959
+ "step": 22740
15960
+ },
15961
+ {
15962
+ "epoch": 0.57,
15963
+ "grad_norm": 68.0,
15964
+ "learning_rate": 2.457627118644068e-07,
15965
+ "loss": 1.4777,
15966
+ "step": 22750
15967
+ },
15968
+ {
15969
+ "epoch": 0.57,
15970
+ "grad_norm": 65.0,
15971
+ "learning_rate": 2.454237288135593e-07,
15972
+ "loss": 1.4705,
15973
+ "step": 22760
15974
+ },
15975
+ {
15976
+ "epoch": 0.57,
15977
+ "grad_norm": 66.5,
15978
+ "learning_rate": 2.4508474576271185e-07,
15979
+ "loss": 1.4481,
15980
+ "step": 22770
15981
+ },
15982
+ {
15983
+ "epoch": 0.57,
15984
+ "grad_norm": 67.0,
15985
+ "learning_rate": 2.447457627118644e-07,
15986
+ "loss": 1.4624,
15987
+ "step": 22780
15988
+ },
15989
+ {
15990
+ "epoch": 0.57,
15991
+ "grad_norm": 65.5,
15992
+ "learning_rate": 2.4440677966101696e-07,
15993
+ "loss": 1.4995,
15994
+ "step": 22790
15995
+ },
15996
+ {
15997
+ "epoch": 0.57,
15998
+ "grad_norm": 67.0,
15999
+ "learning_rate": 2.440677966101695e-07,
16000
+ "loss": 1.4544,
16001
+ "step": 22800
16002
+ },
16003
+ {
16004
+ "epoch": 0.57,
16005
+ "grad_norm": 66.0,
16006
+ "learning_rate": 2.43728813559322e-07,
16007
+ "loss": 1.4868,
16008
+ "step": 22810
16009
+ },
16010
+ {
16011
+ "epoch": 0.57,
16012
+ "grad_norm": 65.5,
16013
+ "learning_rate": 2.4338983050847454e-07,
16014
+ "loss": 1.4181,
16015
+ "step": 22820
16016
+ },
16017
+ {
16018
+ "epoch": 0.57,
16019
+ "grad_norm": 66.0,
16020
+ "learning_rate": 2.430508474576271e-07,
16021
+ "loss": 1.4431,
16022
+ "step": 22830
16023
+ },
16024
+ {
16025
+ "epoch": 0.57,
16026
+ "grad_norm": 66.5,
16027
+ "learning_rate": 2.4271186440677965e-07,
16028
+ "loss": 1.4813,
16029
+ "step": 22840
16030
+ },
16031
+ {
16032
+ "epoch": 0.57,
16033
+ "grad_norm": 66.0,
16034
+ "learning_rate": 2.423728813559322e-07,
16035
+ "loss": 1.5211,
16036
+ "step": 22850
16037
+ },
16038
+ {
16039
+ "epoch": 0.57,
16040
+ "grad_norm": 65.0,
16041
+ "learning_rate": 2.420338983050847e-07,
16042
+ "loss": 1.4331,
16043
+ "step": 22860
16044
+ },
16045
+ {
16046
+ "epoch": 0.57,
16047
+ "grad_norm": 66.5,
16048
+ "learning_rate": 2.416949152542373e-07,
16049
+ "loss": 1.5019,
16050
+ "step": 22870
16051
+ },
16052
+ {
16053
+ "epoch": 0.57,
16054
+ "grad_norm": 68.0,
16055
+ "learning_rate": 2.413559322033898e-07,
16056
+ "loss": 1.4635,
16057
+ "step": 22880
16058
+ },
16059
+ {
16060
+ "epoch": 0.57,
16061
+ "grad_norm": 67.5,
16062
+ "learning_rate": 2.4101694915254234e-07,
16063
+ "loss": 1.4641,
16064
+ "step": 22890
16065
+ },
16066
+ {
16067
+ "epoch": 0.57,
16068
+ "grad_norm": 66.0,
16069
+ "learning_rate": 2.406779661016949e-07,
16070
+ "loss": 1.4351,
16071
+ "step": 22900
16072
+ },
16073
+ {
16074
+ "epoch": 0.57,
16075
+ "grad_norm": 71.0,
16076
+ "learning_rate": 2.4033898305084745e-07,
16077
+ "loss": 1.4785,
16078
+ "step": 22910
16079
+ },
16080
+ {
16081
+ "epoch": 0.57,
16082
+ "grad_norm": 66.5,
16083
+ "learning_rate": 2.4e-07,
16084
+ "loss": 1.4715,
16085
+ "step": 22920
16086
+ },
16087
+ {
16088
+ "epoch": 0.57,
16089
+ "grad_norm": 67.5,
16090
+ "learning_rate": 2.396610169491525e-07,
16091
+ "loss": 1.4631,
16092
+ "step": 22930
16093
+ },
16094
+ {
16095
+ "epoch": 0.57,
16096
+ "grad_norm": 66.5,
16097
+ "learning_rate": 2.393220338983051e-07,
16098
+ "loss": 1.5024,
16099
+ "step": 22940
16100
+ },
16101
+ {
16102
+ "epoch": 0.57,
16103
+ "grad_norm": 68.0,
16104
+ "learning_rate": 2.389830508474576e-07,
16105
+ "loss": 1.4602,
16106
+ "step": 22950
16107
+ },
16108
+ {
16109
+ "epoch": 0.57,
16110
+ "grad_norm": 66.5,
16111
+ "learning_rate": 2.386440677966102e-07,
16112
+ "loss": 1.3829,
16113
+ "step": 22960
16114
+ },
16115
+ {
16116
+ "epoch": 0.57,
16117
+ "grad_norm": 67.0,
16118
+ "learning_rate": 2.383050847457627e-07,
16119
+ "loss": 1.4193,
16120
+ "step": 22970
16121
+ },
16122
+ {
16123
+ "epoch": 0.57,
16124
+ "grad_norm": 70.5,
16125
+ "learning_rate": 2.3796610169491525e-07,
16126
+ "loss": 1.4878,
16127
+ "step": 22980
16128
+ },
16129
+ {
16130
+ "epoch": 0.57,
16131
+ "grad_norm": 65.5,
16132
+ "learning_rate": 2.3762711864406778e-07,
16133
+ "loss": 1.4852,
16134
+ "step": 22990
16135
+ },
16136
+ {
16137
+ "epoch": 0.57,
16138
+ "grad_norm": 70.0,
16139
+ "learning_rate": 2.3728813559322033e-07,
16140
+ "loss": 1.4789,
16141
+ "step": 23000
16142
+ },
16143
+ {
16144
+ "epoch": 0.58,
16145
+ "grad_norm": 71.0,
16146
+ "learning_rate": 2.3694915254237289e-07,
16147
+ "loss": 1.4596,
16148
+ "step": 23010
16149
+ },
16150
+ {
16151
+ "epoch": 0.58,
16152
+ "grad_norm": 64.5,
16153
+ "learning_rate": 2.3661016949152541e-07,
16154
+ "loss": 1.4706,
16155
+ "step": 23020
16156
+ },
16157
+ {
16158
+ "epoch": 0.58,
16159
+ "grad_norm": 68.5,
16160
+ "learning_rate": 2.3627118644067794e-07,
16161
+ "loss": 1.4422,
16162
+ "step": 23030
16163
+ },
16164
+ {
16165
+ "epoch": 0.58,
16166
+ "grad_norm": 65.0,
16167
+ "learning_rate": 2.359322033898305e-07,
16168
+ "loss": 1.441,
16169
+ "step": 23040
16170
+ },
16171
+ {
16172
+ "epoch": 0.58,
16173
+ "grad_norm": 67.5,
16174
+ "learning_rate": 2.3559322033898305e-07,
16175
+ "loss": 1.4376,
16176
+ "step": 23050
16177
+ },
16178
+ {
16179
+ "epoch": 0.58,
16180
+ "grad_norm": 64.5,
16181
+ "learning_rate": 2.352542372881356e-07,
16182
+ "loss": 1.4766,
16183
+ "step": 23060
16184
+ },
16185
+ {
16186
+ "epoch": 0.58,
16187
+ "grad_norm": 66.0,
16188
+ "learning_rate": 2.349152542372881e-07,
16189
+ "loss": 1.482,
16190
+ "step": 23070
16191
+ },
16192
+ {
16193
+ "epoch": 0.58,
16194
+ "grad_norm": 65.0,
16195
+ "learning_rate": 2.3457627118644066e-07,
16196
+ "loss": 1.4598,
16197
+ "step": 23080
16198
+ },
16199
+ {
16200
+ "epoch": 0.58,
16201
+ "grad_norm": 67.5,
16202
+ "learning_rate": 2.3423728813559321e-07,
16203
+ "loss": 1.5314,
16204
+ "step": 23090
16205
+ },
16206
+ {
16207
+ "epoch": 0.58,
16208
+ "grad_norm": 66.5,
16209
+ "learning_rate": 2.3389830508474577e-07,
16210
+ "loss": 1.4375,
16211
+ "step": 23100
16212
+ },
16213
+ {
16214
+ "epoch": 0.58,
16215
+ "grad_norm": 68.0,
16216
+ "learning_rate": 2.335593220338983e-07,
16217
+ "loss": 1.4928,
16218
+ "step": 23110
16219
+ },
16220
+ {
16221
+ "epoch": 0.58,
16222
+ "grad_norm": 67.5,
16223
+ "learning_rate": 2.3322033898305082e-07,
16224
+ "loss": 1.4823,
16225
+ "step": 23120
16226
+ },
16227
+ {
16228
+ "epoch": 0.58,
16229
+ "grad_norm": 70.0,
16230
+ "learning_rate": 2.3288135593220338e-07,
16231
+ "loss": 1.5182,
16232
+ "step": 23130
16233
+ },
16234
+ {
16235
+ "epoch": 0.58,
16236
+ "grad_norm": 68.0,
16237
+ "learning_rate": 2.3254237288135593e-07,
16238
+ "loss": 1.5249,
16239
+ "step": 23140
16240
+ },
16241
+ {
16242
+ "epoch": 0.58,
16243
+ "grad_norm": 67.0,
16244
+ "learning_rate": 2.3220338983050846e-07,
16245
+ "loss": 1.4414,
16246
+ "step": 23150
16247
+ },
16248
+ {
16249
+ "epoch": 0.58,
16250
+ "grad_norm": 67.0,
16251
+ "learning_rate": 2.3186440677966101e-07,
16252
+ "loss": 1.4908,
16253
+ "step": 23160
16254
+ },
16255
+ {
16256
+ "epoch": 0.58,
16257
+ "grad_norm": 66.0,
16258
+ "learning_rate": 2.3152542372881357e-07,
16259
+ "loss": 1.4741,
16260
+ "step": 23170
16261
+ },
16262
+ {
16263
+ "epoch": 0.58,
16264
+ "grad_norm": 64.5,
16265
+ "learning_rate": 2.311864406779661e-07,
16266
+ "loss": 1.4474,
16267
+ "step": 23180
16268
+ },
16269
+ {
16270
+ "epoch": 0.58,
16271
+ "grad_norm": 65.5,
16272
+ "learning_rate": 2.3084745762711862e-07,
16273
+ "loss": 1.498,
16274
+ "step": 23190
16275
+ },
16276
+ {
16277
+ "epoch": 0.58,
16278
+ "grad_norm": 66.5,
16279
+ "learning_rate": 2.3050847457627118e-07,
16280
+ "loss": 1.4339,
16281
+ "step": 23200
16282
+ },
16283
+ {
16284
+ "epoch": 0.58,
16285
+ "grad_norm": 67.5,
16286
+ "learning_rate": 2.3016949152542373e-07,
16287
+ "loss": 1.5245,
16288
+ "step": 23210
16289
+ },
16290
+ {
16291
+ "epoch": 0.58,
16292
+ "grad_norm": 69.0,
16293
+ "learning_rate": 2.2983050847457629e-07,
16294
+ "loss": 1.5048,
16295
+ "step": 23220
16296
+ },
16297
+ {
16298
+ "epoch": 0.58,
16299
+ "grad_norm": 65.0,
16300
+ "learning_rate": 2.2949152542372879e-07,
16301
+ "loss": 1.47,
16302
+ "step": 23230
16303
+ },
16304
+ {
16305
+ "epoch": 0.58,
16306
+ "grad_norm": 65.0,
16307
+ "learning_rate": 2.2915254237288134e-07,
16308
+ "loss": 1.4383,
16309
+ "step": 23240
16310
+ },
16311
+ {
16312
+ "epoch": 0.58,
16313
+ "grad_norm": 69.0,
16314
+ "learning_rate": 2.288135593220339e-07,
16315
+ "loss": 1.5034,
16316
+ "step": 23250
16317
+ },
16318
+ {
16319
+ "epoch": 0.58,
16320
+ "grad_norm": 63.0,
16321
+ "learning_rate": 2.2847457627118645e-07,
16322
+ "loss": 1.4556,
16323
+ "step": 23260
16324
+ },
16325
+ {
16326
+ "epoch": 0.58,
16327
+ "grad_norm": 66.5,
16328
+ "learning_rate": 2.2813559322033898e-07,
16329
+ "loss": 1.4525,
16330
+ "step": 23270
16331
+ },
16332
+ {
16333
+ "epoch": 0.58,
16334
+ "grad_norm": 65.5,
16335
+ "learning_rate": 2.277966101694915e-07,
16336
+ "loss": 1.4466,
16337
+ "step": 23280
16338
+ },
16339
+ {
16340
+ "epoch": 0.58,
16341
+ "grad_norm": 70.0,
16342
+ "learning_rate": 2.2745762711864406e-07,
16343
+ "loss": 1.3894,
16344
+ "step": 23290
16345
+ },
16346
+ {
16347
+ "epoch": 0.58,
16348
+ "grad_norm": 62.5,
16349
+ "learning_rate": 2.271186440677966e-07,
16350
+ "loss": 1.356,
16351
+ "step": 23300
16352
+ },
16353
+ {
16354
+ "epoch": 0.58,
16355
+ "grad_norm": 67.5,
16356
+ "learning_rate": 2.2677966101694914e-07,
16357
+ "loss": 1.5171,
16358
+ "step": 23310
16359
+ },
16360
+ {
16361
+ "epoch": 0.58,
16362
+ "grad_norm": 66.5,
16363
+ "learning_rate": 2.264406779661017e-07,
16364
+ "loss": 1.4655,
16365
+ "step": 23320
16366
+ },
16367
+ {
16368
+ "epoch": 0.58,
16369
+ "grad_norm": 67.5,
16370
+ "learning_rate": 2.2610169491525422e-07,
16371
+ "loss": 1.4562,
16372
+ "step": 23330
16373
+ },
16374
+ {
16375
+ "epoch": 0.58,
16376
+ "grad_norm": 71.0,
16377
+ "learning_rate": 2.2576271186440678e-07,
16378
+ "loss": 1.4748,
16379
+ "step": 23340
16380
+ },
16381
+ {
16382
+ "epoch": 0.58,
16383
+ "grad_norm": 65.0,
16384
+ "learning_rate": 2.254237288135593e-07,
16385
+ "loss": 1.5118,
16386
+ "step": 23350
16387
+ },
16388
+ {
16389
+ "epoch": 0.58,
16390
+ "grad_norm": 73.0,
16391
+ "learning_rate": 2.2508474576271186e-07,
16392
+ "loss": 1.4944,
16393
+ "step": 23360
16394
+ },
16395
+ {
16396
+ "epoch": 0.58,
16397
+ "grad_norm": 68.5,
16398
+ "learning_rate": 2.247457627118644e-07,
16399
+ "loss": 1.4614,
16400
+ "step": 23370
16401
+ },
16402
+ {
16403
+ "epoch": 0.58,
16404
+ "grad_norm": 68.0,
16405
+ "learning_rate": 2.2440677966101691e-07,
16406
+ "loss": 1.4759,
16407
+ "step": 23380
16408
+ },
16409
+ {
16410
+ "epoch": 0.58,
16411
+ "grad_norm": 73.0,
16412
+ "learning_rate": 2.2406779661016947e-07,
16413
+ "loss": 1.4913,
16414
+ "step": 23390
16415
+ },
16416
+ {
16417
+ "epoch": 0.58,
16418
+ "grad_norm": 63.5,
16419
+ "learning_rate": 2.2372881355932202e-07,
16420
+ "loss": 1.4718,
16421
+ "step": 23400
16422
+ },
16423
+ {
16424
+ "epoch": 0.59,
16425
+ "grad_norm": 67.5,
16426
+ "learning_rate": 2.2338983050847458e-07,
16427
+ "loss": 1.4678,
16428
+ "step": 23410
16429
+ },
16430
+ {
16431
+ "epoch": 0.59,
16432
+ "grad_norm": 71.0,
16433
+ "learning_rate": 2.2305084745762713e-07,
16434
+ "loss": 1.49,
16435
+ "step": 23420
16436
+ },
16437
+ {
16438
+ "epoch": 0.59,
16439
+ "grad_norm": 72.0,
16440
+ "learning_rate": 2.2271186440677966e-07,
16441
+ "loss": 1.4582,
16442
+ "step": 23430
16443
+ },
16444
+ {
16445
+ "epoch": 0.59,
16446
+ "grad_norm": 65.5,
16447
+ "learning_rate": 2.2237288135593219e-07,
16448
+ "loss": 1.4385,
16449
+ "step": 23440
16450
+ },
16451
+ {
16452
+ "epoch": 0.59,
16453
+ "grad_norm": 67.0,
16454
+ "learning_rate": 2.2203389830508474e-07,
16455
+ "loss": 1.5076,
16456
+ "step": 23450
16457
+ },
16458
+ {
16459
+ "epoch": 0.59,
16460
+ "grad_norm": 66.5,
16461
+ "learning_rate": 2.216949152542373e-07,
16462
+ "loss": 1.4824,
16463
+ "step": 23460
16464
+ },
16465
+ {
16466
+ "epoch": 0.59,
16467
+ "grad_norm": 68.0,
16468
+ "learning_rate": 2.2135593220338982e-07,
16469
+ "loss": 1.4799,
16470
+ "step": 23470
16471
+ },
16472
+ {
16473
+ "epoch": 0.59,
16474
+ "grad_norm": 67.0,
16475
+ "learning_rate": 2.2101694915254238e-07,
16476
+ "loss": 1.4648,
16477
+ "step": 23480
16478
+ },
16479
+ {
16480
+ "epoch": 0.59,
16481
+ "grad_norm": 67.0,
16482
+ "learning_rate": 2.206779661016949e-07,
16483
+ "loss": 1.4858,
16484
+ "step": 23490
16485
+ },
16486
+ {
16487
+ "epoch": 0.59,
16488
+ "grad_norm": 68.0,
16489
+ "learning_rate": 2.2033898305084743e-07,
16490
+ "loss": 1.5103,
16491
+ "step": 23500
16492
+ },
16493
+ {
16494
+ "epoch": 0.59,
16495
+ "grad_norm": 67.5,
16496
+ "learning_rate": 2.1999999999999998e-07,
16497
+ "loss": 1.4786,
16498
+ "step": 23510
16499
+ },
16500
+ {
16501
+ "epoch": 0.59,
16502
+ "grad_norm": 66.5,
16503
+ "learning_rate": 2.1966101694915254e-07,
16504
+ "loss": 1.3982,
16505
+ "step": 23520
16506
+ },
16507
+ {
16508
+ "epoch": 0.59,
16509
+ "grad_norm": 67.0,
16510
+ "learning_rate": 2.193220338983051e-07,
16511
+ "loss": 1.4859,
16512
+ "step": 23530
16513
+ },
16514
+ {
16515
+ "epoch": 0.59,
16516
+ "grad_norm": 64.5,
16517
+ "learning_rate": 2.189830508474576e-07,
16518
+ "loss": 1.4449,
16519
+ "step": 23540
16520
+ },
16521
+ {
16522
+ "epoch": 0.59,
16523
+ "grad_norm": 66.0,
16524
+ "learning_rate": 2.1864406779661015e-07,
16525
+ "loss": 1.425,
16526
+ "step": 23550
16527
+ },
16528
+ {
16529
+ "epoch": 0.59,
16530
+ "grad_norm": 67.0,
16531
+ "learning_rate": 2.183050847457627e-07,
16532
+ "loss": 1.4306,
16533
+ "step": 23560
16534
+ },
16535
+ {
16536
+ "epoch": 0.59,
16537
+ "grad_norm": 66.0,
16538
+ "learning_rate": 2.1796610169491526e-07,
16539
+ "loss": 1.3738,
16540
+ "step": 23570
16541
+ },
16542
+ {
16543
+ "epoch": 0.59,
16544
+ "grad_norm": 64.0,
16545
+ "learning_rate": 2.1762711864406778e-07,
16546
+ "loss": 1.4323,
16547
+ "step": 23580
16548
+ },
16549
+ {
16550
+ "epoch": 0.59,
16551
+ "grad_norm": 69.0,
16552
+ "learning_rate": 2.172881355932203e-07,
16553
+ "loss": 1.4396,
16554
+ "step": 23590
16555
+ },
16556
+ {
16557
+ "epoch": 0.59,
16558
+ "grad_norm": 68.0,
16559
+ "learning_rate": 2.1694915254237287e-07,
16560
+ "loss": 1.4858,
16561
+ "step": 23600
16562
+ },
16563
+ {
16564
+ "epoch": 0.59,
16565
+ "grad_norm": 66.5,
16566
+ "learning_rate": 2.1661016949152542e-07,
16567
+ "loss": 1.5004,
16568
+ "step": 23610
16569
+ },
16570
+ {
16571
+ "epoch": 0.59,
16572
+ "grad_norm": 70.0,
16573
+ "learning_rate": 2.1627118644067795e-07,
16574
+ "loss": 1.5145,
16575
+ "step": 23620
16576
+ },
16577
+ {
16578
+ "epoch": 0.59,
16579
+ "grad_norm": 65.0,
16580
+ "learning_rate": 2.159322033898305e-07,
16581
+ "loss": 1.4797,
16582
+ "step": 23630
16583
+ },
16584
+ {
16585
+ "epoch": 0.59,
16586
+ "grad_norm": 66.5,
16587
+ "learning_rate": 2.1559322033898303e-07,
16588
+ "loss": 1.4935,
16589
+ "step": 23640
16590
+ },
16591
+ {
16592
+ "epoch": 0.59,
16593
+ "grad_norm": 68.5,
16594
+ "learning_rate": 2.1525423728813558e-07,
16595
+ "loss": 1.4311,
16596
+ "step": 23650
16597
+ },
16598
+ {
16599
+ "epoch": 0.59,
16600
+ "grad_norm": 66.0,
16601
+ "learning_rate": 2.149152542372881e-07,
16602
+ "loss": 1.5181,
16603
+ "step": 23660
16604
+ },
16605
+ {
16606
+ "epoch": 0.59,
16607
+ "grad_norm": 74.0,
16608
+ "learning_rate": 2.1457627118644067e-07,
16609
+ "loss": 1.4418,
16610
+ "step": 23670
16611
+ },
16612
+ {
16613
+ "epoch": 0.59,
16614
+ "grad_norm": 66.5,
16615
+ "learning_rate": 2.1423728813559322e-07,
16616
+ "loss": 1.4931,
16617
+ "step": 23680
16618
+ },
16619
+ {
16620
+ "epoch": 0.59,
16621
+ "grad_norm": 67.5,
16622
+ "learning_rate": 2.1389830508474575e-07,
16623
+ "loss": 1.4365,
16624
+ "step": 23690
16625
+ },
16626
+ {
16627
+ "epoch": 0.59,
16628
+ "grad_norm": 70.0,
16629
+ "learning_rate": 2.1355932203389828e-07,
16630
+ "loss": 1.4576,
16631
+ "step": 23700
16632
+ },
16633
+ {
16634
+ "epoch": 0.59,
16635
+ "grad_norm": 66.5,
16636
+ "learning_rate": 2.1322033898305083e-07,
16637
+ "loss": 1.4804,
16638
+ "step": 23710
16639
+ },
16640
+ {
16641
+ "epoch": 0.59,
16642
+ "grad_norm": 65.5,
16643
+ "learning_rate": 2.1288135593220338e-07,
16644
+ "loss": 1.4659,
16645
+ "step": 23720
16646
+ },
16647
+ {
16648
+ "epoch": 0.59,
16649
+ "grad_norm": 66.0,
16650
+ "learning_rate": 2.1254237288135594e-07,
16651
+ "loss": 1.4558,
16652
+ "step": 23730
16653
+ },
16654
+ {
16655
+ "epoch": 0.59,
16656
+ "grad_norm": 63.75,
16657
+ "learning_rate": 2.1220338983050847e-07,
16658
+ "loss": 1.4657,
16659
+ "step": 23740
16660
+ },
16661
+ {
16662
+ "epoch": 0.59,
16663
+ "grad_norm": 71.5,
16664
+ "learning_rate": 2.11864406779661e-07,
16665
+ "loss": 1.4803,
16666
+ "step": 23750
16667
+ },
16668
+ {
16669
+ "epoch": 0.59,
16670
+ "grad_norm": 66.5,
16671
+ "learning_rate": 2.1152542372881355e-07,
16672
+ "loss": 1.4303,
16673
+ "step": 23760
16674
+ },
16675
+ {
16676
+ "epoch": 0.59,
16677
+ "grad_norm": 66.0,
16678
+ "learning_rate": 2.111864406779661e-07,
16679
+ "loss": 1.4866,
16680
+ "step": 23770
16681
+ },
16682
+ {
16683
+ "epoch": 0.59,
16684
+ "grad_norm": 63.5,
16685
+ "learning_rate": 2.1084745762711863e-07,
16686
+ "loss": 1.4306,
16687
+ "step": 23780
16688
+ },
16689
+ {
16690
+ "epoch": 0.59,
16691
+ "grad_norm": 67.5,
16692
+ "learning_rate": 2.1050847457627118e-07,
16693
+ "loss": 1.4288,
16694
+ "step": 23790
16695
+ },
16696
+ {
16697
+ "epoch": 0.59,
16698
+ "grad_norm": 67.0,
16699
+ "learning_rate": 2.101694915254237e-07,
16700
+ "loss": 1.4637,
16701
+ "step": 23800
16702
+ },
16703
+ {
16704
+ "epoch": 0.6,
16705
+ "grad_norm": 67.5,
16706
+ "learning_rate": 2.0983050847457626e-07,
16707
+ "loss": 1.515,
16708
+ "step": 23810
16709
+ },
16710
+ {
16711
+ "epoch": 0.6,
16712
+ "grad_norm": 70.0,
16713
+ "learning_rate": 2.094915254237288e-07,
16714
+ "loss": 1.4255,
16715
+ "step": 23820
16716
+ },
16717
+ {
16718
+ "epoch": 0.6,
16719
+ "grad_norm": 67.5,
16720
+ "learning_rate": 2.0915254237288135e-07,
16721
+ "loss": 1.4935,
16722
+ "step": 23830
16723
+ },
16724
+ {
16725
+ "epoch": 0.6,
16726
+ "grad_norm": 68.5,
16727
+ "learning_rate": 2.088135593220339e-07,
16728
+ "loss": 1.5427,
16729
+ "step": 23840
16730
+ },
16731
+ {
16732
+ "epoch": 0.6,
16733
+ "grad_norm": 66.5,
16734
+ "learning_rate": 2.0847457627118643e-07,
16735
+ "loss": 1.5009,
16736
+ "step": 23850
16737
+ },
16738
+ {
16739
+ "epoch": 0.6,
16740
+ "grad_norm": 69.5,
16741
+ "learning_rate": 2.0813559322033896e-07,
16742
+ "loss": 1.5038,
16743
+ "step": 23860
16744
+ },
16745
+ {
16746
+ "epoch": 0.6,
16747
+ "grad_norm": 67.0,
16748
+ "learning_rate": 2.077966101694915e-07,
16749
+ "loss": 1.4524,
16750
+ "step": 23870
16751
+ },
16752
+ {
16753
+ "epoch": 0.6,
16754
+ "grad_norm": 69.0,
16755
+ "learning_rate": 2.0745762711864406e-07,
16756
+ "loss": 1.4575,
16757
+ "step": 23880
16758
+ },
16759
+ {
16760
+ "epoch": 0.6,
16761
+ "grad_norm": 67.0,
16762
+ "learning_rate": 2.0711864406779662e-07,
16763
+ "loss": 1.4309,
16764
+ "step": 23890
16765
+ },
16766
+ {
16767
+ "epoch": 0.6,
16768
+ "grad_norm": 70.0,
16769
+ "learning_rate": 2.0677966101694912e-07,
16770
+ "loss": 1.4585,
16771
+ "step": 23900
16772
+ },
16773
+ {
16774
+ "epoch": 0.6,
16775
+ "grad_norm": 64.0,
16776
+ "learning_rate": 2.0644067796610167e-07,
16777
+ "loss": 1.4503,
16778
+ "step": 23910
16779
+ },
16780
+ {
16781
+ "epoch": 0.6,
16782
+ "grad_norm": 71.0,
16783
+ "learning_rate": 2.0610169491525423e-07,
16784
+ "loss": 1.4801,
16785
+ "step": 23920
16786
+ },
16787
+ {
16788
+ "epoch": 0.6,
16789
+ "grad_norm": 67.5,
16790
+ "learning_rate": 2.0576271186440678e-07,
16791
+ "loss": 1.4773,
16792
+ "step": 23930
16793
+ },
16794
+ {
16795
+ "epoch": 0.6,
16796
+ "grad_norm": 66.0,
16797
+ "learning_rate": 2.054237288135593e-07,
16798
+ "loss": 1.4497,
16799
+ "step": 23940
16800
+ },
16801
+ {
16802
+ "epoch": 0.6,
16803
+ "grad_norm": 66.0,
16804
+ "learning_rate": 2.0508474576271184e-07,
16805
+ "loss": 1.4953,
16806
+ "step": 23950
16807
+ },
16808
+ {
16809
+ "epoch": 0.6,
16810
+ "grad_norm": 65.0,
16811
+ "learning_rate": 2.047457627118644e-07,
16812
+ "loss": 1.5234,
16813
+ "step": 23960
16814
+ },
16815
+ {
16816
+ "epoch": 0.6,
16817
+ "grad_norm": 67.0,
16818
+ "learning_rate": 2.0440677966101695e-07,
16819
+ "loss": 1.4482,
16820
+ "step": 23970
16821
+ },
16822
+ {
16823
+ "epoch": 0.6,
16824
+ "grad_norm": 68.0,
16825
+ "learning_rate": 2.0406779661016947e-07,
16826
+ "loss": 1.4598,
16827
+ "step": 23980
16828
+ },
16829
+ {
16830
+ "epoch": 0.6,
16831
+ "grad_norm": 67.0,
16832
+ "learning_rate": 2.0372881355932203e-07,
16833
+ "loss": 1.4573,
16834
+ "step": 23990
16835
+ },
16836
+ {
16837
+ "epoch": 0.6,
16838
+ "grad_norm": 67.0,
16839
+ "learning_rate": 2.0338983050847458e-07,
16840
+ "loss": 1.4849,
16841
+ "step": 24000
16842
+ },
16843
+ {
16844
+ "epoch": 0.6,
16845
+ "grad_norm": 68.0,
16846
+ "learning_rate": 2.030508474576271e-07,
16847
+ "loss": 1.4329,
16848
+ "step": 24010
16849
+ },
16850
+ {
16851
+ "epoch": 0.6,
16852
+ "grad_norm": 67.0,
16853
+ "learning_rate": 2.0271186440677964e-07,
16854
+ "loss": 1.481,
16855
+ "step": 24020
16856
+ },
16857
+ {
16858
+ "epoch": 0.6,
16859
+ "grad_norm": 65.0,
16860
+ "learning_rate": 2.023728813559322e-07,
16861
+ "loss": 1.4037,
16862
+ "step": 24030
16863
+ },
16864
+ {
16865
+ "epoch": 0.6,
16866
+ "grad_norm": 68.5,
16867
+ "learning_rate": 2.0203389830508475e-07,
16868
+ "loss": 1.4665,
16869
+ "step": 24040
16870
+ },
16871
+ {
16872
+ "epoch": 0.6,
16873
+ "grad_norm": 64.5,
16874
+ "learning_rate": 2.016949152542373e-07,
16875
+ "loss": 1.4291,
16876
+ "step": 24050
16877
+ },
16878
+ {
16879
+ "epoch": 0.6,
16880
+ "grad_norm": 68.5,
16881
+ "learning_rate": 2.013559322033898e-07,
16882
+ "loss": 1.4684,
16883
+ "step": 24060
16884
+ },
16885
+ {
16886
+ "epoch": 0.6,
16887
+ "grad_norm": 69.5,
16888
+ "learning_rate": 2.0101694915254235e-07,
16889
+ "loss": 1.4836,
16890
+ "step": 24070
16891
+ },
16892
+ {
16893
+ "epoch": 0.6,
16894
+ "grad_norm": 65.5,
16895
+ "learning_rate": 2.006779661016949e-07,
16896
+ "loss": 1.4914,
16897
+ "step": 24080
16898
+ },
16899
+ {
16900
+ "epoch": 0.6,
16901
+ "grad_norm": 64.0,
16902
+ "learning_rate": 2.0033898305084746e-07,
16903
+ "loss": 1.4268,
16904
+ "step": 24090
16905
+ },
16906
+ {
16907
+ "epoch": 0.6,
16908
+ "grad_norm": 66.5,
16909
+ "learning_rate": 2e-07,
16910
+ "loss": 1.5189,
16911
+ "step": 24100
16912
+ },
16913
+ {
16914
+ "epoch": 0.6,
16915
+ "grad_norm": 69.5,
16916
+ "learning_rate": 1.9966101694915252e-07,
16917
+ "loss": 1.4294,
16918
+ "step": 24110
16919
+ },
16920
+ {
16921
+ "epoch": 0.6,
16922
+ "grad_norm": 66.0,
16923
+ "learning_rate": 1.9932203389830507e-07,
16924
+ "loss": 1.4163,
16925
+ "step": 24120
16926
+ },
16927
+ {
16928
+ "epoch": 0.6,
16929
+ "grad_norm": 66.0,
16930
+ "learning_rate": 1.9898305084745763e-07,
16931
+ "loss": 1.4475,
16932
+ "step": 24130
16933
+ },
16934
+ {
16935
+ "epoch": 0.6,
16936
+ "grad_norm": 67.0,
16937
+ "learning_rate": 1.9864406779661015e-07,
16938
+ "loss": 1.4687,
16939
+ "step": 24140
16940
+ },
16941
+ {
16942
+ "epoch": 0.6,
16943
+ "grad_norm": 65.0,
16944
+ "learning_rate": 1.983050847457627e-07,
16945
+ "loss": 1.4612,
16946
+ "step": 24150
16947
+ },
16948
+ {
16949
+ "epoch": 0.6,
16950
+ "grad_norm": 66.5,
16951
+ "learning_rate": 1.9796610169491524e-07,
16952
+ "loss": 1.4849,
16953
+ "step": 24160
16954
+ },
16955
+ {
16956
+ "epoch": 0.6,
16957
+ "grad_norm": 65.0,
16958
+ "learning_rate": 1.976271186440678e-07,
16959
+ "loss": 1.461,
16960
+ "step": 24170
16961
+ },
16962
+ {
16963
+ "epoch": 0.6,
16964
+ "grad_norm": 66.0,
16965
+ "learning_rate": 1.9728813559322032e-07,
16966
+ "loss": 1.4663,
16967
+ "step": 24180
16968
+ },
16969
+ {
16970
+ "epoch": 0.6,
16971
+ "grad_norm": 68.5,
16972
+ "learning_rate": 1.9694915254237287e-07,
16973
+ "loss": 1.4987,
16974
+ "step": 24190
16975
+ },
16976
+ {
16977
+ "epoch": 0.6,
16978
+ "grad_norm": 74.0,
16979
+ "learning_rate": 1.9661016949152543e-07,
16980
+ "loss": 1.47,
16981
+ "step": 24200
16982
+ },
16983
+ {
16984
+ "epoch": 0.61,
16985
+ "grad_norm": 66.5,
16986
+ "learning_rate": 1.9627118644067795e-07,
16987
+ "loss": 1.4539,
16988
+ "step": 24210
16989
+ },
16990
+ {
16991
+ "epoch": 0.61,
16992
+ "grad_norm": 68.5,
16993
+ "learning_rate": 1.9593220338983048e-07,
16994
+ "loss": 1.4931,
16995
+ "step": 24220
16996
+ },
16997
+ {
16998
+ "epoch": 0.61,
16999
+ "grad_norm": 70.0,
17000
+ "learning_rate": 1.9559322033898304e-07,
17001
+ "loss": 1.507,
17002
+ "step": 24230
17003
+ },
17004
+ {
17005
+ "epoch": 0.61,
17006
+ "grad_norm": 69.0,
17007
+ "learning_rate": 1.952542372881356e-07,
17008
+ "loss": 1.4408,
17009
+ "step": 24240
17010
+ },
17011
+ {
17012
+ "epoch": 0.61,
17013
+ "grad_norm": 71.0,
17014
+ "learning_rate": 1.9491525423728814e-07,
17015
+ "loss": 1.4734,
17016
+ "step": 24250
17017
+ },
17018
+ {
17019
+ "epoch": 0.61,
17020
+ "grad_norm": 64.0,
17021
+ "learning_rate": 1.9457627118644067e-07,
17022
+ "loss": 1.4702,
17023
+ "step": 24260
17024
+ },
17025
+ {
17026
+ "epoch": 0.61,
17027
+ "grad_norm": 68.0,
17028
+ "learning_rate": 1.942372881355932e-07,
17029
+ "loss": 1.4114,
17030
+ "step": 24270
17031
+ },
17032
+ {
17033
+ "epoch": 0.61,
17034
+ "grad_norm": 67.0,
17035
+ "learning_rate": 1.9389830508474575e-07,
17036
+ "loss": 1.4777,
17037
+ "step": 24280
17038
+ },
17039
+ {
17040
+ "epoch": 0.61,
17041
+ "grad_norm": 65.5,
17042
+ "learning_rate": 1.935593220338983e-07,
17043
+ "loss": 1.4162,
17044
+ "step": 24290
17045
+ },
17046
+ {
17047
+ "epoch": 0.61,
17048
+ "grad_norm": 68.0,
17049
+ "learning_rate": 1.9322033898305084e-07,
17050
+ "loss": 1.4881,
17051
+ "step": 24300
17052
+ },
17053
+ {
17054
+ "epoch": 0.61,
17055
+ "grad_norm": 65.0,
17056
+ "learning_rate": 1.928813559322034e-07,
17057
+ "loss": 1.4465,
17058
+ "step": 24310
17059
+ },
17060
+ {
17061
+ "epoch": 0.61,
17062
+ "grad_norm": 65.5,
17063
+ "learning_rate": 1.9254237288135592e-07,
17064
+ "loss": 1.4648,
17065
+ "step": 24320
17066
+ },
17067
+ {
17068
+ "epoch": 0.61,
17069
+ "grad_norm": 64.5,
17070
+ "learning_rate": 1.9220338983050847e-07,
17071
+ "loss": 1.4391,
17072
+ "step": 24330
17073
+ },
17074
+ {
17075
+ "epoch": 0.61,
17076
+ "grad_norm": 71.5,
17077
+ "learning_rate": 1.91864406779661e-07,
17078
+ "loss": 1.488,
17079
+ "step": 24340
17080
+ },
17081
+ {
17082
+ "epoch": 0.61,
17083
+ "grad_norm": 65.5,
17084
+ "learning_rate": 1.9152542372881355e-07,
17085
+ "loss": 1.4339,
17086
+ "step": 24350
17087
+ },
17088
+ {
17089
+ "epoch": 0.61,
17090
+ "grad_norm": 68.5,
17091
+ "learning_rate": 1.911864406779661e-07,
17092
+ "loss": 1.4563,
17093
+ "step": 24360
17094
+ },
17095
+ {
17096
+ "epoch": 0.61,
17097
+ "grad_norm": 69.5,
17098
+ "learning_rate": 1.9084745762711864e-07,
17099
+ "loss": 1.4788,
17100
+ "step": 24370
17101
+ },
17102
+ {
17103
+ "epoch": 0.61,
17104
+ "grad_norm": 66.0,
17105
+ "learning_rate": 1.9050847457627116e-07,
17106
+ "loss": 1.4948,
17107
+ "step": 24380
17108
+ },
17109
+ {
17110
+ "epoch": 0.61,
17111
+ "grad_norm": 66.5,
17112
+ "learning_rate": 1.9016949152542372e-07,
17113
+ "loss": 1.4515,
17114
+ "step": 24390
17115
+ },
17116
+ {
17117
+ "epoch": 0.61,
17118
+ "grad_norm": 65.5,
17119
+ "learning_rate": 1.8983050847457627e-07,
17120
+ "loss": 1.4362,
17121
+ "step": 24400
17122
+ },
17123
+ {
17124
+ "epoch": 0.61,
17125
+ "grad_norm": 69.0,
17126
+ "learning_rate": 1.8949152542372883e-07,
17127
+ "loss": 1.4519,
17128
+ "step": 24410
17129
+ },
17130
+ {
17131
+ "epoch": 0.61,
17132
+ "grad_norm": 67.5,
17133
+ "learning_rate": 1.8915254237288133e-07,
17134
+ "loss": 1.4351,
17135
+ "step": 24420
17136
+ },
17137
+ {
17138
+ "epoch": 0.61,
17139
+ "grad_norm": 68.5,
17140
+ "learning_rate": 1.8881355932203388e-07,
17141
+ "loss": 1.4866,
17142
+ "step": 24430
17143
+ },
17144
+ {
17145
+ "epoch": 0.61,
17146
+ "grad_norm": 66.0,
17147
+ "learning_rate": 1.8847457627118643e-07,
17148
+ "loss": 1.4731,
17149
+ "step": 24440
17150
+ },
17151
+ {
17152
+ "epoch": 0.61,
17153
+ "grad_norm": 66.0,
17154
+ "learning_rate": 1.88135593220339e-07,
17155
+ "loss": 1.4202,
17156
+ "step": 24450
17157
+ },
17158
+ {
17159
+ "epoch": 0.61,
17160
+ "grad_norm": 67.0,
17161
+ "learning_rate": 1.8779661016949152e-07,
17162
+ "loss": 1.4834,
17163
+ "step": 24460
17164
+ },
17165
+ {
17166
+ "epoch": 0.61,
17167
+ "grad_norm": 69.5,
17168
+ "learning_rate": 1.8745762711864404e-07,
17169
+ "loss": 1.4601,
17170
+ "step": 24470
17171
+ },
17172
+ {
17173
+ "epoch": 0.61,
17174
+ "grad_norm": 69.0,
17175
+ "learning_rate": 1.871186440677966e-07,
17176
+ "loss": 1.4693,
17177
+ "step": 24480
17178
+ },
17179
+ {
17180
+ "epoch": 0.61,
17181
+ "grad_norm": 65.0,
17182
+ "learning_rate": 1.8677966101694915e-07,
17183
+ "loss": 1.4383,
17184
+ "step": 24490
17185
+ },
17186
+ {
17187
+ "epoch": 0.61,
17188
+ "grad_norm": 63.75,
17189
+ "learning_rate": 1.8644067796610168e-07,
17190
+ "loss": 1.4926,
17191
+ "step": 24500
17192
+ },
17193
+ {
17194
+ "epoch": 0.61,
17195
+ "grad_norm": 63.0,
17196
+ "learning_rate": 1.8610169491525423e-07,
17197
+ "loss": 1.4355,
17198
+ "step": 24510
17199
+ },
17200
+ {
17201
+ "epoch": 0.61,
17202
+ "grad_norm": 64.5,
17203
+ "learning_rate": 1.857627118644068e-07,
17204
+ "loss": 1.474,
17205
+ "step": 24520
17206
+ },
17207
+ {
17208
+ "epoch": 0.61,
17209
+ "grad_norm": 67.5,
17210
+ "learning_rate": 1.8542372881355932e-07,
17211
+ "loss": 1.4319,
17212
+ "step": 24530
17213
+ },
17214
+ {
17215
+ "epoch": 0.61,
17216
+ "grad_norm": 68.5,
17217
+ "learning_rate": 1.8508474576271184e-07,
17218
+ "loss": 1.469,
17219
+ "step": 24540
17220
+ },
17221
+ {
17222
+ "epoch": 0.61,
17223
+ "grad_norm": 66.0,
17224
+ "learning_rate": 1.847457627118644e-07,
17225
+ "loss": 1.4818,
17226
+ "step": 24550
17227
+ },
17228
+ {
17229
+ "epoch": 0.61,
17230
+ "grad_norm": 72.5,
17231
+ "learning_rate": 1.8440677966101695e-07,
17232
+ "loss": 1.4986,
17233
+ "step": 24560
17234
+ },
17235
+ {
17236
+ "epoch": 0.61,
17237
+ "grad_norm": 64.5,
17238
+ "learning_rate": 1.840677966101695e-07,
17239
+ "loss": 1.4524,
17240
+ "step": 24570
17241
+ },
17242
+ {
17243
+ "epoch": 0.61,
17244
+ "grad_norm": 65.0,
17245
+ "learning_rate": 1.83728813559322e-07,
17246
+ "loss": 1.5097,
17247
+ "step": 24580
17248
+ },
17249
+ {
17250
+ "epoch": 0.61,
17251
+ "grad_norm": 68.5,
17252
+ "learning_rate": 1.8338983050847456e-07,
17253
+ "loss": 1.4928,
17254
+ "step": 24590
17255
+ },
17256
+ {
17257
+ "epoch": 0.61,
17258
+ "grad_norm": 67.5,
17259
+ "learning_rate": 1.8305084745762712e-07,
17260
+ "loss": 1.4628,
17261
+ "step": 24600
17262
+ },
17263
+ {
17264
+ "epoch": 0.62,
17265
+ "grad_norm": 62.75,
17266
+ "learning_rate": 1.8271186440677967e-07,
17267
+ "loss": 1.4052,
17268
+ "step": 24610
17269
+ },
17270
+ {
17271
+ "epoch": 0.62,
17272
+ "grad_norm": 67.0,
17273
+ "learning_rate": 1.823728813559322e-07,
17274
+ "loss": 1.4308,
17275
+ "step": 24620
17276
+ },
17277
+ {
17278
+ "epoch": 0.62,
17279
+ "grad_norm": 70.5,
17280
+ "learning_rate": 1.8203389830508473e-07,
17281
+ "loss": 1.5198,
17282
+ "step": 24630
17283
+ },
17284
+ {
17285
+ "epoch": 0.62,
17286
+ "grad_norm": 65.5,
17287
+ "learning_rate": 1.8169491525423728e-07,
17288
+ "loss": 1.5095,
17289
+ "step": 24640
17290
+ },
17291
+ {
17292
+ "epoch": 0.62,
17293
+ "grad_norm": 67.0,
17294
+ "learning_rate": 1.8135593220338983e-07,
17295
+ "loss": 1.4858,
17296
+ "step": 24650
17297
+ },
17298
+ {
17299
+ "epoch": 0.62,
17300
+ "grad_norm": 69.0,
17301
+ "learning_rate": 1.8101694915254236e-07,
17302
+ "loss": 1.4812,
17303
+ "step": 24660
17304
+ },
17305
+ {
17306
+ "epoch": 0.62,
17307
+ "grad_norm": 66.0,
17308
+ "learning_rate": 1.8067796610169492e-07,
17309
+ "loss": 1.4465,
17310
+ "step": 24670
17311
+ },
17312
+ {
17313
+ "epoch": 0.62,
17314
+ "grad_norm": 65.5,
17315
+ "learning_rate": 1.8033898305084744e-07,
17316
+ "loss": 1.4647,
17317
+ "step": 24680
17318
+ },
17319
+ {
17320
+ "epoch": 0.62,
17321
+ "grad_norm": 66.5,
17322
+ "learning_rate": 1.8e-07,
17323
+ "loss": 1.5012,
17324
+ "step": 24690
17325
+ },
17326
+ {
17327
+ "epoch": 0.62,
17328
+ "grad_norm": 68.0,
17329
+ "learning_rate": 1.7966101694915252e-07,
17330
+ "loss": 1.4985,
17331
+ "step": 24700
17332
+ },
17333
+ {
17334
+ "epoch": 0.62,
17335
+ "grad_norm": 64.5,
17336
+ "learning_rate": 1.7932203389830508e-07,
17337
+ "loss": 1.4605,
17338
+ "step": 24710
17339
+ },
17340
+ {
17341
+ "epoch": 0.62,
17342
+ "grad_norm": 67.0,
17343
+ "learning_rate": 1.7898305084745763e-07,
17344
+ "loss": 1.4714,
17345
+ "step": 24720
17346
+ },
17347
+ {
17348
+ "epoch": 0.62,
17349
+ "grad_norm": 68.0,
17350
+ "learning_rate": 1.7864406779661016e-07,
17351
+ "loss": 1.45,
17352
+ "step": 24730
17353
+ },
17354
+ {
17355
+ "epoch": 0.62,
17356
+ "grad_norm": 67.0,
17357
+ "learning_rate": 1.783050847457627e-07,
17358
+ "loss": 1.4958,
17359
+ "step": 24740
17360
+ },
17361
+ {
17362
+ "epoch": 0.62,
17363
+ "grad_norm": 69.0,
17364
+ "learning_rate": 1.7796610169491524e-07,
17365
+ "loss": 1.4518,
17366
+ "step": 24750
17367
+ },
17368
+ {
17369
+ "epoch": 0.62,
17370
+ "grad_norm": 65.5,
17371
+ "learning_rate": 1.776271186440678e-07,
17372
+ "loss": 1.4454,
17373
+ "step": 24760
17374
+ },
17375
+ {
17376
+ "epoch": 0.62,
17377
+ "grad_norm": 66.5,
17378
+ "learning_rate": 1.7728813559322035e-07,
17379
+ "loss": 1.4836,
17380
+ "step": 24770
17381
+ },
17382
+ {
17383
+ "epoch": 0.62,
17384
+ "grad_norm": 64.5,
17385
+ "learning_rate": 1.7694915254237288e-07,
17386
+ "loss": 1.4561,
17387
+ "step": 24780
17388
+ },
17389
+ {
17390
+ "epoch": 0.62,
17391
+ "grad_norm": 66.0,
17392
+ "learning_rate": 1.766101694915254e-07,
17393
+ "loss": 1.4442,
17394
+ "step": 24790
17395
+ },
17396
+ {
17397
+ "epoch": 0.62,
17398
+ "grad_norm": 64.5,
17399
+ "learning_rate": 1.7627118644067796e-07,
17400
+ "loss": 1.4462,
17401
+ "step": 24800
17402
+ },
17403
+ {
17404
+ "epoch": 0.62,
17405
+ "grad_norm": 66.0,
17406
+ "learning_rate": 1.7593220338983051e-07,
17407
+ "loss": 1.5105,
17408
+ "step": 24810
17409
+ },
17410
+ {
17411
+ "epoch": 0.62,
17412
+ "grad_norm": 66.0,
17413
+ "learning_rate": 1.7559322033898304e-07,
17414
+ "loss": 1.4328,
17415
+ "step": 24820
17416
+ },
17417
+ {
17418
+ "epoch": 0.62,
17419
+ "grad_norm": 66.0,
17420
+ "learning_rate": 1.752542372881356e-07,
17421
+ "loss": 1.4657,
17422
+ "step": 24830
17423
+ },
17424
+ {
17425
+ "epoch": 0.62,
17426
+ "grad_norm": 68.5,
17427
+ "learning_rate": 1.7491525423728812e-07,
17428
+ "loss": 1.4911,
17429
+ "step": 24840
17430
+ },
17431
+ {
17432
+ "epoch": 0.62,
17433
+ "grad_norm": 65.0,
17434
+ "learning_rate": 1.7457627118644068e-07,
17435
+ "loss": 1.4936,
17436
+ "step": 24850
17437
+ },
17438
+ {
17439
+ "epoch": 0.62,
17440
+ "grad_norm": 66.5,
17441
+ "learning_rate": 1.742372881355932e-07,
17442
+ "loss": 1.4486,
17443
+ "step": 24860
17444
+ },
17445
+ {
17446
+ "epoch": 0.62,
17447
+ "grad_norm": 65.0,
17448
+ "learning_rate": 1.7389830508474576e-07,
17449
+ "loss": 1.4143,
17450
+ "step": 24870
17451
+ },
17452
+ {
17453
+ "epoch": 0.62,
17454
+ "grad_norm": 67.0,
17455
+ "learning_rate": 1.7355932203389831e-07,
17456
+ "loss": 1.4867,
17457
+ "step": 24880
17458
+ },
17459
+ {
17460
+ "epoch": 0.62,
17461
+ "grad_norm": 66.5,
17462
+ "learning_rate": 1.7322033898305084e-07,
17463
+ "loss": 1.4848,
17464
+ "step": 24890
17465
+ },
17466
+ {
17467
+ "epoch": 0.62,
17468
+ "grad_norm": 67.0,
17469
+ "learning_rate": 1.7288135593220337e-07,
17470
+ "loss": 1.4092,
17471
+ "step": 24900
17472
+ },
17473
+ {
17474
+ "epoch": 0.62,
17475
+ "grad_norm": 66.5,
17476
+ "learning_rate": 1.7254237288135592e-07,
17477
+ "loss": 1.4708,
17478
+ "step": 24910
17479
+ },
17480
+ {
17481
+ "epoch": 0.62,
17482
+ "grad_norm": 65.5,
17483
+ "learning_rate": 1.7220338983050848e-07,
17484
+ "loss": 1.4355,
17485
+ "step": 24920
17486
+ },
17487
+ {
17488
+ "epoch": 0.62,
17489
+ "grad_norm": 67.0,
17490
+ "learning_rate": 1.7186440677966103e-07,
17491
+ "loss": 1.4986,
17492
+ "step": 24930
17493
+ },
17494
+ {
17495
+ "epoch": 0.62,
17496
+ "grad_norm": 68.5,
17497
+ "learning_rate": 1.7152542372881353e-07,
17498
+ "loss": 1.5264,
17499
+ "step": 24940
17500
+ },
17501
+ {
17502
+ "epoch": 0.62,
17503
+ "grad_norm": 71.5,
17504
+ "learning_rate": 1.711864406779661e-07,
17505
+ "loss": 1.5117,
17506
+ "step": 24950
17507
+ },
17508
+ {
17509
+ "epoch": 0.62,
17510
+ "grad_norm": 68.5,
17511
+ "learning_rate": 1.7084745762711864e-07,
17512
+ "loss": 1.4339,
17513
+ "step": 24960
17514
+ },
17515
+ {
17516
+ "epoch": 0.62,
17517
+ "grad_norm": 67.0,
17518
+ "learning_rate": 1.705084745762712e-07,
17519
+ "loss": 1.4962,
17520
+ "step": 24970
17521
+ },
17522
+ {
17523
+ "epoch": 0.62,
17524
+ "grad_norm": 66.5,
17525
+ "learning_rate": 1.7016949152542372e-07,
17526
+ "loss": 1.4068,
17527
+ "step": 24980
17528
+ },
17529
+ {
17530
+ "epoch": 0.62,
17531
+ "grad_norm": 67.5,
17532
+ "learning_rate": 1.6983050847457625e-07,
17533
+ "loss": 1.4685,
17534
+ "step": 24990
17535
+ },
17536
+ {
17537
+ "epoch": 0.62,
17538
+ "grad_norm": 67.0,
17539
+ "learning_rate": 1.694915254237288e-07,
17540
+ "loss": 1.4548,
17541
+ "step": 25000
17542
+ },
17543
+ {
17544
+ "epoch": 0.62,
17545
+ "eval_loss": 1.466233730316162,
17546
+ "eval_runtime": 68.2791,
17547
+ "eval_samples_per_second": 14.646,
17548
+ "eval_steps_per_second": 14.646,
17549
+ "step": 25000
17550
  }
17551
  ],
17552
  "logging_steps": 10,
 
17554
  "num_input_tokens_seen": 0,
17555
  "num_train_epochs": 1,
17556
  "save_steps": 5000,
17557
+ "total_flos": 4.03480510464e+17,
17558
  "train_batch_size": 1,
17559
  "trial_name": null,
17560
  "trial_params": null