gabrielaltay committed on
Commit 1203cbb
1 Parent(s): 2809865

Training in progress, step 18216, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:c2f0dbf644a9677e79e68a1d141370f9fc19d80aafd4fa3703ba7daaf4679cdf
+ oid sha256:da8378f58afdcbb61da5aab016c87b360ba3b97ce5895a57c14de5629f844194
 size 500389884
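
The pointer above records only the object hash and byte size; the weights themselves live in LFS storage. Below is a minimal sketch of opening the updated weights file once it has been pulled locally. It assumes the safetensors and torch packages are installed and that the repository was cloned with git-lfs; the printed counts depend on the model architecture, which this diff does not show.

from safetensors.torch import load_file  # pip install safetensors torch

# Load all tensors from the updated checkpoint file referenced by the pointer above.
state_dict = load_file("last-checkpoint/model.safetensors")

total_params = sum(t.numel() for t in state_dict.values())
print(f"{len(state_dict)} tensors, {total_params:,} parameters")
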
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:1f9ef24481a5454737bb36638668624276fc1de872bfb96458291412f8b836da
+ oid sha256:d95db74dc8b3eeeb846cb4c4e9addbe7241924481ae96c22081d519fcd26cbfc
 size 1000900218
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:0ea443f9780df3e4f55b7a8fe78f5f54f5f5faaadff43eb645514989afd8f776
+ oid sha256:12f61f7e51cc603cdd137a87be15b659d00af22f66c4e5d529fd92a34ec1a2fc
 size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:6bde8530504f368a95ffd37dfe12eaf937597762fada8d945ae1e071babd54fa
+ oid sha256:a04bf168981df42daaf3298864d910e1018e869d302c5844d03dbfe8eb4c3e75
 size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:5539ffd5320a73f47cc2092b2ac4690b8e116f5ffa1613c0c9ef2d49e29add72
+ oid sha256:0d863b1e8dd7aadad275d56a5330082a7af839c16da6482be7ffc6e9ccd5d7a6
 size 1064
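
Each file above is stored as a Git LFS pointer: a three-line stub recording the spec version, the SHA-256 of the real object (oid), and its size in bytes. A minimal sketch of checking a locally downloaded file against its pointer follows; the path and expected values are taken from the model.safetensors entry above, and the helper name is illustrative.

import hashlib
import os


def matches_lfs_pointer(local_path: str, expected_sha256: str, expected_size: int) -> bool:
    """Return True if the local file has the size and SHA-256 recorded in its LFS pointer."""
    if os.path.getsize(local_path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(local_path, "rb") as f:
        # Hash in 1 MiB chunks so large checkpoint files do not need to fit in memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256


print(matches_lfs_pointer(
    "last-checkpoint/model.safetensors",
    "da8378f58afdcbb61da5aab016c87b360ba3b97ce5895a57c14de5629f844194",
    500389884,
))
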
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
- "epoch": 0.8000395276446465,
+ "epoch": 0.9000444686002272,
 "eval_steps": 500,
- "global_step": 16192,
+ "global_step": 18216,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -28343,6 +28343,3548 @@
 "learning_rate": 1.0007905528929295e-05,
 "loss": 5.4911,
 "step": 16192
 }
 ],
 "logging_steps": 4,
@@ -28350,7 +31892,7 @@
 "num_input_tokens_seen": 0,
 "num_train_epochs": 1,
 "save_steps": 2024,
- "total_flos": 6.820471143346995e+16,
 "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null
 
 "learning_rate": 1.0007905528929295e-05,
 "loss": 5.4911,
 "step": 16192
28346
+ },
28347
+ {
28348
+ "epoch": 0.8,
28349
+ "grad_norm": 2.1323323249816895,
28350
+ "learning_rate": 9.998023617767677e-06,
28351
+ "loss": 5.5276,
28352
+ "step": 16196
28353
+ },
28354
+ {
28355
+ "epoch": 0.8,
28356
+ "grad_norm": 2.277160167694092,
28357
+ "learning_rate": 9.988141706606058e-06,
28358
+ "loss": 5.4718,
28359
+ "step": 16200
28360
+ },
28361
+ {
28362
+ "epoch": 0.8,
28363
+ "grad_norm": 1.8769733905792236,
28364
+ "learning_rate": 9.97825979544444e-06,
28365
+ "loss": 5.3622,
28366
+ "step": 16204
28367
+ },
28368
+ {
28369
+ "epoch": 0.8,
28370
+ "grad_norm": 1.8731465339660645,
28371
+ "learning_rate": 9.96837788428282e-06,
28372
+ "loss": 5.5215,
28373
+ "step": 16208
28374
+ },
28375
+ {
28376
+ "epoch": 0.8,
28377
+ "grad_norm": 1.8808999061584473,
28378
+ "learning_rate": 9.958495973121202e-06,
28379
+ "loss": 5.3953,
28380
+ "step": 16212
28381
+ },
28382
+ {
28383
+ "epoch": 0.8,
28384
+ "grad_norm": 1.8657152652740479,
28385
+ "learning_rate": 9.948614061959584e-06,
28386
+ "loss": 5.3316,
28387
+ "step": 16216
28388
+ },
28389
+ {
28390
+ "epoch": 0.8,
28391
+ "grad_norm": 1.849173665046692,
28392
+ "learning_rate": 9.938732150797965e-06,
28393
+ "loss": 5.4471,
28394
+ "step": 16220
28395
+ },
28396
+ {
28397
+ "epoch": 0.8,
28398
+ "grad_norm": 2.220717668533325,
28399
+ "learning_rate": 9.928850239636347e-06,
28400
+ "loss": 5.5204,
28401
+ "step": 16224
28402
+ },
28403
+ {
28404
+ "epoch": 0.8,
28405
+ "grad_norm": 2.0210342407226562,
28406
+ "learning_rate": 9.918968328474727e-06,
28407
+ "loss": 5.3364,
28408
+ "step": 16228
28409
+ },
28410
+ {
28411
+ "epoch": 0.8,
28412
+ "grad_norm": 1.9695372581481934,
28413
+ "learning_rate": 9.909086417313109e-06,
28414
+ "loss": 5.485,
28415
+ "step": 16232
28416
+ },
28417
+ {
28418
+ "epoch": 0.8,
28419
+ "grad_norm": 1.875001311302185,
28420
+ "learning_rate": 9.89920450615149e-06,
28421
+ "loss": 5.3977,
28422
+ "step": 16236
28423
+ },
28424
+ {
28425
+ "epoch": 0.8,
28426
+ "grad_norm": 2.136852741241455,
28427
+ "learning_rate": 9.889322594989871e-06,
28428
+ "loss": 5.5784,
28429
+ "step": 16240
28430
+ },
28431
+ {
28432
+ "epoch": 0.8,
28433
+ "grad_norm": 1.8972970247268677,
28434
+ "learning_rate": 9.879440683828253e-06,
28435
+ "loss": 5.4973,
28436
+ "step": 16244
28437
+ },
28438
+ {
28439
+ "epoch": 0.8,
28440
+ "grad_norm": 2.14034366607666,
28441
+ "learning_rate": 9.869558772666634e-06,
28442
+ "loss": 5.546,
28443
+ "step": 16248
28444
+ },
28445
+ {
28446
+ "epoch": 0.8,
28447
+ "grad_norm": 2.0028293132781982,
28448
+ "learning_rate": 9.859676861505016e-06,
28449
+ "loss": 5.3869,
28450
+ "step": 16252
28451
+ },
28452
+ {
28453
+ "epoch": 0.8,
28454
+ "grad_norm": 1.9485490322113037,
28455
+ "learning_rate": 9.849794950343396e-06,
28456
+ "loss": 5.4852,
28457
+ "step": 16256
28458
+ },
28459
+ {
28460
+ "epoch": 0.8,
28461
+ "grad_norm": 1.9260238409042358,
28462
+ "learning_rate": 9.839913039181778e-06,
28463
+ "loss": 5.473,
28464
+ "step": 16260
28465
+ },
28466
+ {
28467
+ "epoch": 0.8,
28468
+ "grad_norm": 2.1630001068115234,
28469
+ "learning_rate": 9.830031128020159e-06,
28470
+ "loss": 5.4207,
28471
+ "step": 16264
28472
+ },
28473
+ {
28474
+ "epoch": 0.8,
28475
+ "grad_norm": 1.9328508377075195,
28476
+ "learning_rate": 9.82014921685854e-06,
28477
+ "loss": 5.4433,
28478
+ "step": 16268
28479
+ },
28480
+ {
28481
+ "epoch": 0.8,
28482
+ "grad_norm": 1.9832642078399658,
28483
+ "learning_rate": 9.810267305696923e-06,
28484
+ "loss": 5.4387,
28485
+ "step": 16272
28486
+ },
28487
+ {
28488
+ "epoch": 0.8,
28489
+ "grad_norm": 2.0616111755371094,
28490
+ "learning_rate": 9.800385394535303e-06,
28491
+ "loss": 5.3895,
28492
+ "step": 16276
28493
+ },
28494
+ {
28495
+ "epoch": 0.8,
28496
+ "grad_norm": 2.1709606647491455,
28497
+ "learning_rate": 9.790503483373685e-06,
28498
+ "loss": 5.5639,
28499
+ "step": 16280
28500
+ },
28501
+ {
28502
+ "epoch": 0.8,
28503
+ "grad_norm": 2.1553335189819336,
28504
+ "learning_rate": 9.780621572212066e-06,
28505
+ "loss": 5.5419,
28506
+ "step": 16284
28507
+ },
28508
+ {
28509
+ "epoch": 0.8,
28510
+ "grad_norm": 1.7869793176651,
28511
+ "learning_rate": 9.770739661050448e-06,
28512
+ "loss": 5.4425,
28513
+ "step": 16288
28514
+ },
28515
+ {
28516
+ "epoch": 0.8,
28517
+ "grad_norm": 1.902388095855713,
28518
+ "learning_rate": 9.760857749888828e-06,
28519
+ "loss": 5.3651,
28520
+ "step": 16292
28521
+ },
28522
+ {
28523
+ "epoch": 0.81,
28524
+ "grad_norm": 2.135723829269409,
28525
+ "learning_rate": 9.750975838727212e-06,
28526
+ "loss": 5.5838,
28527
+ "step": 16296
28528
+ },
28529
+ {
28530
+ "epoch": 0.81,
28531
+ "grad_norm": 2.3749701976776123,
28532
+ "learning_rate": 9.741093927565592e-06,
28533
+ "loss": 5.5562,
28534
+ "step": 16300
28535
+ },
28536
+ {
28537
+ "epoch": 0.81,
28538
+ "grad_norm": 1.9421731233596802,
28539
+ "learning_rate": 9.731212016403973e-06,
28540
+ "loss": 5.432,
28541
+ "step": 16304
28542
+ },
28543
+ {
28544
+ "epoch": 0.81,
28545
+ "grad_norm": 1.87296462059021,
28546
+ "learning_rate": 9.721330105242355e-06,
28547
+ "loss": 5.5429,
28548
+ "step": 16308
28549
+ },
28550
+ {
28551
+ "epoch": 0.81,
28552
+ "grad_norm": 2.0159077644348145,
28553
+ "learning_rate": 9.711448194080735e-06,
28554
+ "loss": 5.5088,
28555
+ "step": 16312
28556
+ },
28557
+ {
28558
+ "epoch": 0.81,
28559
+ "grad_norm": 1.9539657831192017,
28560
+ "learning_rate": 9.701566282919117e-06,
28561
+ "loss": 5.49,
28562
+ "step": 16316
28563
+ },
28564
+ {
28565
+ "epoch": 0.81,
28566
+ "grad_norm": 2.0840256214141846,
28567
+ "learning_rate": 9.691684371757497e-06,
28568
+ "loss": 5.4972,
28569
+ "step": 16320
28570
+ },
28571
+ {
28572
+ "epoch": 0.81,
28573
+ "grad_norm": 2.0817222595214844,
28574
+ "learning_rate": 9.68180246059588e-06,
28575
+ "loss": 5.5185,
28576
+ "step": 16324
28577
+ },
28578
+ {
28579
+ "epoch": 0.81,
28580
+ "grad_norm": 2.172551393508911,
28581
+ "learning_rate": 9.671920549434262e-06,
28582
+ "loss": 5.5465,
28583
+ "step": 16328
28584
+ },
28585
+ {
28586
+ "epoch": 0.81,
28587
+ "grad_norm": 1.8779146671295166,
28588
+ "learning_rate": 9.662038638272644e-06,
28589
+ "loss": 5.5047,
28590
+ "step": 16332
28591
+ },
28592
+ {
28593
+ "epoch": 0.81,
28594
+ "grad_norm": 1.9017332792282104,
28595
+ "learning_rate": 9.652156727111024e-06,
28596
+ "loss": 5.4705,
28597
+ "step": 16336
28598
+ },
28599
+ {
28600
+ "epoch": 0.81,
28601
+ "grad_norm": 2.0198209285736084,
28602
+ "learning_rate": 9.642274815949404e-06,
28603
+ "loss": 5.4886,
28604
+ "step": 16340
28605
+ },
28606
+ {
28607
+ "epoch": 0.81,
28608
+ "grad_norm": 2.0686516761779785,
28609
+ "learning_rate": 9.632392904787786e-06,
28610
+ "loss": 5.5188,
28611
+ "step": 16344
28612
+ },
28613
+ {
28614
+ "epoch": 0.81,
28615
+ "grad_norm": 1.962902545928955,
28616
+ "learning_rate": 9.622510993626167e-06,
28617
+ "loss": 5.5414,
28618
+ "step": 16348
28619
+ },
28620
+ {
28621
+ "epoch": 0.81,
28622
+ "grad_norm": 2.0159122943878174,
28623
+ "learning_rate": 9.612629082464549e-06,
28624
+ "loss": 5.422,
28625
+ "step": 16352
28626
+ },
28627
+ {
28628
+ "epoch": 0.81,
28629
+ "grad_norm": 2.2383811473846436,
28630
+ "learning_rate": 9.60274717130293e-06,
28631
+ "loss": 5.5384,
28632
+ "step": 16356
28633
+ },
28634
+ {
28635
+ "epoch": 0.81,
28636
+ "grad_norm": 1.993220567703247,
28637
+ "learning_rate": 9.592865260141313e-06,
28638
+ "loss": 5.3551,
28639
+ "step": 16360
28640
+ },
28641
+ {
28642
+ "epoch": 0.81,
28643
+ "grad_norm": 2.130995035171509,
28644
+ "learning_rate": 9.582983348979693e-06,
28645
+ "loss": 5.4926,
28646
+ "step": 16364
28647
+ },
28648
+ {
28649
+ "epoch": 0.81,
28650
+ "grad_norm": 1.9019581079483032,
28651
+ "learning_rate": 9.573101437818074e-06,
28652
+ "loss": 5.5623,
28653
+ "step": 16368
28654
+ },
28655
+ {
28656
+ "epoch": 0.81,
28657
+ "grad_norm": 2.1175732612609863,
28658
+ "learning_rate": 9.563219526656456e-06,
28659
+ "loss": 5.4529,
28660
+ "step": 16372
28661
+ },
28662
+ {
28663
+ "epoch": 0.81,
28664
+ "grad_norm": 1.8680050373077393,
28665
+ "learning_rate": 9.553337615494836e-06,
28666
+ "loss": 5.4273,
28667
+ "step": 16376
28668
+ },
28669
+ {
28670
+ "epoch": 0.81,
28671
+ "grad_norm": 2.0988821983337402,
28672
+ "learning_rate": 9.543455704333218e-06,
28673
+ "loss": 5.477,
28674
+ "step": 16380
28675
+ },
28676
+ {
28677
+ "epoch": 0.81,
28678
+ "grad_norm": 2.198651075363159,
28679
+ "learning_rate": 9.5335737931716e-06,
28680
+ "loss": 5.5427,
28681
+ "step": 16384
28682
+ },
28683
+ {
28684
+ "epoch": 0.81,
28685
+ "grad_norm": 2.1054868698120117,
28686
+ "learning_rate": 9.523691882009982e-06,
28687
+ "loss": 5.4173,
28688
+ "step": 16388
28689
+ },
28690
+ {
28691
+ "epoch": 0.81,
28692
+ "grad_norm": 2.0211637020111084,
28693
+ "learning_rate": 9.513809970848363e-06,
28694
+ "loss": 5.6035,
28695
+ "step": 16392
28696
+ },
28697
+ {
28698
+ "epoch": 0.81,
28699
+ "grad_norm": 2.0547540187835693,
28700
+ "learning_rate": 9.503928059686745e-06,
28701
+ "loss": 5.4983,
28702
+ "step": 16396
28703
+ },
28704
+ {
28705
+ "epoch": 0.81,
28706
+ "grad_norm": 1.9957647323608398,
28707
+ "learning_rate": 9.494046148525125e-06,
28708
+ "loss": 5.5374,
28709
+ "step": 16400
28710
+ },
28711
+ {
28712
+ "epoch": 0.81,
28713
+ "grad_norm": 2.0535998344421387,
28714
+ "learning_rate": 9.484164237363505e-06,
28715
+ "loss": 5.5643,
28716
+ "step": 16404
28717
+ },
28718
+ {
28719
+ "epoch": 0.81,
28720
+ "grad_norm": 2.1046228408813477,
28721
+ "learning_rate": 9.474282326201887e-06,
28722
+ "loss": 5.4727,
28723
+ "step": 16408
28724
+ },
28725
+ {
28726
+ "epoch": 0.81,
28727
+ "grad_norm": 2.1698873043060303,
28728
+ "learning_rate": 9.46440041504027e-06,
28729
+ "loss": 5.5637,
28730
+ "step": 16412
28731
+ },
28732
+ {
28733
+ "epoch": 0.81,
28734
+ "grad_norm": 1.9448730945587158,
28735
+ "learning_rate": 9.454518503878652e-06,
28736
+ "loss": 5.4932,
28737
+ "step": 16416
28738
+ },
28739
+ {
28740
+ "epoch": 0.81,
28741
+ "grad_norm": 2.0663201808929443,
28742
+ "learning_rate": 9.444636592717032e-06,
28743
+ "loss": 5.5067,
28744
+ "step": 16420
28745
+ },
28746
+ {
28747
+ "epoch": 0.81,
28748
+ "grad_norm": 1.8125361204147339,
28749
+ "learning_rate": 9.434754681555414e-06,
28750
+ "loss": 5.5381,
28751
+ "step": 16424
28752
+ },
28753
+ {
28754
+ "epoch": 0.81,
28755
+ "grad_norm": 2.077420473098755,
28756
+ "learning_rate": 9.424872770393794e-06,
28757
+ "loss": 5.5642,
28758
+ "step": 16428
28759
+ },
28760
+ {
28761
+ "epoch": 0.81,
28762
+ "grad_norm": 1.9312140941619873,
28763
+ "learning_rate": 9.414990859232176e-06,
28764
+ "loss": 5.3661,
28765
+ "step": 16432
28766
+ },
28767
+ {
28768
+ "epoch": 0.81,
28769
+ "grad_norm": 1.8529763221740723,
28770
+ "learning_rate": 9.405108948070557e-06,
28771
+ "loss": 5.4776,
28772
+ "step": 16436
28773
+ },
28774
+ {
28775
+ "epoch": 0.81,
28776
+ "grad_norm": 1.8764407634735107,
28777
+ "learning_rate": 9.395227036908939e-06,
28778
+ "loss": 5.4928,
28779
+ "step": 16440
28780
+ },
28781
+ {
28782
+ "epoch": 0.81,
28783
+ "grad_norm": 2.139594316482544,
28784
+ "learning_rate": 9.385345125747321e-06,
28785
+ "loss": 5.4108,
28786
+ "step": 16444
28787
+ },
28788
+ {
28789
+ "epoch": 0.81,
28790
+ "grad_norm": 2.1105079650878906,
28791
+ "learning_rate": 9.375463214585701e-06,
28792
+ "loss": 5.4876,
28793
+ "step": 16448
28794
+ },
28795
+ {
28796
+ "epoch": 0.81,
28797
+ "grad_norm": 1.9484140872955322,
28798
+ "learning_rate": 9.365581303424083e-06,
28799
+ "loss": 5.4148,
28800
+ "step": 16452
28801
+ },
28802
+ {
28803
+ "epoch": 0.81,
28804
+ "grad_norm": 2.0571391582489014,
28805
+ "learning_rate": 9.355699392262464e-06,
28806
+ "loss": 5.6282,
28807
+ "step": 16456
28808
+ },
28809
+ {
28810
+ "epoch": 0.81,
28811
+ "grad_norm": 1.7625576257705688,
28812
+ "learning_rate": 9.345817481100846e-06,
28813
+ "loss": 5.5073,
28814
+ "step": 16460
28815
+ },
28816
+ {
28817
+ "epoch": 0.81,
28818
+ "grad_norm": 2.1183722019195557,
28819
+ "learning_rate": 9.335935569939226e-06,
28820
+ "loss": 5.4659,
28821
+ "step": 16464
28822
+ },
28823
+ {
28824
+ "epoch": 0.81,
28825
+ "grad_norm": 2.135255813598633,
28826
+ "learning_rate": 9.326053658777608e-06,
28827
+ "loss": 5.4262,
28828
+ "step": 16468
28829
+ },
28830
+ {
28831
+ "epoch": 0.81,
28832
+ "grad_norm": 1.7497916221618652,
28833
+ "learning_rate": 9.31617174761599e-06,
28834
+ "loss": 5.537,
28835
+ "step": 16472
28836
+ },
28837
+ {
28838
+ "epoch": 0.81,
28839
+ "grad_norm": 1.9297901391983032,
28840
+ "learning_rate": 9.30628983645437e-06,
28841
+ "loss": 5.4848,
28842
+ "step": 16476
28843
+ },
28844
+ {
28845
+ "epoch": 0.81,
28846
+ "grad_norm": 1.992133617401123,
28847
+ "learning_rate": 9.296407925292753e-06,
28848
+ "loss": 5.5307,
28849
+ "step": 16480
28850
+ },
28851
+ {
28852
+ "epoch": 0.81,
28853
+ "grad_norm": 1.8346421718597412,
28854
+ "learning_rate": 9.286526014131133e-06,
28855
+ "loss": 5.4071,
28856
+ "step": 16484
28857
+ },
28858
+ {
28859
+ "epoch": 0.81,
28860
+ "grad_norm": 1.9083247184753418,
28861
+ "learning_rate": 9.276644102969515e-06,
28862
+ "loss": 5.4248,
28863
+ "step": 16488
28864
+ },
28865
+ {
28866
+ "epoch": 0.81,
28867
+ "grad_norm": 2.0954606533050537,
28868
+ "learning_rate": 9.266762191807895e-06,
28869
+ "loss": 5.5175,
28870
+ "step": 16492
28871
+ },
28872
+ {
28873
+ "epoch": 0.82,
28874
+ "grad_norm": 2.101158857345581,
28875
+ "learning_rate": 9.256880280646278e-06,
28876
+ "loss": 5.5345,
28877
+ "step": 16496
28878
+ },
28879
+ {
28880
+ "epoch": 0.82,
28881
+ "grad_norm": 2.170283794403076,
28882
+ "learning_rate": 9.24699836948466e-06,
28883
+ "loss": 5.5719,
28884
+ "step": 16500
28885
+ },
28886
+ {
28887
+ "epoch": 0.82,
28888
+ "grad_norm": 2.078697681427002,
28889
+ "learning_rate": 9.23711645832304e-06,
28890
+ "loss": 5.4199,
28891
+ "step": 16504
28892
+ },
28893
+ {
28894
+ "epoch": 0.82,
28895
+ "grad_norm": 2.1254682540893555,
28896
+ "learning_rate": 9.227234547161422e-06,
28897
+ "loss": 5.3647,
28898
+ "step": 16508
28899
+ },
28900
+ {
28901
+ "epoch": 0.82,
28902
+ "grad_norm": 2.2468132972717285,
28903
+ "learning_rate": 9.217352635999802e-06,
28904
+ "loss": 5.4673,
28905
+ "step": 16512
28906
+ },
28907
+ {
28908
+ "epoch": 0.82,
28909
+ "grad_norm": 2.053579330444336,
28910
+ "learning_rate": 9.207470724838184e-06,
28911
+ "loss": 5.4083,
28912
+ "step": 16516
28913
+ },
28914
+ {
28915
+ "epoch": 0.82,
28916
+ "grad_norm": 2.2088301181793213,
28917
+ "learning_rate": 9.197588813676565e-06,
28918
+ "loss": 5.5694,
28919
+ "step": 16520
28920
+ },
28921
+ {
28922
+ "epoch": 0.82,
28923
+ "grad_norm": 1.9974719285964966,
28924
+ "learning_rate": 9.187706902514947e-06,
28925
+ "loss": 5.4396,
28926
+ "step": 16524
28927
+ },
28928
+ {
28929
+ "epoch": 0.82,
28930
+ "grad_norm": 2.066420555114746,
28931
+ "learning_rate": 9.177824991353329e-06,
28932
+ "loss": 5.5418,
28933
+ "step": 16528
28934
+ },
28935
+ {
28936
+ "epoch": 0.82,
28937
+ "grad_norm": 2.2263917922973633,
28938
+ "learning_rate": 9.16794308019171e-06,
28939
+ "loss": 5.4904,
28940
+ "step": 16532
28941
+ },
28942
+ {
28943
+ "epoch": 0.82,
28944
+ "grad_norm": 2.0956313610076904,
28945
+ "learning_rate": 9.158061169030091e-06,
28946
+ "loss": 5.47,
28947
+ "step": 16536
28948
+ },
28949
+ {
28950
+ "epoch": 0.82,
28951
+ "grad_norm": 2.120701551437378,
28952
+ "learning_rate": 9.148179257868472e-06,
28953
+ "loss": 5.4498,
28954
+ "step": 16540
28955
+ },
28956
+ {
28957
+ "epoch": 0.82,
28958
+ "grad_norm": 2.068040609359741,
28959
+ "learning_rate": 9.138297346706854e-06,
28960
+ "loss": 5.5373,
28961
+ "step": 16544
28962
+ },
28963
+ {
28964
+ "epoch": 0.82,
28965
+ "grad_norm": 2.125322103500366,
28966
+ "learning_rate": 9.128415435545234e-06,
28967
+ "loss": 5.4782,
28968
+ "step": 16548
28969
+ },
28970
+ {
28971
+ "epoch": 0.82,
28972
+ "grad_norm": 1.8984034061431885,
28973
+ "learning_rate": 9.118533524383616e-06,
28974
+ "loss": 5.5614,
28975
+ "step": 16552
28976
+ },
28977
+ {
28978
+ "epoch": 0.82,
28979
+ "grad_norm": 2.038201332092285,
28980
+ "learning_rate": 9.108651613221998e-06,
28981
+ "loss": 5.5308,
28982
+ "step": 16556
28983
+ },
28984
+ {
28985
+ "epoch": 0.82,
28986
+ "grad_norm": 2.083704948425293,
28987
+ "learning_rate": 9.098769702060379e-06,
28988
+ "loss": 5.3986,
28989
+ "step": 16560
28990
+ },
28991
+ {
28992
+ "epoch": 0.82,
28993
+ "grad_norm": 2.2222559452056885,
28994
+ "learning_rate": 9.08888779089876e-06,
28995
+ "loss": 5.4164,
28996
+ "step": 16564
28997
+ },
28998
+ {
28999
+ "epoch": 0.82,
29000
+ "grad_norm": 2.319937229156494,
29001
+ "learning_rate": 9.079005879737141e-06,
29002
+ "loss": 5.5564,
29003
+ "step": 16568
29004
+ },
29005
+ {
29006
+ "epoch": 0.82,
29007
+ "grad_norm": 1.930305004119873,
29008
+ "learning_rate": 9.069123968575523e-06,
29009
+ "loss": 5.5181,
29010
+ "step": 16572
29011
+ },
29012
+ {
29013
+ "epoch": 0.82,
29014
+ "grad_norm": 2.175090789794922,
29015
+ "learning_rate": 9.059242057413904e-06,
29016
+ "loss": 5.4816,
29017
+ "step": 16576
29018
+ },
29019
+ {
29020
+ "epoch": 0.82,
29021
+ "grad_norm": 1.9041398763656616,
29022
+ "learning_rate": 9.049360146252286e-06,
29023
+ "loss": 5.4462,
29024
+ "step": 16580
29025
+ },
29026
+ {
29027
+ "epoch": 0.82,
29028
+ "grad_norm": 1.92721426486969,
29029
+ "learning_rate": 9.039478235090668e-06,
29030
+ "loss": 5.4993,
29031
+ "step": 16584
29032
+ },
29033
+ {
29034
+ "epoch": 0.82,
29035
+ "grad_norm": 2.098320484161377,
29036
+ "learning_rate": 9.029596323929048e-06,
29037
+ "loss": 5.4334,
29038
+ "step": 16588
29039
+ },
29040
+ {
29041
+ "epoch": 0.82,
29042
+ "grad_norm": 1.988296389579773,
29043
+ "learning_rate": 9.01971441276743e-06,
29044
+ "loss": 5.5241,
29045
+ "step": 16592
29046
+ },
29047
+ {
29048
+ "epoch": 0.82,
29049
+ "grad_norm": 1.9492675065994263,
29050
+ "learning_rate": 9.00983250160581e-06,
29051
+ "loss": 5.4024,
29052
+ "step": 16596
29053
+ },
29054
+ {
29055
+ "epoch": 0.82,
29056
+ "grad_norm": 2.0381886959075928,
29057
+ "learning_rate": 8.999950590444192e-06,
29058
+ "loss": 5.4861,
29059
+ "step": 16600
29060
+ },
29061
+ {
29062
+ "epoch": 0.82,
29063
+ "grad_norm": 2.2015647888183594,
29064
+ "learning_rate": 8.990068679282573e-06,
29065
+ "loss": 5.4589,
29066
+ "step": 16604
29067
+ },
29068
+ {
29069
+ "epoch": 0.82,
29070
+ "grad_norm": 1.970509648323059,
29071
+ "learning_rate": 8.980186768120955e-06,
29072
+ "loss": 5.5072,
29073
+ "step": 16608
29074
+ },
29075
+ {
29076
+ "epoch": 0.82,
29077
+ "grad_norm": 2.048265218734741,
29078
+ "learning_rate": 8.970304856959337e-06,
29079
+ "loss": 5.4781,
29080
+ "step": 16612
29081
+ },
29082
+ {
29083
+ "epoch": 0.82,
29084
+ "grad_norm": 2.1781177520751953,
29085
+ "learning_rate": 8.960422945797719e-06,
29086
+ "loss": 5.4382,
29087
+ "step": 16616
29088
+ },
29089
+ {
29090
+ "epoch": 0.82,
29091
+ "grad_norm": 2.3206918239593506,
29092
+ "learning_rate": 8.9505410346361e-06,
29093
+ "loss": 5.5371,
29094
+ "step": 16620
29095
+ },
29096
+ {
29097
+ "epoch": 0.82,
29098
+ "grad_norm": 2.129166603088379,
29099
+ "learning_rate": 8.94065912347448e-06,
29100
+ "loss": 5.4773,
29101
+ "step": 16624
29102
+ },
29103
+ {
29104
+ "epoch": 0.82,
29105
+ "grad_norm": 1.8786160945892334,
29106
+ "learning_rate": 8.930777212312862e-06,
29107
+ "loss": 5.3599,
29108
+ "step": 16628
29109
+ },
29110
+ {
29111
+ "epoch": 0.82,
29112
+ "grad_norm": 2.114015579223633,
29113
+ "learning_rate": 8.920895301151242e-06,
29114
+ "loss": 5.5262,
29115
+ "step": 16632
29116
+ },
29117
+ {
29118
+ "epoch": 0.82,
29119
+ "grad_norm": 1.9730268716812134,
29120
+ "learning_rate": 8.911013389989624e-06,
29121
+ "loss": 5.3921,
29122
+ "step": 16636
29123
+ },
29124
+ {
29125
+ "epoch": 0.82,
29126
+ "grad_norm": 2.007050037384033,
29127
+ "learning_rate": 8.901131478828005e-06,
29128
+ "loss": 5.4521,
29129
+ "step": 16640
29130
+ },
29131
+ {
29132
+ "epoch": 0.82,
29133
+ "grad_norm": 1.9782698154449463,
29134
+ "learning_rate": 8.891249567666388e-06,
29135
+ "loss": 5.4024,
29136
+ "step": 16644
29137
+ },
29138
+ {
29139
+ "epoch": 0.82,
29140
+ "grad_norm": 2.118109941482544,
29141
+ "learning_rate": 8.881367656504769e-06,
29142
+ "loss": 5.4549,
29143
+ "step": 16648
29144
+ },
29145
+ {
29146
+ "epoch": 0.82,
29147
+ "grad_norm": 1.9269788265228271,
29148
+ "learning_rate": 8.871485745343149e-06,
29149
+ "loss": 5.3879,
29150
+ "step": 16652
29151
+ },
29152
+ {
29153
+ "epoch": 0.82,
29154
+ "grad_norm": 2.0356998443603516,
29155
+ "learning_rate": 8.861603834181531e-06,
29156
+ "loss": 5.4729,
29157
+ "step": 16656
29158
+ },
29159
+ {
29160
+ "epoch": 0.82,
29161
+ "grad_norm": 1.9945244789123535,
29162
+ "learning_rate": 8.851721923019912e-06,
29163
+ "loss": 5.489,
29164
+ "step": 16660
29165
+ },
29166
+ {
29167
+ "epoch": 0.82,
29168
+ "grad_norm": 2.1839029788970947,
29169
+ "learning_rate": 8.841840011858294e-06,
29170
+ "loss": 5.3626,
29171
+ "step": 16664
29172
+ },
29173
+ {
29174
+ "epoch": 0.82,
29175
+ "grad_norm": 1.9056282043457031,
29176
+ "learning_rate": 8.831958100696674e-06,
29177
+ "loss": 5.5155,
29178
+ "step": 16668
29179
+ },
29180
+ {
29181
+ "epoch": 0.82,
29182
+ "grad_norm": 1.971134066581726,
29183
+ "learning_rate": 8.822076189535058e-06,
29184
+ "loss": 5.4582,
29185
+ "step": 16672
29186
+ },
29187
+ {
29188
+ "epoch": 0.82,
29189
+ "grad_norm": 2.2608683109283447,
29190
+ "learning_rate": 8.812194278373438e-06,
29191
+ "loss": 5.5671,
29192
+ "step": 16676
29193
+ },
29194
+ {
29195
+ "epoch": 0.82,
29196
+ "grad_norm": 1.8873885869979858,
29197
+ "learning_rate": 8.80231236721182e-06,
29198
+ "loss": 5.5378,
29199
+ "step": 16680
29200
+ },
29201
+ {
29202
+ "epoch": 0.82,
29203
+ "grad_norm": 2.1268815994262695,
29204
+ "learning_rate": 8.7924304560502e-06,
29205
+ "loss": 5.5464,
29206
+ "step": 16684
29207
+ },
29208
+ {
29209
+ "epoch": 0.82,
29210
+ "grad_norm": 2.120333671569824,
29211
+ "learning_rate": 8.782548544888581e-06,
29212
+ "loss": 5.4566,
29213
+ "step": 16688
29214
+ },
29215
+ {
29216
+ "epoch": 0.82,
29217
+ "grad_norm": 2.202099323272705,
29218
+ "learning_rate": 8.772666633726963e-06,
29219
+ "loss": 5.3333,
29220
+ "step": 16692
29221
+ },
29222
+ {
29223
+ "epoch": 0.82,
29224
+ "grad_norm": 1.8824645280838013,
29225
+ "learning_rate": 8.762784722565343e-06,
29226
+ "loss": 5.5016,
29227
+ "step": 16696
29228
+ },
29229
+ {
29230
+ "epoch": 0.83,
29231
+ "grad_norm": 2.0147457122802734,
29232
+ "learning_rate": 8.752902811403727e-06,
29233
+ "loss": 5.4586,
29234
+ "step": 16700
29235
+ },
29236
+ {
29237
+ "epoch": 0.83,
29238
+ "grad_norm": 2.041895627975464,
29239
+ "learning_rate": 8.743020900242107e-06,
29240
+ "loss": 5.5022,
29241
+ "step": 16704
29242
+ },
29243
+ {
29244
+ "epoch": 0.83,
29245
+ "grad_norm": 2.077690362930298,
29246
+ "learning_rate": 8.73313898908049e-06,
29247
+ "loss": 5.5781,
29248
+ "step": 16708
29249
+ },
29250
+ {
29251
+ "epoch": 0.83,
29252
+ "grad_norm": 1.9477964639663696,
29253
+ "learning_rate": 8.72325707791887e-06,
29254
+ "loss": 5.5584,
29255
+ "step": 16712
29256
+ },
29257
+ {
29258
+ "epoch": 0.83,
29259
+ "grad_norm": 1.8398356437683105,
29260
+ "learning_rate": 8.713375166757252e-06,
29261
+ "loss": 5.5032,
29262
+ "step": 16716
29263
+ },
29264
+ {
29265
+ "epoch": 0.83,
29266
+ "grad_norm": 1.9973992109298706,
29267
+ "learning_rate": 8.703493255595632e-06,
29268
+ "loss": 5.37,
29269
+ "step": 16720
29270
+ },
29271
+ {
29272
+ "epoch": 0.83,
29273
+ "grad_norm": 2.0176520347595215,
29274
+ "learning_rate": 8.693611344434013e-06,
29275
+ "loss": 5.5138,
29276
+ "step": 16724
29277
+ },
29278
+ {
29279
+ "epoch": 0.83,
29280
+ "grad_norm": 2.1837217807769775,
29281
+ "learning_rate": 8.683729433272396e-06,
29282
+ "loss": 5.5308,
29283
+ "step": 16728
29284
+ },
29285
+ {
29286
+ "epoch": 0.83,
29287
+ "grad_norm": 2.0156595706939697,
29288
+ "learning_rate": 8.673847522110777e-06,
29289
+ "loss": 5.5722,
29290
+ "step": 16732
29291
+ },
29292
+ {
29293
+ "epoch": 0.83,
29294
+ "grad_norm": 2.3676466941833496,
29295
+ "learning_rate": 8.663965610949159e-06,
29296
+ "loss": 5.5775,
29297
+ "step": 16736
29298
+ },
29299
+ {
29300
+ "epoch": 0.83,
29301
+ "grad_norm": 2.270716667175293,
29302
+ "learning_rate": 8.65408369978754e-06,
29303
+ "loss": 5.4107,
29304
+ "step": 16740
29305
+ },
29306
+ {
29307
+ "epoch": 0.83,
29308
+ "grad_norm": 1.9551721811294556,
29309
+ "learning_rate": 8.644201788625921e-06,
29310
+ "loss": 5.6022,
29311
+ "step": 16744
29312
+ },
29313
+ {
29314
+ "epoch": 0.83,
29315
+ "grad_norm": 1.9827896356582642,
29316
+ "learning_rate": 8.634319877464302e-06,
29317
+ "loss": 5.4923,
29318
+ "step": 16748
29319
+ },
29320
+ {
29321
+ "epoch": 0.83,
29322
+ "grad_norm": 2.1360533237457275,
29323
+ "learning_rate": 8.624437966302682e-06,
29324
+ "loss": 5.4328,
29325
+ "step": 16752
29326
+ },
29327
+ {
29328
+ "epoch": 0.83,
29329
+ "grad_norm": 2.094109296798706,
29330
+ "learning_rate": 8.614556055141066e-06,
29331
+ "loss": 5.416,
29332
+ "step": 16756
29333
+ },
29334
+ {
29335
+ "epoch": 0.83,
29336
+ "grad_norm": 1.9513869285583496,
29337
+ "learning_rate": 8.604674143979446e-06,
29338
+ "loss": 5.403,
29339
+ "step": 16760
29340
+ },
29341
+ {
29342
+ "epoch": 0.83,
29343
+ "grad_norm": 2.0020523071289062,
29344
+ "learning_rate": 8.594792232817828e-06,
29345
+ "loss": 5.4628,
29346
+ "step": 16764
29347
+ },
29348
+ {
29349
+ "epoch": 0.83,
29350
+ "grad_norm": 2.405801296234131,
29351
+ "learning_rate": 8.584910321656209e-06,
29352
+ "loss": 5.5799,
29353
+ "step": 16768
29354
+ },
29355
+ {
29356
+ "epoch": 0.83,
29357
+ "grad_norm": 2.114650249481201,
29358
+ "learning_rate": 8.57502841049459e-06,
29359
+ "loss": 5.4151,
29360
+ "step": 16772
29361
+ },
29362
+ {
29363
+ "epoch": 0.83,
29364
+ "grad_norm": 1.9269883632659912,
29365
+ "learning_rate": 8.565146499332971e-06,
29366
+ "loss": 5.4321,
29367
+ "step": 16776
29368
+ },
29369
+ {
29370
+ "epoch": 0.83,
29371
+ "grad_norm": 1.894822597503662,
29372
+ "learning_rate": 8.555264588171353e-06,
29373
+ "loss": 5.4419,
29374
+ "step": 16780
29375
+ },
29376
+ {
29377
+ "epoch": 0.83,
29378
+ "grad_norm": 2.0186209678649902,
29379
+ "learning_rate": 8.545382677009735e-06,
29380
+ "loss": 5.5657,
29381
+ "step": 16784
29382
+ },
29383
+ {
29384
+ "epoch": 0.83,
29385
+ "grad_norm": 1.920372486114502,
29386
+ "learning_rate": 8.535500765848115e-06,
29387
+ "loss": 5.5334,
29388
+ "step": 16788
29389
+ },
29390
+ {
29391
+ "epoch": 0.83,
29392
+ "grad_norm": 2.1055715084075928,
29393
+ "learning_rate": 8.525618854686497e-06,
29394
+ "loss": 5.4952,
29395
+ "step": 16792
29396
+ },
29397
+ {
29398
+ "epoch": 0.83,
29399
+ "grad_norm": 2.0137712955474854,
29400
+ "learning_rate": 8.515736943524878e-06,
29401
+ "loss": 5.5128,
29402
+ "step": 16796
29403
+ },
29404
+ {
29405
+ "epoch": 0.83,
29406
+ "grad_norm": 2.0473227500915527,
29407
+ "learning_rate": 8.50585503236326e-06,
29408
+ "loss": 5.6793,
29409
+ "step": 16800
29410
+ },
29411
+ {
29412
+ "epoch": 0.83,
29413
+ "grad_norm": 2.0757429599761963,
29414
+ "learning_rate": 8.49597312120164e-06,
29415
+ "loss": 5.4888,
29416
+ "step": 16804
29417
+ },
29418
+ {
29419
+ "epoch": 0.83,
29420
+ "grad_norm": 1.9529187679290771,
29421
+ "learning_rate": 8.486091210040022e-06,
29422
+ "loss": 5.2731,
29423
+ "step": 16808
29424
+ },
29425
+ {
29426
+ "epoch": 0.83,
29427
+ "grad_norm": 1.897220492362976,
29428
+ "learning_rate": 8.476209298878403e-06,
29429
+ "loss": 5.4103,
29430
+ "step": 16812
29431
+ },
29432
+ {
29433
+ "epoch": 0.83,
29434
+ "grad_norm": 2.0591204166412354,
29435
+ "learning_rate": 8.466327387716785e-06,
29436
+ "loss": 5.5609,
29437
+ "step": 16816
29438
+ },
29439
+ {
29440
+ "epoch": 0.83,
29441
+ "grad_norm": 1.8429813385009766,
29442
+ "learning_rate": 8.456445476555167e-06,
29443
+ "loss": 5.3453,
29444
+ "step": 16820
29445
+ },
29446
+ {
29447
+ "epoch": 0.83,
29448
+ "grad_norm": 1.854067087173462,
29449
+ "learning_rate": 8.446563565393547e-06,
29450
+ "loss": 5.4855,
29451
+ "step": 16824
29452
+ },
29453
+ {
29454
+ "epoch": 0.83,
29455
+ "grad_norm": 1.87723708152771,
29456
+ "learning_rate": 8.43668165423193e-06,
29457
+ "loss": 5.5087,
29458
+ "step": 16828
29459
+ },
29460
+ {
29461
+ "epoch": 0.83,
29462
+ "grad_norm": 2.25486159324646,
29463
+ "learning_rate": 8.42679974307031e-06,
29464
+ "loss": 5.6419,
29465
+ "step": 16832
29466
+ },
29467
+ {
29468
+ "epoch": 0.83,
29469
+ "grad_norm": 2.1054129600524902,
29470
+ "learning_rate": 8.416917831908692e-06,
29471
+ "loss": 5.4713,
29472
+ "step": 16836
29473
+ },
29474
+ {
29475
+ "epoch": 0.83,
29476
+ "grad_norm": 1.9546363353729248,
29477
+ "learning_rate": 8.407035920747072e-06,
29478
+ "loss": 5.51,
29479
+ "step": 16840
29480
+ },
29481
+ {
29482
+ "epoch": 0.83,
29483
+ "grad_norm": 1.8574483394622803,
29484
+ "learning_rate": 8.397154009585454e-06,
29485
+ "loss": 5.4273,
29486
+ "step": 16844
29487
+ },
29488
+ {
29489
+ "epoch": 0.83,
29490
+ "grad_norm": 2.135690927505493,
29491
+ "learning_rate": 8.387272098423836e-06,
29492
+ "loss": 5.4408,
29493
+ "step": 16848
29494
+ },
29495
+ {
29496
+ "epoch": 0.83,
29497
+ "grad_norm": 1.9898631572723389,
29498
+ "learning_rate": 8.377390187262217e-06,
29499
+ "loss": 5.2905,
29500
+ "step": 16852
29501
+ },
29502
+ {
29503
+ "epoch": 0.83,
29504
+ "grad_norm": 2.016470193862915,
29505
+ "learning_rate": 8.367508276100599e-06,
29506
+ "loss": 5.3266,
29507
+ "step": 16856
29508
+ },
29509
+ {
29510
+ "epoch": 0.83,
29511
+ "grad_norm": 1.9973735809326172,
29512
+ "learning_rate": 8.357626364938979e-06,
29513
+ "loss": 5.4987,
29514
+ "step": 16860
29515
+ },
29516
+ {
29517
+ "epoch": 0.83,
29518
+ "grad_norm": 2.116567373275757,
29519
+ "learning_rate": 8.347744453777361e-06,
29520
+ "loss": 5.4306,
29521
+ "step": 16864
29522
+ },
29523
+ {
29524
+ "epoch": 0.83,
29525
+ "grad_norm": 2.044475793838501,
29526
+ "learning_rate": 8.337862542615741e-06,
29527
+ "loss": 5.4907,
29528
+ "step": 16868
29529
+ },
29530
+ {
29531
+ "epoch": 0.83,
29532
+ "grad_norm": 2.090527296066284,
29533
+ "learning_rate": 8.327980631454123e-06,
29534
+ "loss": 5.3698,
29535
+ "step": 16872
29536
+ },
29537
+ {
29538
+ "epoch": 0.83,
29539
+ "grad_norm": 1.98384428024292,
29540
+ "learning_rate": 8.318098720292506e-06,
29541
+ "loss": 5.469,
29542
+ "step": 16876
29543
+ },
29544
+ {
29545
+ "epoch": 0.83,
29546
+ "grad_norm": 1.775121808052063,
29547
+ "learning_rate": 8.308216809130886e-06,
29548
+ "loss": 5.461,
29549
+ "step": 16880
29550
+ },
29551
+ {
29552
+ "epoch": 0.83,
29553
+ "grad_norm": 1.9661427736282349,
29554
+ "learning_rate": 8.298334897969268e-06,
29555
+ "loss": 5.494,
29556
+ "step": 16884
29557
+ },
29558
+ {
29559
+ "epoch": 0.83,
29560
+ "grad_norm": 2.0031895637512207,
29561
+ "learning_rate": 8.288452986807648e-06,
29562
+ "loss": 5.5011,
29563
+ "step": 16888
29564
+ },
29565
+ {
29566
+ "epoch": 0.83,
29567
+ "grad_norm": 2.221911907196045,
29568
+ "learning_rate": 8.27857107564603e-06,
29569
+ "loss": 5.4296,
29570
+ "step": 16892
29571
+ },
29572
+ {
29573
+ "epoch": 0.83,
29574
+ "grad_norm": 2.0504343509674072,
29575
+ "learning_rate": 8.26868916448441e-06,
29576
+ "loss": 5.5495,
29577
+ "step": 16896
29578
+ },
29579
+ {
29580
+ "epoch": 0.84,
29581
+ "grad_norm": 2.1068339347839355,
29582
+ "learning_rate": 8.258807253322794e-06,
29583
+ "loss": 5.408,
29584
+ "step": 16900
29585
+ },
29586
+ {
29587
+ "epoch": 0.84,
29588
+ "grad_norm": 2.0044867992401123,
29589
+ "learning_rate": 8.248925342161175e-06,
29590
+ "loss": 5.512,
29591
+ "step": 16904
29592
+ },
29593
+ {
29594
+ "epoch": 0.84,
29595
+ "grad_norm": 2.3192813396453857,
29596
+ "learning_rate": 8.239043430999555e-06,
29597
+ "loss": 5.4185,
29598
+ "step": 16908
29599
+ },
29600
+ {
29601
+ "epoch": 0.84,
29602
+ "grad_norm": 1.8410991430282593,
29603
+ "learning_rate": 8.229161519837937e-06,
29604
+ "loss": 5.4222,
29605
+ "step": 16912
29606
+ },
29607
+ {
29608
+ "epoch": 0.84,
29609
+ "grad_norm": 2.0134191513061523,
29610
+ "learning_rate": 8.219279608676318e-06,
29611
+ "loss": 5.4357,
29612
+ "step": 16916
29613
+ },
29614
+ {
29615
+ "epoch": 0.84,
29616
+ "grad_norm": 2.0390844345092773,
29617
+ "learning_rate": 8.2093976975147e-06,
29618
+ "loss": 5.5363,
29619
+ "step": 16920
29620
+ },
29621
+ {
29622
+ "epoch": 0.84,
29623
+ "grad_norm": 2.12786602973938,
29624
+ "learning_rate": 8.19951578635308e-06,
29625
+ "loss": 5.31,
29626
+ "step": 16924
29627
+ },
29628
+ {
29629
+ "epoch": 0.84,
29630
+ "grad_norm": 1.9766027927398682,
29631
+ "learning_rate": 8.189633875191464e-06,
29632
+ "loss": 5.534,
29633
+ "step": 16928
29634
+ },
29635
+ {
29636
+ "epoch": 0.84,
29637
+ "grad_norm": 1.7689497470855713,
29638
+ "learning_rate": 8.179751964029844e-06,
29639
+ "loss": 5.3465,
29640
+ "step": 16932
29641
+ },
29642
+ {
29643
+ "epoch": 0.84,
29644
+ "grad_norm": 2.117271900177002,
29645
+ "learning_rate": 8.169870052868225e-06,
29646
+ "loss": 5.4583,
29647
+ "step": 16936
29648
+ },
29649
+ {
29650
+ "epoch": 0.84,
29651
+ "grad_norm": 2.0808498859405518,
29652
+ "learning_rate": 8.159988141706607e-06,
29653
+ "loss": 5.477,
29654
+ "step": 16940
29655
+ },
29656
+ {
29657
+ "epoch": 0.84,
29658
+ "grad_norm": 2.0178062915802,
29659
+ "learning_rate": 8.150106230544987e-06,
29660
+ "loss": 5.5178,
29661
+ "step": 16944
29662
+ },
29663
+ {
29664
+ "epoch": 0.84,
29665
+ "grad_norm": 1.7878342866897583,
29666
+ "learning_rate": 8.140224319383369e-06,
29667
+ "loss": 5.533,
29668
+ "step": 16948
29669
+ },
29670
+ {
29671
+ "epoch": 0.84,
29672
+ "grad_norm": 2.0112874507904053,
29673
+ "learning_rate": 8.13034240822175e-06,
29674
+ "loss": 5.342,
29675
+ "step": 16952
29676
+ },
29677
+ {
29678
+ "epoch": 0.84,
29679
+ "grad_norm": 2.224484443664551,
29680
+ "learning_rate": 8.120460497060133e-06,
29681
+ "loss": 5.4574,
29682
+ "step": 16956
29683
+ },
29684
+ {
29685
+ "epoch": 0.84,
29686
+ "grad_norm": 2.29886531829834,
29687
+ "learning_rate": 8.110578585898514e-06,
29688
+ "loss": 5.5541,
29689
+ "step": 16960
29690
+ },
29691
+ {
29692
+ "epoch": 0.84,
29693
+ "grad_norm": 1.8924994468688965,
29694
+ "learning_rate": 8.100696674736896e-06,
29695
+ "loss": 5.4196,
29696
+ "step": 16964
29697
+ },
29698
+ {
29699
+ "epoch": 0.84,
29700
+ "grad_norm": 2.2159488201141357,
29701
+ "learning_rate": 8.090814763575276e-06,
29702
+ "loss": 5.4989,
29703
+ "step": 16968
29704
+ },
29705
+ {
29706
+ "epoch": 0.84,
29707
+ "grad_norm": 2.170715570449829,
29708
+ "learning_rate": 8.080932852413656e-06,
29709
+ "loss": 5.5074,
29710
+ "step": 16972
29711
+ },
29712
+ {
29713
+ "epoch": 0.84,
29714
+ "grad_norm": 2.0515708923339844,
29715
+ "learning_rate": 8.071050941252038e-06,
29716
+ "loss": 5.4033,
29717
+ "step": 16976
29718
+ },
29719
+ {
29720
+ "epoch": 0.84,
29721
+ "grad_norm": 2.0467865467071533,
29722
+ "learning_rate": 8.061169030090419e-06,
29723
+ "loss": 5.4566,
29724
+ "step": 16980
29725
+ },
29726
+ {
29727
+ "epoch": 0.84,
29728
+ "grad_norm": 1.9163670539855957,
29729
+ "learning_rate": 8.0512871189288e-06,
29730
+ "loss": 5.5335,
29731
+ "step": 16984
29732
+ },
29733
+ {
29734
+ "epoch": 0.84,
29735
+ "grad_norm": 2.1390318870544434,
29736
+ "learning_rate": 8.041405207767183e-06,
29737
+ "loss": 5.5707,
29738
+ "step": 16988
29739
+ },
29740
+ {
29741
+ "epoch": 0.84,
29742
+ "grad_norm": 1.9965319633483887,
29743
+ "learning_rate": 8.031523296605565e-06,
29744
+ "loss": 5.4165,
29745
+ "step": 16992
29746
+ },
29747
+ {
29748
+ "epoch": 0.84,
29749
+ "grad_norm": 2.137233257293701,
29750
+ "learning_rate": 8.021641385443945e-06,
29751
+ "loss": 5.3596,
29752
+ "step": 16996
29753
+ },
29754
+ {
29755
+ "epoch": 0.84,
29756
+ "grad_norm": 2.152256727218628,
29757
+ "learning_rate": 8.011759474282327e-06,
29758
+ "loss": 5.3396,
29759
+ "step": 17000
29760
+ },
29761
+ {
29762
+ "epoch": 0.84,
29763
+ "grad_norm": 2.28680682182312,
29764
+ "learning_rate": 8.001877563120708e-06,
29765
+ "loss": 5.5507,
29766
+ "step": 17004
29767
+ },
29768
+ {
29769
+ "epoch": 0.84,
29770
+ "grad_norm": 2.26821231842041,
29771
+ "learning_rate": 7.991995651959088e-06,
29772
+ "loss": 5.4969,
29773
+ "step": 17008
29774
+ },
29775
+ {
29776
+ "epoch": 0.84,
29777
+ "grad_norm": 2.275667428970337,
29778
+ "learning_rate": 7.98211374079747e-06,
29779
+ "loss": 5.441,
29780
+ "step": 17012
29781
+ },
29782
+ {
29783
+ "epoch": 0.84,
29784
+ "grad_norm": 2.080756902694702,
29785
+ "learning_rate": 7.972231829635852e-06,
29786
+ "loss": 5.4398,
29787
+ "step": 17016
29788
+ },
29789
+ {
29790
+ "epoch": 0.84,
29791
+ "grad_norm": 2.10422420501709,
29792
+ "learning_rate": 7.962349918474234e-06,
29793
+ "loss": 5.437,
29794
+ "step": 17020
29795
+ },
29796
+ {
29797
+ "epoch": 0.84,
29798
+ "grad_norm": 1.858323335647583,
29799
+ "learning_rate": 7.952468007312615e-06,
29800
+ "loss": 5.5432,
29801
+ "step": 17024
29802
+ },
29803
+ {
29804
+ "epoch": 0.84,
29805
+ "grad_norm": 2.4101650714874268,
29806
+ "learning_rate": 7.942586096150997e-06,
29807
+ "loss": 5.412,
29808
+ "step": 17028
29809
+ },
29810
+ {
29811
+ "epoch": 0.84,
29812
+ "grad_norm": 2.2219436168670654,
29813
+ "learning_rate": 7.932704184989377e-06,
29814
+ "loss": 5.5279,
29815
+ "step": 17032
29816
+ },
29817
+ {
29818
+ "epoch": 0.84,
29819
+ "grad_norm": 2.182474374771118,
29820
+ "learning_rate": 7.922822273827757e-06,
29821
+ "loss": 5.4662,
29822
+ "step": 17036
29823
+ },
29824
+ {
29825
+ "epoch": 0.84,
29826
+ "grad_norm": 2.060351610183716,
29827
+ "learning_rate": 7.91294036266614e-06,
29828
+ "loss": 5.504,
29829
+ "step": 17040
29830
+ },
29831
+ {
29832
+ "epoch": 0.84,
29833
+ "grad_norm": 2.1096701622009277,
29834
+ "learning_rate": 7.903058451504522e-06,
29835
+ "loss": 5.4539,
29836
+ "step": 17044
29837
+ },
29838
+ {
29839
+ "epoch": 0.84,
29840
+ "grad_norm": 2.0492708683013916,
29841
+ "learning_rate": 7.893176540342904e-06,
29842
+ "loss": 5.4398,
29843
+ "step": 17048
29844
+ },
29845
+ {
29846
+ "epoch": 0.84,
29847
+ "grad_norm": 2.032947301864624,
29848
+ "learning_rate": 7.883294629181284e-06,
29849
+ "loss": 5.5253,
29850
+ "step": 17052
29851
+ },
29852
+ {
29853
+ "epoch": 0.84,
29854
+ "grad_norm": 2.0764636993408203,
29855
+ "learning_rate": 7.873412718019666e-06,
29856
+ "loss": 5.4655,
29857
+ "step": 17056
29858
+ },
29859
+ {
29860
+ "epoch": 0.84,
29861
+ "grad_norm": 2.105656862258911,
29862
+ "learning_rate": 7.863530806858046e-06,
29863
+ "loss": 5.4761,
29864
+ "step": 17060
29865
+ },
29866
+ {
29867
+ "epoch": 0.84,
29868
+ "grad_norm": 1.975953459739685,
29869
+ "learning_rate": 7.853648895696428e-06,
29870
+ "loss": 5.5364,
29871
+ "step": 17064
29872
+ },
29873
+ {
29874
+ "epoch": 0.84,
29875
+ "grad_norm": 2.0592944622039795,
29876
+ "learning_rate": 7.843766984534809e-06,
29877
+ "loss": 5.4987,
29878
+ "step": 17068
29879
+ },
29880
+ {
29881
+ "epoch": 0.84,
29882
+ "grad_norm": 2.1122117042541504,
29883
+ "learning_rate": 7.833885073373191e-06,
29884
+ "loss": 5.4162,
29885
+ "step": 17072
29886
+ },
29887
+ {
29888
+ "epoch": 0.84,
29889
+ "grad_norm": 2.143172264099121,
29890
+ "learning_rate": 7.824003162211573e-06,
29891
+ "loss": 5.4959,
29892
+ "step": 17076
29893
+ },
29894
+ {
29895
+ "epoch": 0.84,
29896
+ "grad_norm": 1.9919787645339966,
29897
+ "learning_rate": 7.814121251049953e-06,
29898
+ "loss": 5.469,
29899
+ "step": 17080
29900
+ },
29901
+ {
29902
+ "epoch": 0.84,
29903
+ "grad_norm": 1.9146004915237427,
29904
+ "learning_rate": 7.804239339888335e-06,
29905
+ "loss": 5.4748,
29906
+ "step": 17084
29907
+ },
29908
+ {
29909
+ "epoch": 0.84,
29910
+ "grad_norm": 2.3150486946105957,
29911
+ "learning_rate": 7.794357428726716e-06,
29912
+ "loss": 5.4056,
29913
+ "step": 17088
29914
+ },
29915
+ {
29916
+ "epoch": 0.84,
29917
+ "grad_norm": 2.1717705726623535,
29918
+ "learning_rate": 7.784475517565098e-06,
29919
+ "loss": 5.5389,
29920
+ "step": 17092
29921
+ },
29922
+ {
29923
+ "epoch": 0.84,
29924
+ "grad_norm": 2.1674489974975586,
29925
+ "learning_rate": 7.774593606403478e-06,
29926
+ "loss": 5.3378,
29927
+ "step": 17096
29928
+ },
29929
+ {
29930
+ "epoch": 0.84,
29931
+ "grad_norm": 2.17425537109375,
29932
+ "learning_rate": 7.76471169524186e-06,
29933
+ "loss": 5.5094,
29934
+ "step": 17100
29935
+ },
29936
+ {
29937
+ "epoch": 0.85,
29938
+ "grad_norm": 2.2170867919921875,
29939
+ "learning_rate": 7.754829784080242e-06,
29940
+ "loss": 5.4591,
29941
+ "step": 17104
29942
+ },
29943
+ {
29944
+ "epoch": 0.85,
29945
+ "grad_norm": 2.0710206031799316,
29946
+ "learning_rate": 7.744947872918623e-06,
29947
+ "loss": 5.461,
29948
+ "step": 17108
29949
+ },
29950
+ {
29951
+ "epoch": 0.85,
29952
+ "grad_norm": 1.9662617444992065,
29953
+ "learning_rate": 7.735065961757005e-06,
29954
+ "loss": 5.5232,
29955
+ "step": 17112
29956
+ },
29957
+ {
29958
+ "epoch": 0.85,
29959
+ "grad_norm": 2.1950018405914307,
29960
+ "learning_rate": 7.725184050595385e-06,
29961
+ "loss": 5.4845,
29962
+ "step": 17116
29963
+ },
29964
+ {
29965
+ "epoch": 0.85,
29966
+ "grad_norm": 2.166281223297119,
29967
+ "learning_rate": 7.715302139433767e-06,
29968
+ "loss": 5.4899,
29969
+ "step": 17120
29970
+ },
29971
+ {
29972
+ "epoch": 0.85,
29973
+ "grad_norm": 2.0825867652893066,
29974
+ "learning_rate": 7.705420228272148e-06,
29975
+ "loss": 5.5889,
29976
+ "step": 17124
29977
+ },
29978
+ {
29979
+ "epoch": 0.85,
29980
+ "grad_norm": 2.0458121299743652,
29981
+ "learning_rate": 7.69553831711053e-06,
29982
+ "loss": 5.4465,
29983
+ "step": 17128
29984
+ },
29985
+ {
29986
+ "epoch": 0.85,
29987
+ "grad_norm": 1.972931146621704,
29988
+ "learning_rate": 7.685656405948912e-06,
29989
+ "loss": 5.447,
29990
+ "step": 17132
29991
+ },
29992
+ {
29993
+ "epoch": 0.85,
29994
+ "grad_norm": 2.2071616649627686,
29995
+ "learning_rate": 7.675774494787292e-06,
29996
+ "loss": 5.5405,
29997
+ "step": 17136
29998
+ },
29999
+ {
30000
+ "epoch": 0.85,
30001
+ "grad_norm": 2.24798583984375,
30002
+ "learning_rate": 7.665892583625674e-06,
30003
+ "loss": 5.5034,
30004
+ "step": 17140
30005
+ },
30006
+ {
30007
+ "epoch": 0.85,
30008
+ "grad_norm": 2.3352463245391846,
30009
+ "learning_rate": 7.656010672464054e-06,
30010
+ "loss": 5.5322,
30011
+ "step": 17144
30012
+ },
30013
+ {
30014
+ "epoch": 0.85,
30015
+ "grad_norm": 2.1701347827911377,
30016
+ "learning_rate": 7.646128761302436e-06,
30017
+ "loss": 5.6042,
30018
+ "step": 17148
30019
+ },
30020
+ {
30021
+ "epoch": 0.85,
30022
+ "grad_norm": 2.0654942989349365,
30023
+ "learning_rate": 7.636246850140817e-06,
30024
+ "loss": 5.5102,
30025
+ "step": 17152
30026
+ },
30027
+ {
30028
+ "epoch": 0.85,
30029
+ "grad_norm": 2.410454273223877,
30030
+ "learning_rate": 7.626364938979198e-06,
30031
+ "loss": 5.4408,
30032
+ "step": 17156
30033
+ },
30034
+ {
30035
+ "epoch": 0.85,
30036
+ "grad_norm": 2.0221352577209473,
30037
+ "learning_rate": 7.616483027817581e-06,
30038
+ "loss": 5.4086,
30039
+ "step": 17160
30040
+ },
30041
+ {
30042
+ "epoch": 0.85,
30043
+ "grad_norm": 2.211092233657837,
30044
+ "learning_rate": 7.606601116655962e-06,
30045
+ "loss": 5.4589,
30046
+ "step": 17164
30047
+ },
30048
+ {
30049
+ "epoch": 0.85,
30050
+ "grad_norm": 1.9467920064926147,
30051
+ "learning_rate": 7.596719205494343e-06,
30052
+ "loss": 5.4867,
30053
+ "step": 17168
30054
+ },
30055
+ {
30056
+ "epoch": 0.85,
30057
+ "grad_norm": 2.1144025325775146,
30058
+ "learning_rate": 7.586837294332725e-06,
30059
+ "loss": 5.5143,
30060
+ "step": 17172
30061
+ },
30062
+ {
30063
+ "epoch": 0.85,
30064
+ "grad_norm": 2.1652915477752686,
30065
+ "learning_rate": 7.576955383171106e-06,
30066
+ "loss": 5.533,
30067
+ "step": 17176
30068
+ },
30069
+ {
30070
+ "epoch": 0.85,
30071
+ "grad_norm": 1.9289984703063965,
30072
+ "learning_rate": 7.567073472009486e-06,
30073
+ "loss": 5.5406,
30074
+ "step": 17180
30075
+ },
30076
+ {
30077
+ "epoch": 0.85,
30078
+ "grad_norm": 2.028322458267212,
30079
+ "learning_rate": 7.557191560847867e-06,
30080
+ "loss": 5.4118,
30081
+ "step": 17184
30082
+ },
30083
+ {
30084
+ "epoch": 0.85,
30085
+ "grad_norm": 2.2385053634643555,
30086
+ "learning_rate": 7.54730964968625e-06,
30087
+ "loss": 5.5023,
30088
+ "step": 17188
30089
+ },
30090
+ {
30091
+ "epoch": 0.85,
30092
+ "grad_norm": 1.8756178617477417,
30093
+ "learning_rate": 7.5374277385246315e-06,
30094
+ "loss": 5.4028,
30095
+ "step": 17192
30096
+ },
30097
+ {
30098
+ "epoch": 0.85,
30099
+ "grad_norm": 2.0008492469787598,
30100
+ "learning_rate": 7.527545827363013e-06,
30101
+ "loss": 5.4572,
30102
+ "step": 17196
30103
+ },
30104
+ {
30105
+ "epoch": 0.85,
30106
+ "grad_norm": 1.9606680870056152,
30107
+ "learning_rate": 7.517663916201394e-06,
30108
+ "loss": 5.3813,
30109
+ "step": 17200
30110
+ },
30111
+ {
30112
+ "epoch": 0.85,
30113
+ "grad_norm": 1.765757441520691,
30114
+ "learning_rate": 7.507782005039775e-06,
30115
+ "loss": 5.433,
30116
+ "step": 17204
30117
+ },
30118
+ {
30119
+ "epoch": 0.85,
30120
+ "grad_norm": 2.2999327182769775,
30121
+ "learning_rate": 7.497900093878156e-06,
30122
+ "loss": 5.4443,
30123
+ "step": 17208
30124
+ },
30125
+ {
30126
+ "epoch": 0.85,
30127
+ "grad_norm": 1.831790804862976,
30128
+ "learning_rate": 7.488018182716537e-06,
30129
+ "loss": 5.4745,
30130
+ "step": 17212
30131
+ },
30132
+ {
30133
+ "epoch": 0.85,
30134
+ "grad_norm": 2.0281448364257812,
30135
+ "learning_rate": 7.47813627155492e-06,
30136
+ "loss": 5.3546,
30137
+ "step": 17216
30138
+ },
30139
+ {
30140
+ "epoch": 0.85,
30141
+ "grad_norm": 2.163875102996826,
30142
+ "learning_rate": 7.468254360393301e-06,
30143
+ "loss": 5.4957,
30144
+ "step": 17220
30145
+ },
30146
+ {
30147
+ "epoch": 0.85,
30148
+ "grad_norm": 2.0201468467712402,
30149
+ "learning_rate": 7.458372449231682e-06,
30150
+ "loss": 5.3797,
30151
+ "step": 17224
30152
+ },
30153
+ {
30154
+ "epoch": 0.85,
30155
+ "grad_norm": 1.9520927667617798,
30156
+ "learning_rate": 7.448490538070063e-06,
30157
+ "loss": 5.5663,
30158
+ "step": 17228
30159
+ },
30160
+ {
30161
+ "epoch": 0.85,
30162
+ "grad_norm": 1.9706037044525146,
30163
+ "learning_rate": 7.4386086269084445e-06,
30164
+ "loss": 5.4462,
30165
+ "step": 17232
30166
+ },
30167
+ {
30168
+ "epoch": 0.85,
30169
+ "grad_norm": 2.2334280014038086,
30170
+ "learning_rate": 7.428726715746826e-06,
30171
+ "loss": 5.5028,
30172
+ "step": 17236
30173
+ },
30174
+ {
30175
+ "epoch": 0.85,
30176
+ "grad_norm": 2.089432716369629,
30177
+ "learning_rate": 7.418844804585207e-06,
30178
+ "loss": 5.5537,
30179
+ "step": 17240
30180
+ },
30181
+ {
30182
+ "epoch": 0.85,
30183
+ "grad_norm": 2.0354325771331787,
30184
+ "learning_rate": 7.408962893423589e-06,
30185
+ "loss": 5.4,
30186
+ "step": 17244
30187
+ },
30188
+ {
30189
+ "epoch": 0.85,
30190
+ "grad_norm": 1.9282554388046265,
30191
+ "learning_rate": 7.39908098226197e-06,
30192
+ "loss": 5.3915,
30193
+ "step": 17248
30194
+ },
30195
+ {
30196
+ "epoch": 0.85,
30197
+ "grad_norm": 2.133868455886841,
30198
+ "learning_rate": 7.389199071100351e-06,
30199
+ "loss": 5.4868,
30200
+ "step": 17252
30201
+ },
30202
+ {
30203
+ "epoch": 0.85,
30204
+ "grad_norm": 2.0873701572418213,
30205
+ "learning_rate": 7.379317159938733e-06,
30206
+ "loss": 5.6375,
30207
+ "step": 17256
30208
+ },
30209
+ {
30210
+ "epoch": 0.85,
30211
+ "grad_norm": 1.9751038551330566,
30212
+ "learning_rate": 7.369435248777114e-06,
30213
+ "loss": 5.4291,
30214
+ "step": 17260
30215
+ },
30216
+ {
30217
+ "epoch": 0.85,
30218
+ "grad_norm": 1.8549004793167114,
30219
+ "learning_rate": 7.359553337615495e-06,
30220
+ "loss": 5.4953,
30221
+ "step": 17264
30222
+ },
30223
+ {
30224
+ "epoch": 0.85,
30225
+ "grad_norm": 1.9882365465164185,
30226
+ "learning_rate": 7.349671426453876e-06,
30227
+ "loss": 5.5191,
30228
+ "step": 17268
30229
+ },
30230
+ {
30231
+ "epoch": 0.85,
30232
+ "grad_norm": 2.0008509159088135,
30233
+ "learning_rate": 7.339789515292258e-06,
30234
+ "loss": 5.573,
30235
+ "step": 17272
30236
+ },
30237
+ {
30238
+ "epoch": 0.85,
30239
+ "grad_norm": 1.9084336757659912,
30240
+ "learning_rate": 7.3299076041306395e-06,
30241
+ "loss": 5.5864,
30242
+ "step": 17276
30243
+ },
30244
+ {
30245
+ "epoch": 0.85,
30246
+ "grad_norm": 2.3234941959381104,
30247
+ "learning_rate": 7.320025692969021e-06,
30248
+ "loss": 5.4648,
30249
+ "step": 17280
30250
+ },
30251
+ {
30252
+ "epoch": 0.85,
30253
+ "grad_norm": 2.033445358276367,
30254
+ "learning_rate": 7.310143781807402e-06,
30255
+ "loss": 5.5663,
30256
+ "step": 17284
30257
+ },
30258
+ {
30259
+ "epoch": 0.85,
30260
+ "grad_norm": 2.036726474761963,
30261
+ "learning_rate": 7.300261870645783e-06,
30262
+ "loss": 5.3706,
30263
+ "step": 17288
30264
+ },
30265
+ {
30266
+ "epoch": 0.85,
30267
+ "grad_norm": 2.135927677154541,
30268
+ "learning_rate": 7.290379959484164e-06,
30269
+ "loss": 5.4365,
30270
+ "step": 17292
30271
+ },
30272
+ {
30273
+ "epoch": 0.85,
30274
+ "grad_norm": 2.0434539318084717,
30275
+ "learning_rate": 7.280498048322546e-06,
30276
+ "loss": 5.4914,
30277
+ "step": 17296
30278
+ },
30279
+ {
30280
+ "epoch": 0.85,
30281
+ "grad_norm": 1.9364794492721558,
30282
+ "learning_rate": 7.270616137160927e-06,
30283
+ "loss": 5.358,
30284
+ "step": 17300
30285
+ },
30286
+ {
30287
+ "epoch": 0.85,
30288
+ "grad_norm": 2.1555495262145996,
30289
+ "learning_rate": 7.260734225999309e-06,
30290
+ "loss": 5.4667,
30291
+ "step": 17304
30292
+ },
30293
+ {
30294
+ "epoch": 0.86,
30295
+ "grad_norm": 2.0687687397003174,
30296
+ "learning_rate": 7.25085231483769e-06,
30297
+ "loss": 5.5075,
30298
+ "step": 17308
30299
+ },
30300
+ {
30301
+ "epoch": 0.86,
30302
+ "grad_norm": 2.2169644832611084,
30303
+ "learning_rate": 7.240970403676071e-06,
30304
+ "loss": 5.5186,
30305
+ "step": 17312
30306
+ },
30307
+ {
30308
+ "epoch": 0.86,
30309
+ "grad_norm": 2.0690207481384277,
30310
+ "learning_rate": 7.2310884925144525e-06,
30311
+ "loss": 5.4944,
30312
+ "step": 17316
30313
+ },
30314
+ {
30315
+ "epoch": 0.86,
30316
+ "grad_norm": 2.172851324081421,
30317
+ "learning_rate": 7.221206581352834e-06,
30318
+ "loss": 5.5178,
30319
+ "step": 17320
30320
+ },
30321
+ {
30322
+ "epoch": 0.86,
30323
+ "grad_norm": 2.178602457046509,
30324
+ "learning_rate": 7.211324670191215e-06,
30325
+ "loss": 5.5693,
30326
+ "step": 17324
30327
+ },
30328
+ {
30329
+ "epoch": 0.86,
30330
+ "grad_norm": 1.9525049924850464,
30331
+ "learning_rate": 7.201442759029596e-06,
30332
+ "loss": 5.3773,
30333
+ "step": 17328
30334
+ },
30335
+ {
30336
+ "epoch": 0.86,
30337
+ "grad_norm": 2.0250043869018555,
30338
+ "learning_rate": 7.191560847867978e-06,
30339
+ "loss": 5.4672,
30340
+ "step": 17332
30341
+ },
30342
+ {
30343
+ "epoch": 0.86,
30344
+ "grad_norm": 2.229799747467041,
30345
+ "learning_rate": 7.1816789367063594e-06,
30346
+ "loss": 5.4451,
30347
+ "step": 17336
30348
+ },
30349
+ {
30350
+ "epoch": 0.86,
30351
+ "grad_norm": 2.2048773765563965,
30352
+ "learning_rate": 7.171797025544741e-06,
30353
+ "loss": 5.5388,
30354
+ "step": 17340
30355
+ },
30356
+ {
30357
+ "epoch": 0.86,
30358
+ "grad_norm": 2.1948986053466797,
30359
+ "learning_rate": 7.161915114383122e-06,
30360
+ "loss": 5.4121,
30361
+ "step": 17344
30362
+ },
30363
+ {
30364
+ "epoch": 0.86,
30365
+ "grad_norm": 2.410446882247925,
30366
+ "learning_rate": 7.152033203221503e-06,
30367
+ "loss": 5.5068,
30368
+ "step": 17348
30369
+ },
30370
+ {
30371
+ "epoch": 0.86,
30372
+ "grad_norm": 2.0198326110839844,
30373
+ "learning_rate": 7.142151292059884e-06,
30374
+ "loss": 5.4786,
30375
+ "step": 17352
30376
+ },
30377
+ {
30378
+ "epoch": 0.86,
30379
+ "grad_norm": 2.1943955421447754,
30380
+ "learning_rate": 7.1322693808982655e-06,
30381
+ "loss": 5.4957,
30382
+ "step": 17356
30383
+ },
30384
+ {
30385
+ "epoch": 0.86,
30386
+ "grad_norm": 2.1132426261901855,
30387
+ "learning_rate": 7.122387469736648e-06,
30388
+ "loss": 5.3965,
30389
+ "step": 17360
30390
+ },
30391
+ {
30392
+ "epoch": 0.86,
30393
+ "grad_norm": 2.0462357997894287,
30394
+ "learning_rate": 7.112505558575029e-06,
30395
+ "loss": 5.5153,
30396
+ "step": 17364
30397
+ },
30398
+ {
30399
+ "epoch": 0.86,
30400
+ "grad_norm": 2.0501723289489746,
30401
+ "learning_rate": 7.10262364741341e-06,
30402
+ "loss": 5.5062,
30403
+ "step": 17368
30404
+ },
30405
+ {
30406
+ "epoch": 0.86,
30407
+ "grad_norm": 2.148674726486206,
30408
+ "learning_rate": 7.092741736251791e-06,
30409
+ "loss": 5.2946,
30410
+ "step": 17372
30411
+ },
30412
+ {
30413
+ "epoch": 0.86,
30414
+ "grad_norm": 2.0384411811828613,
30415
+ "learning_rate": 7.082859825090172e-06,
30416
+ "loss": 5.6131,
30417
+ "step": 17376
30418
+ },
30419
+ {
30420
+ "epoch": 0.86,
30421
+ "grad_norm": 2.235848903656006,
30422
+ "learning_rate": 7.072977913928554e-06,
30423
+ "loss": 5.3982,
30424
+ "step": 17380
30425
+ },
30426
+ {
30427
+ "epoch": 0.86,
30428
+ "grad_norm": 2.0050299167633057,
30429
+ "learning_rate": 7.063096002766935e-06,
30430
+ "loss": 5.5681,
30431
+ "step": 17384
30432
+ },
30433
+ {
30434
+ "epoch": 0.86,
30435
+ "grad_norm": 1.9482308626174927,
30436
+ "learning_rate": 7.053214091605318e-06,
30437
+ "loss": 5.4242,
30438
+ "step": 17388
30439
+ },
30440
+ {
30441
+ "epoch": 0.86,
30442
+ "grad_norm": 2.077125072479248,
30443
+ "learning_rate": 7.043332180443699e-06,
30444
+ "loss": 5.4092,
30445
+ "step": 17392
30446
+ },
30447
+ {
30448
+ "epoch": 0.86,
30449
+ "grad_norm": 2.2242355346679688,
30450
+ "learning_rate": 7.033450269282079e-06,
30451
+ "loss": 5.4268,
30452
+ "step": 17396
30453
+ },
30454
+ {
30455
+ "epoch": 0.86,
30456
+ "grad_norm": 2.2366597652435303,
30457
+ "learning_rate": 7.0235683581204605e-06,
30458
+ "loss": 5.4304,
30459
+ "step": 17400
30460
+ },
30461
+ {
30462
+ "epoch": 0.86,
30463
+ "grad_norm": 2.3268561363220215,
30464
+ "learning_rate": 7.013686446958842e-06,
30465
+ "loss": 5.4141,
30466
+ "step": 17404
30467
+ },
30468
+ {
30469
+ "epoch": 0.86,
30470
+ "grad_norm": 2.1040186882019043,
30471
+ "learning_rate": 7.003804535797223e-06,
30472
+ "loss": 5.4129,
30473
+ "step": 17408
30474
+ },
30475
+ {
30476
+ "epoch": 0.86,
30477
+ "grad_norm": 2.0050957202911377,
30478
+ "learning_rate": 6.993922624635604e-06,
30479
+ "loss": 5.5029,
30480
+ "step": 17412
30481
+ },
30482
+ {
30483
+ "epoch": 0.86,
30484
+ "grad_norm": 1.914214849472046,
30485
+ "learning_rate": 6.984040713473987e-06,
30486
+ "loss": 5.5394,
30487
+ "step": 17416
30488
+ },
30489
+ {
30490
+ "epoch": 0.86,
30491
+ "grad_norm": 2.112946033477783,
30492
+ "learning_rate": 6.974158802312368e-06,
30493
+ "loss": 5.4547,
30494
+ "step": 17420
30495
+ },
30496
+ {
30497
+ "epoch": 0.86,
30498
+ "grad_norm": 1.980510950088501,
30499
+ "learning_rate": 6.9642768911507495e-06,
30500
+ "loss": 5.4997,
30501
+ "step": 17424
30502
+ },
30503
+ {
30504
+ "epoch": 0.86,
30505
+ "grad_norm": 1.985985517501831,
30506
+ "learning_rate": 6.954394979989131e-06,
30507
+ "loss": 5.3671,
30508
+ "step": 17428
30509
+ },
30510
+ {
30511
+ "epoch": 0.86,
30512
+ "grad_norm": 1.897262454032898,
30513
+ "learning_rate": 6.944513068827511e-06,
30514
+ "loss": 5.4578,
30515
+ "step": 17432
30516
+ },
30517
+ {
30518
+ "epoch": 0.86,
30519
+ "grad_norm": 1.9851828813552856,
30520
+ "learning_rate": 6.934631157665892e-06,
30521
+ "loss": 5.4318,
30522
+ "step": 17436
30523
+ },
30524
+ {
30525
+ "epoch": 0.86,
30526
+ "grad_norm": 1.8977246284484863,
30527
+ "learning_rate": 6.9247492465042735e-06,
30528
+ "loss": 5.4777,
30529
+ "step": 17440
30530
+ },
30531
+ {
30532
+ "epoch": 0.86,
30533
+ "grad_norm": 2.1280171871185303,
30534
+ "learning_rate": 6.9148673353426564e-06,
30535
+ "loss": 5.583,
30536
+ "step": 17444
30537
+ },
30538
+ {
30539
+ "epoch": 0.86,
30540
+ "grad_norm": 2.3764641284942627,
30541
+ "learning_rate": 6.904985424181038e-06,
30542
+ "loss": 5.5559,
30543
+ "step": 17448
30544
+ },
30545
+ {
30546
+ "epoch": 0.86,
30547
+ "grad_norm": 1.9994136095046997,
30548
+ "learning_rate": 6.895103513019419e-06,
30549
+ "loss": 5.5662,
30550
+ "step": 17452
30551
+ },
30552
+ {
30553
+ "epoch": 0.86,
30554
+ "grad_norm": 2.108659267425537,
30555
+ "learning_rate": 6.8852216018578e-06,
30556
+ "loss": 5.4098,
30557
+ "step": 17456
30558
+ },
30559
+ {
30560
+ "epoch": 0.86,
30561
+ "grad_norm": 1.9477959871292114,
30562
+ "learning_rate": 6.875339690696181e-06,
30563
+ "loss": 5.4161,
30564
+ "step": 17460
30565
+ },
30566
+ {
30567
+ "epoch": 0.86,
30568
+ "grad_norm": 2.2120134830474854,
30569
+ "learning_rate": 6.865457779534562e-06,
30570
+ "loss": 5.5344,
30571
+ "step": 17464
30572
+ },
30573
+ {
30574
+ "epoch": 0.86,
30575
+ "grad_norm": 1.9351931810379028,
30576
+ "learning_rate": 6.855575868372943e-06,
30577
+ "loss": 5.5115,
30578
+ "step": 17468
30579
+ },
30580
+ {
30581
+ "epoch": 0.86,
30582
+ "grad_norm": 1.9376587867736816,
30583
+ "learning_rate": 6.845693957211324e-06,
30584
+ "loss": 5.4218,
30585
+ "step": 17472
30586
+ },
30587
+ {
30588
+ "epoch": 0.86,
30589
+ "grad_norm": 1.8527143001556396,
30590
+ "learning_rate": 6.835812046049707e-06,
30591
+ "loss": 5.4781,
30592
+ "step": 17476
30593
+ },
30594
+ {
30595
+ "epoch": 0.86,
30596
+ "grad_norm": 1.9370919466018677,
30597
+ "learning_rate": 6.825930134888088e-06,
30598
+ "loss": 5.6185,
30599
+ "step": 17480
30600
+ },
30601
+ {
30602
+ "epoch": 0.86,
30603
+ "grad_norm": 1.8956094980239868,
30604
+ "learning_rate": 6.816048223726469e-06,
30605
+ "loss": 5.5335,
30606
+ "step": 17484
30607
+ },
30608
+ {
30609
+ "epoch": 0.86,
30610
+ "grad_norm": 2.1358373165130615,
30611
+ "learning_rate": 6.806166312564851e-06,
30612
+ "loss": 5.5501,
30613
+ "step": 17488
30614
+ },
30615
+ {
30616
+ "epoch": 0.86,
30617
+ "grad_norm": 1.8500255346298218,
30618
+ "learning_rate": 6.796284401403232e-06,
30619
+ "loss": 5.4718,
30620
+ "step": 17492
30621
+ },
30622
+ {
30623
+ "epoch": 0.86,
30624
+ "grad_norm": 1.9620647430419922,
30625
+ "learning_rate": 6.786402490241612e-06,
30626
+ "loss": 5.4566,
30627
+ "step": 17496
30628
+ },
30629
+ {
30630
+ "epoch": 0.86,
30631
+ "grad_norm": 2.0902743339538574,
30632
+ "learning_rate": 6.776520579079993e-06,
30633
+ "loss": 5.484,
30634
+ "step": 17500
30635
+ },
30636
+ {
30637
+ "epoch": 0.86,
30638
+ "grad_norm": 2.329399824142456,
30639
+ "learning_rate": 6.766638667918376e-06,
30640
+ "loss": 5.5337,
30641
+ "step": 17504
30642
+ },
30643
+ {
30644
+ "epoch": 0.87,
30645
+ "grad_norm": 1.9751675128936768,
30646
+ "learning_rate": 6.7567567567567575e-06,
30647
+ "loss": 5.4638,
30648
+ "step": 17508
30649
+ },
30650
+ {
30651
+ "epoch": 0.87,
30652
+ "grad_norm": 2.188885450363159,
30653
+ "learning_rate": 6.746874845595139e-06,
30654
+ "loss": 5.5174,
30655
+ "step": 17512
30656
+ },
30657
+ {
30658
+ "epoch": 0.87,
30659
+ "grad_norm": 2.1230249404907227,
30660
+ "learning_rate": 6.73699293443352e-06,
30661
+ "loss": 5.4525,
30662
+ "step": 17516
30663
+ },
30664
+ {
30665
+ "epoch": 0.87,
30666
+ "grad_norm": 2.0748202800750732,
30667
+ "learning_rate": 6.727111023271901e-06,
30668
+ "loss": 5.5633,
30669
+ "step": 17520
30670
+ },
30671
+ {
30672
+ "epoch": 0.87,
30673
+ "grad_norm": 1.8766546249389648,
30674
+ "learning_rate": 6.717229112110282e-06,
30675
+ "loss": 5.5295,
30676
+ "step": 17524
30677
+ },
30678
+ {
30679
+ "epoch": 0.87,
30680
+ "grad_norm": 2.1104044914245605,
30681
+ "learning_rate": 6.707347200948664e-06,
30682
+ "loss": 5.5265,
30683
+ "step": 17528
30684
+ },
30685
+ {
30686
+ "epoch": 0.87,
30687
+ "grad_norm": 1.9020673036575317,
30688
+ "learning_rate": 6.697465289787046e-06,
30689
+ "loss": 5.4684,
30690
+ "step": 17532
30691
+ },
30692
+ {
30693
+ "epoch": 0.87,
30694
+ "grad_norm": 1.9456652402877808,
30695
+ "learning_rate": 6.687583378625427e-06,
30696
+ "loss": 5.3826,
30697
+ "step": 17536
30698
+ },
30699
+ {
30700
+ "epoch": 0.87,
30701
+ "grad_norm": 2.117117166519165,
30702
+ "learning_rate": 6.677701467463808e-06,
30703
+ "loss": 5.4525,
30704
+ "step": 17540
30705
+ },
30706
+ {
30707
+ "epoch": 0.87,
30708
+ "grad_norm": 2.0873782634735107,
30709
+ "learning_rate": 6.667819556302189e-06,
30710
+ "loss": 5.5402,
30711
+ "step": 17544
30712
+ },
30713
+ {
30714
+ "epoch": 0.87,
30715
+ "grad_norm": 2.0289838314056396,
30716
+ "learning_rate": 6.6579376451405705e-06,
30717
+ "loss": 5.5603,
30718
+ "step": 17548
30719
+ },
30720
+ {
30721
+ "epoch": 0.87,
30722
+ "grad_norm": 2.2275471687316895,
30723
+ "learning_rate": 6.648055733978952e-06,
30724
+ "loss": 5.3792,
30725
+ "step": 17552
30726
+ },
30727
+ {
30728
+ "epoch": 0.87,
30729
+ "grad_norm": 1.9133155345916748,
30730
+ "learning_rate": 6.638173822817333e-06,
30731
+ "loss": 5.4289,
30732
+ "step": 17556
30733
+ },
30734
+ {
30735
+ "epoch": 0.87,
30736
+ "grad_norm": 2.193645477294922,
30737
+ "learning_rate": 6.628291911655715e-06,
30738
+ "loss": 5.5224,
30739
+ "step": 17560
30740
+ },
30741
+ {
30742
+ "epoch": 0.87,
30743
+ "grad_norm": 2.1608972549438477,
30744
+ "learning_rate": 6.618410000494096e-06,
30745
+ "loss": 5.5412,
30746
+ "step": 17564
30747
+ },
30748
+ {
30749
+ "epoch": 0.87,
30750
+ "grad_norm": 2.141594648361206,
30751
+ "learning_rate": 6.6085280893324774e-06,
30752
+ "loss": 5.4787,
30753
+ "step": 17568
30754
+ },
30755
+ {
30756
+ "epoch": 0.87,
30757
+ "grad_norm": 1.9416935443878174,
30758
+ "learning_rate": 6.598646178170859e-06,
30759
+ "loss": 5.3674,
30760
+ "step": 17572
30761
+ },
30762
+ {
30763
+ "epoch": 0.87,
30764
+ "grad_norm": 2.110677480697632,
30765
+ "learning_rate": 6.58876426700924e-06,
30766
+ "loss": 5.4024,
30767
+ "step": 17576
30768
+ },
30769
+ {
30770
+ "epoch": 0.87,
30771
+ "grad_norm": 2.2235372066497803,
30772
+ "learning_rate": 6.578882355847621e-06,
30773
+ "loss": 5.4458,
30774
+ "step": 17580
30775
+ },
30776
+ {
30777
+ "epoch": 0.87,
30778
+ "grad_norm": 2.280282974243164,
30779
+ "learning_rate": 6.569000444686002e-06,
30780
+ "loss": 5.5981,
30781
+ "step": 17584
30782
+ },
30783
+ {
30784
+ "epoch": 0.87,
30785
+ "grad_norm": 2.1084625720977783,
30786
+ "learning_rate": 6.559118533524384e-06,
30787
+ "loss": 5.4475,
30788
+ "step": 17588
30789
+ },
30790
+ {
30791
+ "epoch": 0.87,
30792
+ "grad_norm": 2.004232406616211,
30793
+ "learning_rate": 6.5492366223627656e-06,
30794
+ "loss": 5.4314,
30795
+ "step": 17592
30796
+ },
30797
+ {
30798
+ "epoch": 0.87,
30799
+ "grad_norm": 1.9286199808120728,
30800
+ "learning_rate": 6.539354711201147e-06,
30801
+ "loss": 5.3943,
30802
+ "step": 17596
30803
+ },
30804
+ {
30805
+ "epoch": 0.87,
30806
+ "grad_norm": 1.9742239713668823,
30807
+ "learning_rate": 6.529472800039528e-06,
30808
+ "loss": 5.4633,
30809
+ "step": 17600
30810
+ },
30811
+ {
30812
+ "epoch": 0.87,
30813
+ "grad_norm": 2.1503305435180664,
30814
+ "learning_rate": 6.519590888877909e-06,
30815
+ "loss": 5.4654,
30816
+ "step": 17604
30817
+ },
30818
+ {
30819
+ "epoch": 0.87,
30820
+ "grad_norm": 1.996319055557251,
30821
+ "learning_rate": 6.50970897771629e-06,
30822
+ "loss": 5.4479,
30823
+ "step": 17608
30824
+ },
30825
+ {
30826
+ "epoch": 0.87,
30827
+ "grad_norm": 2.1689870357513428,
30828
+ "learning_rate": 6.499827066554672e-06,
30829
+ "loss": 5.4242,
30830
+ "step": 17612
30831
+ },
30832
+ {
30833
+ "epoch": 0.87,
30834
+ "grad_norm": 2.0061464309692383,
30835
+ "learning_rate": 6.489945155393053e-06,
30836
+ "loss": 5.3833,
30837
+ "step": 17616
30838
+ },
30839
+ {
30840
+ "epoch": 0.87,
30841
+ "grad_norm": 2.1201388835906982,
30842
+ "learning_rate": 6.480063244231435e-06,
30843
+ "loss": 5.4374,
30844
+ "step": 17620
30845
+ },
30846
+ {
30847
+ "epoch": 0.87,
30848
+ "grad_norm": 2.196545124053955,
30849
+ "learning_rate": 6.470181333069816e-06,
30850
+ "loss": 5.3564,
30851
+ "step": 17624
30852
+ },
30853
+ {
30854
+ "epoch": 0.87,
30855
+ "grad_norm": 2.073232412338257,
30856
+ "learning_rate": 6.460299421908197e-06,
30857
+ "loss": 5.4057,
30858
+ "step": 17628
30859
+ },
30860
+ {
30861
+ "epoch": 0.87,
30862
+ "grad_norm": 1.9354524612426758,
30863
+ "learning_rate": 6.4504175107465785e-06,
30864
+ "loss": 5.4718,
30865
+ "step": 17632
30866
+ },
30867
+ {
30868
+ "epoch": 0.87,
30869
+ "grad_norm": 2.032994508743286,
30870
+ "learning_rate": 6.44053559958496e-06,
30871
+ "loss": 5.358,
30872
+ "step": 17636
30873
+ },
30874
+ {
30875
+ "epoch": 0.87,
30876
+ "grad_norm": 2.130598545074463,
30877
+ "learning_rate": 6.430653688423341e-06,
30878
+ "loss": 5.4658,
30879
+ "step": 17640
30880
+ },
30881
+ {
30882
+ "epoch": 0.87,
30883
+ "grad_norm": 1.8692468404769897,
30884
+ "learning_rate": 6.420771777261722e-06,
30885
+ "loss": 5.4004,
30886
+ "step": 17644
30887
+ },
30888
+ {
30889
+ "epoch": 0.87,
30890
+ "grad_norm": 1.9368531703948975,
30891
+ "learning_rate": 6.410889866100104e-06,
30892
+ "loss": 5.4809,
30893
+ "step": 17648
30894
+ },
30895
+ {
30896
+ "epoch": 0.87,
30897
+ "grad_norm": 2.235506534576416,
30898
+ "learning_rate": 6.4010079549384855e-06,
30899
+ "loss": 5.4532,
30900
+ "step": 17652
30901
+ },
30902
+ {
30903
+ "epoch": 0.87,
30904
+ "grad_norm": 1.9980324506759644,
30905
+ "learning_rate": 6.391126043776867e-06,
30906
+ "loss": 5.5156,
30907
+ "step": 17656
30908
+ },
30909
+ {
30910
+ "epoch": 0.87,
30911
+ "grad_norm": 1.947649598121643,
30912
+ "learning_rate": 6.381244132615248e-06,
30913
+ "loss": 5.3676,
30914
+ "step": 17660
30915
+ },
30916
+ {
30917
+ "epoch": 0.87,
30918
+ "grad_norm": 2.365041971206665,
30919
+ "learning_rate": 6.371362221453629e-06,
30920
+ "loss": 5.4387,
30921
+ "step": 17664
30922
+ },
30923
+ {
30924
+ "epoch": 0.87,
30925
+ "grad_norm": 2.2820627689361572,
30926
+ "learning_rate": 6.36148031029201e-06,
30927
+ "loss": 5.4541,
30928
+ "step": 17668
30929
+ },
30930
+ {
30931
+ "epoch": 0.87,
30932
+ "grad_norm": 2.02691650390625,
30933
+ "learning_rate": 6.3515983991303915e-06,
30934
+ "loss": 5.5084,
30935
+ "step": 17672
30936
+ },
30937
+ {
30938
+ "epoch": 0.87,
30939
+ "grad_norm": 2.0064783096313477,
30940
+ "learning_rate": 6.3417164879687744e-06,
30941
+ "loss": 5.5205,
30942
+ "step": 17676
30943
+ },
30944
+ {
30945
+ "epoch": 0.87,
30946
+ "grad_norm": 1.9961150884628296,
30947
+ "learning_rate": 6.331834576807155e-06,
30948
+ "loss": 5.3904,
30949
+ "step": 17680
30950
+ },
30951
+ {
30952
+ "epoch": 0.87,
30953
+ "grad_norm": 2.2273404598236084,
30954
+ "learning_rate": 6.321952665645536e-06,
30955
+ "loss": 5.4756,
30956
+ "step": 17684
30957
+ },
30958
+ {
30959
+ "epoch": 0.87,
30960
+ "grad_norm": 2.078472852706909,
30961
+ "learning_rate": 6.312070754483917e-06,
30962
+ "loss": 5.5535,
30963
+ "step": 17688
30964
+ },
30965
+ {
30966
+ "epoch": 0.87,
30967
+ "grad_norm": 1.9248629808425903,
30968
+ "learning_rate": 6.3021888433222984e-06,
30969
+ "loss": 5.4378,
30970
+ "step": 17692
30971
+ },
30972
+ {
30973
+ "epoch": 0.87,
30974
+ "grad_norm": 2.1985530853271484,
30975
+ "learning_rate": 6.29230693216068e-06,
30976
+ "loss": 5.4345,
30977
+ "step": 17696
30978
+ },
30979
+ {
30980
+ "epoch": 0.87,
30981
+ "grad_norm": 2.087536096572876,
30982
+ "learning_rate": 6.282425020999061e-06,
30983
+ "loss": 5.5135,
30984
+ "step": 17700
30985
+ },
30986
+ {
30987
+ "epoch": 0.87,
30988
+ "grad_norm": 2.0699515342712402,
30989
+ "learning_rate": 6.272543109837444e-06,
30990
+ "loss": 5.3821,
30991
+ "step": 17704
30992
+ },
30993
+ {
30994
+ "epoch": 0.87,
30995
+ "grad_norm": 2.309680223464966,
30996
+ "learning_rate": 6.262661198675825e-06,
30997
+ "loss": 5.3426,
30998
+ "step": 17708
30999
+ },
31000
+ {
31001
+ "epoch": 0.88,
31002
+ "grad_norm": 1.9877557754516602,
31003
+ "learning_rate": 6.252779287514206e-06,
31004
+ "loss": 5.554,
31005
+ "step": 17712
31006
+ },
31007
+ {
31008
+ "epoch": 0.88,
31009
+ "grad_norm": 2.1621484756469727,
31010
+ "learning_rate": 6.2428973763525866e-06,
31011
+ "loss": 5.4897,
31012
+ "step": 17716
31013
+ },
31014
+ {
31015
+ "epoch": 0.88,
31016
+ "grad_norm": 2.0568161010742188,
31017
+ "learning_rate": 6.233015465190968e-06,
31018
+ "loss": 5.4726,
31019
+ "step": 17720
31020
+ },
31021
+ {
31022
+ "epoch": 0.88,
31023
+ "grad_norm": 1.8659361600875854,
31024
+ "learning_rate": 6.22313355402935e-06,
31025
+ "loss": 5.461,
31026
+ "step": 17724
31027
+ },
31028
+ {
31029
+ "epoch": 0.88,
31030
+ "grad_norm": 1.8461517095565796,
31031
+ "learning_rate": 6.213251642867731e-06,
31032
+ "loss": 5.3407,
31033
+ "step": 17728
31034
+ },
31035
+ {
31036
+ "epoch": 0.88,
31037
+ "grad_norm": 2.2194485664367676,
31038
+ "learning_rate": 6.203369731706112e-06,
31039
+ "loss": 5.3863,
31040
+ "step": 17732
31041
+ },
31042
+ {
31043
+ "epoch": 0.88,
31044
+ "grad_norm": 2.2594525814056396,
31045
+ "learning_rate": 6.1934878205444935e-06,
31046
+ "loss": 5.4737,
31047
+ "step": 17736
31048
+ },
31049
+ {
31050
+ "epoch": 0.88,
31051
+ "grad_norm": 2.067777156829834,
31052
+ "learning_rate": 6.1836059093828755e-06,
31053
+ "loss": 5.4817,
31054
+ "step": 17740
31055
+ },
31056
+ {
31057
+ "epoch": 0.88,
31058
+ "grad_norm": 1.9696800708770752,
31059
+ "learning_rate": 6.173723998221257e-06,
31060
+ "loss": 5.5215,
31061
+ "step": 17744
31062
+ },
31063
+ {
31064
+ "epoch": 0.88,
31065
+ "grad_norm": 1.9700802564620972,
31066
+ "learning_rate": 6.163842087059637e-06,
31067
+ "loss": 5.5821,
31068
+ "step": 17748
31069
+ },
31070
+ {
31071
+ "epoch": 0.88,
31072
+ "grad_norm": 2.2519845962524414,
31073
+ "learning_rate": 6.153960175898018e-06,
31074
+ "loss": 5.5501,
31075
+ "step": 17752
31076
+ },
31077
+ {
31078
+ "epoch": 0.88,
31079
+ "grad_norm": 2.1531550884246826,
31080
+ "learning_rate": 6.1440782647364e-06,
31081
+ "loss": 5.354,
31082
+ "step": 17756
31083
+ },
31084
+ {
31085
+ "epoch": 0.88,
31086
+ "grad_norm": 2.299639940261841,
31087
+ "learning_rate": 6.134196353574782e-06,
31088
+ "loss": 5.5487,
31089
+ "step": 17760
31090
+ },
31091
+ {
31092
+ "epoch": 0.88,
31093
+ "grad_norm": 1.9032407999038696,
31094
+ "learning_rate": 6.124314442413163e-06,
31095
+ "loss": 5.4551,
31096
+ "step": 17764
31097
+ },
31098
+ {
31099
+ "epoch": 0.88,
31100
+ "grad_norm": 2.121720552444458,
31101
+ "learning_rate": 6.114432531251545e-06,
31102
+ "loss": 5.4185,
31103
+ "step": 17768
31104
+ },
31105
+ {
31106
+ "epoch": 0.88,
31107
+ "grad_norm": 1.955588698387146,
31108
+ "learning_rate": 6.104550620089926e-06,
31109
+ "loss": 5.5947,
31110
+ "step": 17772
31111
+ },
31112
+ {
31113
+ "epoch": 0.88,
31114
+ "grad_norm": 1.9518580436706543,
31115
+ "learning_rate": 6.094668708928307e-06,
31116
+ "loss": 5.4159,
31117
+ "step": 17776
31118
+ },
31119
+ {
31120
+ "epoch": 0.88,
31121
+ "grad_norm": 2.2284739017486572,
31122
+ "learning_rate": 6.084786797766688e-06,
31123
+ "loss": 5.4806,
31124
+ "step": 17780
31125
+ },
31126
+ {
31127
+ "epoch": 0.88,
31128
+ "grad_norm": 1.9473198652267456,
31129
+ "learning_rate": 6.07490488660507e-06,
31130
+ "loss": 5.3457,
31131
+ "step": 17784
31132
+ },
31133
+ {
31134
+ "epoch": 0.88,
31135
+ "grad_norm": 2.25762939453125,
31136
+ "learning_rate": 6.065022975443451e-06,
31137
+ "loss": 5.3905,
31138
+ "step": 17788
31139
+ },
31140
+ {
31141
+ "epoch": 0.88,
31142
+ "grad_norm": 2.13055682182312,
31143
+ "learning_rate": 6.055141064281832e-06,
31144
+ "loss": 5.4596,
31145
+ "step": 17792
31146
+ },
31147
+ {
31148
+ "epoch": 0.88,
31149
+ "grad_norm": 2.078608751296997,
31150
+ "learning_rate": 6.045259153120214e-06,
31151
+ "loss": 5.4836,
31152
+ "step": 17796
31153
+ },
31154
+ {
31155
+ "epoch": 0.88,
31156
+ "grad_norm": 2.032860040664673,
31157
+ "learning_rate": 6.0353772419585954e-06,
31158
+ "loss": 5.3315,
31159
+ "step": 17800
31160
+ },
31161
+ {
31162
+ "epoch": 0.88,
31163
+ "grad_norm": 2.18186616897583,
31164
+ "learning_rate": 6.025495330796977e-06,
31165
+ "loss": 5.5165,
31166
+ "step": 17804
31167
+ },
31168
+ {
31169
+ "epoch": 0.88,
31170
+ "grad_norm": 1.8949894905090332,
31171
+ "learning_rate": 6.015613419635358e-06,
31172
+ "loss": 5.4296,
31173
+ "step": 17808
31174
+ },
31175
+ {
31176
+ "epoch": 0.88,
31177
+ "grad_norm": 1.9019147157669067,
31178
+ "learning_rate": 6.005731508473739e-06,
31179
+ "loss": 5.3742,
31180
+ "step": 17812
31181
+ },
31182
+ {
31183
+ "epoch": 0.88,
31184
+ "grad_norm": 1.9749938249588013,
31185
+ "learning_rate": 5.99584959731212e-06,
31186
+ "loss": 5.4702,
31187
+ "step": 17816
31188
+ },
31189
+ {
31190
+ "epoch": 0.88,
31191
+ "grad_norm": 1.9528026580810547,
31192
+ "learning_rate": 5.9859676861505015e-06,
31193
+ "loss": 5.4179,
31194
+ "step": 17820
31195
+ },
31196
+ {
31197
+ "epoch": 0.88,
31198
+ "grad_norm": 2.04555082321167,
31199
+ "learning_rate": 5.976085774988883e-06,
31200
+ "loss": 5.4807,
31201
+ "step": 17824
31202
+ },
31203
+ {
31204
+ "epoch": 0.88,
31205
+ "grad_norm": 2.2078750133514404,
31206
+ "learning_rate": 5.966203863827265e-06,
31207
+ "loss": 5.4206,
31208
+ "step": 17828
31209
+ },
31210
+ {
31211
+ "epoch": 0.88,
31212
+ "grad_norm": 2.1232731342315674,
31213
+ "learning_rate": 5.956321952665646e-06,
31214
+ "loss": 5.5452,
31215
+ "step": 17832
31216
+ },
31217
+ {
31218
+ "epoch": 0.88,
31219
+ "grad_norm": 1.724265217781067,
31220
+ "learning_rate": 5.946440041504027e-06,
31221
+ "loss": 5.3831,
31222
+ "step": 17836
31223
+ },
31224
+ {
31225
+ "epoch": 0.88,
31226
+ "grad_norm": 2.0802602767944336,
31227
+ "learning_rate": 5.936558130342408e-06,
31228
+ "loss": 5.3925,
31229
+ "step": 17840
31230
+ },
31231
+ {
31232
+ "epoch": 0.88,
31233
+ "grad_norm": 1.913464069366455,
31234
+ "learning_rate": 5.92667621918079e-06,
31235
+ "loss": 5.4786,
31236
+ "step": 17844
31237
+ },
31238
+ {
31239
+ "epoch": 0.88,
31240
+ "grad_norm": 1.9357552528381348,
31241
+ "learning_rate": 5.916794308019171e-06,
31242
+ "loss": 5.4415,
31243
+ "step": 17848
31244
+ },
31245
+ {
31246
+ "epoch": 0.88,
31247
+ "grad_norm": 1.9869678020477295,
31248
+ "learning_rate": 5.906912396857552e-06,
31249
+ "loss": 5.4731,
31250
+ "step": 17852
31251
+ },
31252
+ {
31253
+ "epoch": 0.88,
31254
+ "grad_norm": 2.1964402198791504,
31255
+ "learning_rate": 5.897030485695934e-06,
31256
+ "loss": 5.4538,
31257
+ "step": 17856
31258
+ },
31259
+ {
31260
+ "epoch": 0.88,
31261
+ "grad_norm": 2.2341887950897217,
31262
+ "learning_rate": 5.887148574534315e-06,
31263
+ "loss": 5.4484,
31264
+ "step": 17860
31265
+ },
31266
+ {
31267
+ "epoch": 0.88,
31268
+ "grad_norm": 2.2685790061950684,
31269
+ "learning_rate": 5.8772666633726965e-06,
31270
+ "loss": 5.4935,
31271
+ "step": 17864
31272
+ },
31273
+ {
31274
+ "epoch": 0.88,
31275
+ "grad_norm": 1.9977366924285889,
31276
+ "learning_rate": 5.867384752211079e-06,
31277
+ "loss": 5.4866,
31278
+ "step": 17868
31279
+ },
31280
+ {
31281
+ "epoch": 0.88,
31282
+ "grad_norm": 1.8025336265563965,
31283
+ "learning_rate": 5.857502841049459e-06,
31284
+ "loss": 5.5315,
31285
+ "step": 17872
31286
+ },
31287
+ {
31288
+ "epoch": 0.88,
31289
+ "grad_norm": 1.8028703927993774,
31290
+ "learning_rate": 5.84762092988784e-06,
31291
+ "loss": 5.4048,
31292
+ "step": 17876
31293
+ },
31294
+ {
31295
+ "epoch": 0.88,
31296
+ "grad_norm": 2.0206375122070312,
31297
+ "learning_rate": 5.837739018726221e-06,
31298
+ "loss": 5.5057,
31299
+ "step": 17880
31300
+ },
31301
+ {
31302
+ "epoch": 0.88,
31303
+ "grad_norm": 2.2146549224853516,
31304
+ "learning_rate": 5.8278571075646034e-06,
31305
+ "loss": 5.4375,
31306
+ "step": 17884
31307
+ },
31308
+ {
31309
+ "epoch": 0.88,
31310
+ "grad_norm": 2.1485488414764404,
31311
+ "learning_rate": 5.817975196402985e-06,
31312
+ "loss": 5.4375,
31313
+ "step": 17888
31314
+ },
31315
+ {
31316
+ "epoch": 0.88,
31317
+ "grad_norm": 1.9976389408111572,
31318
+ "learning_rate": 5.808093285241366e-06,
31319
+ "loss": 5.5645,
31320
+ "step": 17892
31321
+ },
31322
+ {
31323
+ "epoch": 0.88,
31324
+ "grad_norm": 2.1577677726745605,
31325
+ "learning_rate": 5.798211374079747e-06,
31326
+ "loss": 5.4917,
31327
+ "step": 17896
31328
+ },
31329
+ {
31330
+ "epoch": 0.88,
31331
+ "grad_norm": 2.085784673690796,
31332
+ "learning_rate": 5.788329462918129e-06,
31333
+ "loss": 5.5115,
31334
+ "step": 17900
31335
+ },
31336
+ {
31337
+ "epoch": 0.88,
31338
+ "grad_norm": 2.250061511993408,
31339
+ "learning_rate": 5.77844755175651e-06,
31340
+ "loss": 5.4932,
31341
+ "step": 17904
31342
+ },
31343
+ {
31344
+ "epoch": 0.88,
31345
+ "grad_norm": 2.076542854309082,
31346
+ "learning_rate": 5.768565640594891e-06,
31347
+ "loss": 5.3858,
31348
+ "step": 17908
31349
+ },
31350
+ {
31351
+ "epoch": 0.89,
31352
+ "grad_norm": 2.0458431243896484,
31353
+ "learning_rate": 5.761154207223677e-06,
31354
+ "loss": 5.5596,
31355
+ "step": 17912
31356
+ },
31357
+ {
31358
+ "epoch": 0.89,
31359
+ "grad_norm": 2.075693130493164,
31360
+ "learning_rate": 5.7512722960620585e-06,
31361
+ "loss": 5.4962,
31362
+ "step": 17916
31363
+ },
31364
+ {
31365
+ "epoch": 0.89,
31366
+ "grad_norm": 2.09002685546875,
31367
+ "learning_rate": 5.74139038490044e-06,
31368
+ "loss": 5.3926,
31369
+ "step": 17920
31370
+ },
31371
+ {
31372
+ "epoch": 0.89,
31373
+ "grad_norm": 2.0116419792175293,
31374
+ "learning_rate": 5.731508473738821e-06,
31375
+ "loss": 5.4649,
31376
+ "step": 17924
31377
+ },
31378
+ {
31379
+ "epoch": 0.89,
31380
+ "grad_norm": 2.0170979499816895,
31381
+ "learning_rate": 5.721626562577203e-06,
31382
+ "loss": 5.4906,
31383
+ "step": 17928
31384
+ },
31385
+ {
31386
+ "epoch": 0.89,
31387
+ "grad_norm": 2.033344268798828,
31388
+ "learning_rate": 5.711744651415584e-06,
31389
+ "loss": 5.5144,
31390
+ "step": 17932
31391
+ },
31392
+ {
31393
+ "epoch": 0.89,
31394
+ "grad_norm": 1.9842782020568848,
31395
+ "learning_rate": 5.701862740253965e-06,
31396
+ "loss": 5.4478,
31397
+ "step": 17936
31398
+ },
31399
+ {
31400
+ "epoch": 0.89,
31401
+ "grad_norm": 2.059737205505371,
31402
+ "learning_rate": 5.691980829092347e-06,
31403
+ "loss": 5.4969,
31404
+ "step": 17940
31405
+ },
31406
+ {
31407
+ "epoch": 0.89,
31408
+ "grad_norm": 2.116508960723877,
31409
+ "learning_rate": 5.682098917930728e-06,
31410
+ "loss": 5.5734,
31411
+ "step": 17944
31412
+ },
31413
+ {
31414
+ "epoch": 0.89,
31415
+ "grad_norm": 2.062220573425293,
31416
+ "learning_rate": 5.672217006769109e-06,
31417
+ "loss": 5.3472,
31418
+ "step": 17948
31419
+ },
31420
+ {
31421
+ "epoch": 0.89,
31422
+ "grad_norm": 2.176339626312256,
31423
+ "learning_rate": 5.66233509560749e-06,
31424
+ "loss": 5.3356,
31425
+ "step": 17952
31426
+ },
31427
+ {
31428
+ "epoch": 0.89,
31429
+ "grad_norm": 2.2417047023773193,
31430
+ "learning_rate": 5.652453184445872e-06,
31431
+ "loss": 5.429,
31432
+ "step": 17956
31433
+ },
31434
+ {
31435
+ "epoch": 0.89,
31436
+ "grad_norm": 1.9376815557479858,
31437
+ "learning_rate": 5.6425712732842535e-06,
31438
+ "loss": 5.4263,
31439
+ "step": 17960
31440
+ },
31441
+ {
31442
+ "epoch": 0.89,
31443
+ "grad_norm": 1.9888767004013062,
31444
+ "learning_rate": 5.632689362122635e-06,
31445
+ "loss": 5.5286,
31446
+ "step": 17964
31447
+ },
31448
+ {
31449
+ "epoch": 0.89,
31450
+ "grad_norm": 2.1054704189300537,
31451
+ "learning_rate": 5.622807450961016e-06,
31452
+ "loss": 5.4473,
31453
+ "step": 17968
31454
+ },
31455
+ {
31456
+ "epoch": 0.89,
31457
+ "grad_norm": 2.3070156574249268,
31458
+ "learning_rate": 5.612925539799398e-06,
31459
+ "loss": 5.6394,
31460
+ "step": 17972
31461
+ },
31462
+ {
31463
+ "epoch": 0.89,
31464
+ "grad_norm": 1.956694483757019,
31465
+ "learning_rate": 5.603043628637779e-06,
31466
+ "loss": 5.3773,
31467
+ "step": 17976
31468
+ },
31469
+ {
31470
+ "epoch": 0.89,
31471
+ "grad_norm": 2.1512622833251953,
31472
+ "learning_rate": 5.59316171747616e-06,
31473
+ "loss": 5.5051,
31474
+ "step": 17980
31475
+ },
31476
+ {
31477
+ "epoch": 0.89,
31478
+ "grad_norm": 1.9715803861618042,
31479
+ "learning_rate": 5.583279806314542e-06,
31480
+ "loss": 5.4564,
31481
+ "step": 17984
31482
+ },
31483
+ {
31484
+ "epoch": 0.89,
31485
+ "grad_norm": 2.07094144821167,
31486
+ "learning_rate": 5.573397895152923e-06,
31487
+ "loss": 5.486,
31488
+ "step": 17988
31489
+ },
31490
+ {
31491
+ "epoch": 0.89,
31492
+ "grad_norm": 2.0776047706604004,
31493
+ "learning_rate": 5.563515983991304e-06,
31494
+ "loss": 5.4101,
31495
+ "step": 17992
31496
+ },
31497
+ {
31498
+ "epoch": 0.89,
31499
+ "grad_norm": 2.0641090869903564,
31500
+ "learning_rate": 5.553634072829685e-06,
31501
+ "loss": 5.4413,
31502
+ "step": 17996
31503
+ },
31504
+ {
31505
+ "epoch": 0.89,
31506
+ "grad_norm": 2.069200038909912,
31507
+ "learning_rate": 5.543752161668067e-06,
31508
+ "loss": 5.4239,
31509
+ "step": 18000
31510
+ },
31511
+ {
31512
+ "epoch": 0.89,
31513
+ "grad_norm": 1.9378856420516968,
31514
+ "learning_rate": 5.5338702505064486e-06,
31515
+ "loss": 5.4994,
31516
+ "step": 18004
31517
+ },
31518
+ {
31519
+ "epoch": 0.89,
31520
+ "grad_norm": 1.9170506000518799,
31521
+ "learning_rate": 5.52398833934483e-06,
31522
+ "loss": 5.4589,
31523
+ "step": 18008
31524
+ },
31525
+ {
31526
+ "epoch": 0.89,
31527
+ "grad_norm": 2.1123738288879395,
31528
+ "learning_rate": 5.514106428183211e-06,
31529
+ "loss": 5.4226,
31530
+ "step": 18012
31531
+ },
31532
+ {
31533
+ "epoch": 0.89,
31534
+ "grad_norm": 1.9895274639129639,
31535
+ "learning_rate": 5.504224517021592e-06,
31536
+ "loss": 5.522,
31537
+ "step": 18016
31538
+ },
31539
+ {
31540
+ "epoch": 0.89,
31541
+ "grad_norm": 2.216384172439575,
31542
+ "learning_rate": 5.494342605859973e-06,
31543
+ "loss": 5.4323,
31544
+ "step": 18020
31545
+ },
31546
+ {
31547
+ "epoch": 0.89,
31548
+ "grad_norm": 2.1332645416259766,
31549
+ "learning_rate": 5.484460694698355e-06,
31550
+ "loss": 5.3666,
31551
+ "step": 18024
31552
+ },
31553
+ {
31554
+ "epoch": 0.89,
31555
+ "grad_norm": 2.0537400245666504,
31556
+ "learning_rate": 5.474578783536737e-06,
31557
+ "loss": 5.4988,
31558
+ "step": 18028
31559
+ },
31560
+ {
31561
+ "epoch": 0.89,
31562
+ "grad_norm": 1.9897429943084717,
31563
+ "learning_rate": 5.464696872375118e-06,
31564
+ "loss": 5.4833,
31565
+ "step": 18032
31566
+ },
31567
+ {
31568
+ "epoch": 0.89,
31569
+ "grad_norm": 2.066513776779175,
31570
+ "learning_rate": 5.454814961213499e-06,
31571
+ "loss": 5.4636,
31572
+ "step": 18036
31573
+ },
31574
+ {
31575
+ "epoch": 0.89,
31576
+ "grad_norm": 2.251376152038574,
31577
+ "learning_rate": 5.44493305005188e-06,
31578
+ "loss": 5.5205,
31579
+ "step": 18040
31580
+ },
31581
+ {
31582
+ "epoch": 0.89,
31583
+ "grad_norm": 2.1560051441192627,
31584
+ "learning_rate": 5.4350511388902615e-06,
31585
+ "loss": 5.4888,
31586
+ "step": 18044
31587
+ },
31588
+ {
31589
+ "epoch": 0.89,
31590
+ "grad_norm": 1.9921746253967285,
31591
+ "learning_rate": 5.425169227728643e-06,
31592
+ "loss": 5.503,
31593
+ "step": 18048
31594
+ },
31595
+ {
31596
+ "epoch": 0.89,
31597
+ "grad_norm": 1.9806662797927856,
31598
+ "learning_rate": 5.415287316567024e-06,
31599
+ "loss": 5.5299,
31600
+ "step": 18052
31601
+ },
31602
+ {
31603
+ "epoch": 0.89,
31604
+ "grad_norm": 2.086308479309082,
31605
+ "learning_rate": 5.405405405405406e-06,
31606
+ "loss": 5.3775,
31607
+ "step": 18056
31608
+ },
31609
+ {
31610
+ "epoch": 0.89,
31611
+ "grad_norm": 1.9652711153030396,
31612
+ "learning_rate": 5.395523494243787e-06,
31613
+ "loss": 5.5683,
31614
+ "step": 18060
31615
+ },
31616
+ {
31617
+ "epoch": 0.89,
31618
+ "grad_norm": 1.8376736640930176,
31619
+ "learning_rate": 5.3856415830821685e-06,
31620
+ "loss": 5.3905,
31621
+ "step": 18064
31622
+ },
31623
+ {
31624
+ "epoch": 0.89,
31625
+ "grad_norm": 2.1515750885009766,
31626
+ "learning_rate": 5.37575967192055e-06,
31627
+ "loss": 5.421,
31628
+ "step": 18068
31629
+ },
31630
+ {
31631
+ "epoch": 0.89,
31632
+ "grad_norm": 2.18635630607605,
31633
+ "learning_rate": 5.365877760758931e-06,
31634
+ "loss": 5.4513,
31635
+ "step": 18072
31636
+ },
31637
+ {
31638
+ "epoch": 0.89,
31639
+ "grad_norm": 2.2322137355804443,
31640
+ "learning_rate": 5.355995849597312e-06,
31641
+ "loss": 5.5696,
31642
+ "step": 18076
31643
+ },
31644
+ {
31645
+ "epoch": 0.89,
31646
+ "grad_norm": 2.1388771533966064,
31647
+ "learning_rate": 5.346113938435693e-06,
31648
+ "loss": 5.5154,
31649
+ "step": 18080
31650
+ },
31651
+ {
31652
+ "epoch": 0.89,
31653
+ "grad_norm": 2.1032564640045166,
31654
+ "learning_rate": 5.336232027274075e-06,
31655
+ "loss": 5.4737,
31656
+ "step": 18084
31657
+ },
31658
+ {
31659
+ "epoch": 0.89,
31660
+ "grad_norm": 2.2419564723968506,
31661
+ "learning_rate": 5.326350116112457e-06,
31662
+ "loss": 5.4804,
31663
+ "step": 18088
31664
+ },
31665
+ {
31666
+ "epoch": 0.89,
31667
+ "grad_norm": 2.1092734336853027,
31668
+ "learning_rate": 5.316468204950838e-06,
31669
+ "loss": 5.523,
31670
+ "step": 18092
31671
+ },
31672
+ {
31673
+ "epoch": 0.89,
31674
+ "grad_norm": 1.9642736911773682,
31675
+ "learning_rate": 5.306586293789219e-06,
31676
+ "loss": 5.4615,
31677
+ "step": 18096
31678
+ },
31679
+ {
31680
+ "epoch": 0.89,
31681
+ "grad_norm": 2.285712480545044,
31682
+ "learning_rate": 5.296704382627601e-06,
31683
+ "loss": 5.4944,
31684
+ "step": 18100
31685
+ },
31686
+ {
31687
+ "epoch": 0.89,
31688
+ "grad_norm": 1.8048274517059326,
31689
+ "learning_rate": 5.2868224714659814e-06,
31690
+ "loss": 5.3293,
31691
+ "step": 18104
31692
+ },
31693
+ {
31694
+ "epoch": 0.89,
31695
+ "grad_norm": 1.9001215696334839,
31696
+ "learning_rate": 5.276940560304363e-06,
31697
+ "loss": 5.3799,
31698
+ "step": 18108
31699
+ },
31700
+ {
31701
+ "epoch": 0.89,
31702
+ "grad_norm": 2.052248954772949,
31703
+ "learning_rate": 5.267058649142744e-06,
31704
+ "loss": 5.54,
31705
+ "step": 18112
31706
+ },
31707
+ {
31708
+ "epoch": 0.9,
31709
+ "grad_norm": 1.9618264436721802,
31710
+ "learning_rate": 5.257176737981126e-06,
31711
+ "loss": 5.5206,
31712
+ "step": 18116
31713
+ },
31714
+ {
31715
+ "epoch": 0.9,
31716
+ "grad_norm": 2.086357355117798,
31717
+ "learning_rate": 5.247294826819507e-06,
31718
+ "loss": 5.4713,
31719
+ "step": 18120
31720
+ },
31721
+ {
31722
+ "epoch": 0.9,
31723
+ "grad_norm": 1.989790678024292,
31724
+ "learning_rate": 5.237412915657888e-06,
31725
+ "loss": 5.4183,
31726
+ "step": 18124
31727
+ },
31728
+ {
31729
+ "epoch": 0.9,
31730
+ "grad_norm": 2.4142305850982666,
31731
+ "learning_rate": 5.22753100449627e-06,
31732
+ "loss": 5.4867,
31733
+ "step": 18128
31734
+ },
31735
+ {
31736
+ "epoch": 0.9,
31737
+ "grad_norm": 1.9405925273895264,
31738
+ "learning_rate": 5.217649093334652e-06,
31739
+ "loss": 5.4259,
31740
+ "step": 18132
31741
+ },
31742
+ {
31743
+ "epoch": 0.9,
31744
+ "grad_norm": 1.9569774866104126,
31745
+ "learning_rate": 5.207767182173033e-06,
31746
+ "loss": 5.4619,
31747
+ "step": 18136
31748
+ },
31749
+ {
31750
+ "epoch": 0.9,
31751
+ "grad_norm": 2.1723146438598633,
31752
+ "learning_rate": 5.197885271011413e-06,
31753
+ "loss": 5.4719,
31754
+ "step": 18140
31755
+ },
31756
+ {
31757
+ "epoch": 0.9,
31758
+ "grad_norm": 2.3420112133026123,
31759
+ "learning_rate": 5.188003359849795e-06,
31760
+ "loss": 5.4274,
31761
+ "step": 18144
31762
+ },
31763
+ {
31764
+ "epoch": 0.9,
31765
+ "grad_norm": 2.2983791828155518,
31766
+ "learning_rate": 5.1781214486881765e-06,
31767
+ "loss": 5.5184,
31768
+ "step": 18148
31769
+ },
31770
+ {
31771
+ "epoch": 0.9,
31772
+ "grad_norm": 2.061795711517334,
31773
+ "learning_rate": 5.168239537526558e-06,
31774
+ "loss": 5.4416,
31775
+ "step": 18152
31776
+ },
31777
+ {
31778
+ "epoch": 0.9,
31779
+ "grad_norm": 2.11879301071167,
31780
+ "learning_rate": 5.15835762636494e-06,
31781
+ "loss": 5.3997,
31782
+ "step": 18156
31783
+ },
31784
+ {
31785
+ "epoch": 0.9,
31786
+ "grad_norm": 2.072601556777954,
31787
+ "learning_rate": 5.148475715203321e-06,
31788
+ "loss": 5.4744,
31789
+ "step": 18160
31790
+ },
31791
+ {
31792
+ "epoch": 0.9,
31793
+ "grad_norm": 2.037374258041382,
31794
+ "learning_rate": 5.138593804041702e-06,
31795
+ "loss": 5.5549,
31796
+ "step": 18164
31797
+ },
31798
+ {
31799
+ "epoch": 0.9,
31800
+ "grad_norm": 2.1012215614318848,
31801
+ "learning_rate": 5.128711892880083e-06,
31802
+ "loss": 5.5601,
31803
+ "step": 18168
31804
+ },
31805
+ {
31806
+ "epoch": 0.9,
31807
+ "grad_norm": 1.9614689350128174,
31808
+ "learning_rate": 5.118829981718465e-06,
31809
+ "loss": 5.3775,
31810
+ "step": 18172
31811
+ },
31812
+ {
31813
+ "epoch": 0.9,
31814
+ "grad_norm": 2.3375091552734375,
31815
+ "learning_rate": 5.108948070556846e-06,
31816
+ "loss": 5.5373,
31817
+ "step": 18176
31818
+ },
31819
+ {
31820
+ "epoch": 0.9,
31821
+ "grad_norm": 1.9838519096374512,
31822
+ "learning_rate": 5.099066159395227e-06,
31823
+ "loss": 5.3491,
31824
+ "step": 18180
31825
+ },
31826
+ {
31827
+ "epoch": 0.9,
31828
+ "grad_norm": 2.0909204483032227,
31829
+ "learning_rate": 5.089184248233608e-06,
31830
+ "loss": 5.3386,
31831
+ "step": 18184
31832
+ },
31833
+ {
31834
+ "epoch": 0.9,
31835
+ "grad_norm": 2.396127223968506,
31836
+ "learning_rate": 5.07930233707199e-06,
31837
+ "loss": 5.4791,
31838
+ "step": 18188
31839
+ },
31840
+ {
31841
+ "epoch": 0.9,
31842
+ "grad_norm": 2.117344379425049,
31843
+ "learning_rate": 5.0694204259103715e-06,
31844
+ "loss": 5.3322,
31845
+ "step": 18192
31846
+ },
31847
+ {
31848
+ "epoch": 0.9,
31849
+ "grad_norm": 2.0109291076660156,
31850
+ "learning_rate": 5.059538514748753e-06,
31851
+ "loss": 5.4152,
31852
+ "step": 18196
31853
+ },
31854
+ {
31855
+ "epoch": 0.9,
31856
+ "grad_norm": 2.051154851913452,
31857
+ "learning_rate": 5.049656603587134e-06,
31858
+ "loss": 5.504,
31859
+ "step": 18200
31860
+ },
31861
+ {
31862
+ "epoch": 0.9,
31863
+ "grad_norm": 1.9939186573028564,
31864
+ "learning_rate": 5.039774692425515e-06,
31865
+ "loss": 5.377,
31866
+ "step": 18204
31867
+ },
31868
+ {
31869
+ "epoch": 0.9,
31870
+ "grad_norm": 2.0900635719299316,
31871
+ "learning_rate": 5.029892781263896e-06,
31872
+ "loss": 5.4448,
31873
+ "step": 18208
31874
+ },
31875
+ {
31876
+ "epoch": 0.9,
31877
+ "grad_norm": 1.7526922225952148,
31878
+ "learning_rate": 5.020010870102278e-06,
31879
+ "loss": 5.2948,
31880
+ "step": 18212
31881
+ },
31882
+ {
31883
+ "epoch": 0.9,
31884
+ "grad_norm": 2.082535982131958,
31885
+ "learning_rate": 5.01012895894066e-06,
31886
+ "loss": 5.4976,
31887
+ "step": 18216
31888
  }
31889
  ],
31890
  "logging_steps": 4,
 
31892
  "num_input_tokens_seen": 0,
31893
  "num_train_epochs": 1,
31894
  "save_steps": 2024,
31895
+ "total_flos": 7.67303003626537e+16,
31896
  "train_batch_size": 8,
31897
  "trial_name": null,
31898
  "trial_params": null