csinva committed on
Commit
a50f416
1 Parent(s): 001bff6

4500 checkpoint

Browse files
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c9232f79a0a4129ebc9cc6ee0f21bb7a4d715eae675915015fd43d75811ea58
3
  size 21210715097
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:199c3170fd3a41772a9babb0927700ffddd6d1c8ac5b7aaec62f8e6d0110efb0
3
  size 21210715097
pytorch_model-00001-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b3977a25a38d65c8381c63b8d549e59b819ec5bec1ed892867ce834555f874b
3
  size 9996970517
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e22aada20aea4b7a32f71bdf574f6a942a6fd2f64107e76eee64bc0c51124e1
3
  size 9996970517
pytorch_model-00002-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:547fc177de3ba4ba268864c8128f9b4e05b1196e8a8bdaa44b45d6450e33611f
3
  size 742637631
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b835b661b4b00d3aca1275c295567e4a1beca1a2f2d5b628e35eb278d35ace6
3
  size 742637631
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fe4f7564f56a1ffa25a5c606d62664ceee4525fae7d67da25c48a37122eeeaf
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6430e5ad1d877b2cd1749d223eec11a131930ad358c4a7dfa9f4e6830aa494e
3
  size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8c8325f2941fb440a70e076145fff814af3bb9cb14c7d849f105ea262a5d6cf
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed56d7b3709a7b50b7e2ab3a6a14bdf4c03d66f131c7f3dfccdf9e4a98638acb
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7115339655820856,
5
- "global_step": 3500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -48,11 +48,23 @@
48
  "learning_rate": 4.110410736071574e-06,
49
  "loss": 2.977,
50
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
51
  }
52
  ],
53
  "max_steps": 19672,
54
  "num_train_epochs": 4,
55
- "total_flos": 2.1653725642752e+18,
56
  "trial_name": null,
57
  "trial_params": null
58
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9148293843198243,
5
+ "global_step": 4500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
48
  "learning_rate": 4.110410736071574e-06,
49
  "loss": 2.977,
50
  "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.81,
54
+ "learning_rate": 3.983326555510371e-06,
55
+ "loss": 2.9616,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.91,
60
+ "learning_rate": 3.856242374949167e-06,
61
+ "loss": 2.9615,
62
+ "step": 4500
63
  }
64
  ],
65
  "max_steps": 19672,
66
  "num_train_epochs": 4,
67
+ "total_flos": 2.7840504397824e+18,
68
  "trial_name": null,
69
  "trial_params": null
70
  }