joelniklaus commited on
Commit
fffa462
1 Parent(s): d4a8b56

Training in progress, step 30000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c86c6ef1e9ed073386178347021ac781b8c02e77572c298de3cabb7139ff7bb
3
  size 1668076741
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7378cec3d952e350bd0782f3f66d4685e17a260af33a26310893b255a4ef456f
3
  size 1668076741
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:567827f0a9fb4e7e213e112d1698cfdc1ed84a2aa6f8e8ac3a3d6499893d619a
3
  size 834053717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf2486a9aed6db196b4c0c2d69f6b9ce2fe47a70db1927026ee80ceceaa0b78
3
  size 834053717
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d83595397313a34341269e19cddb27d1911528d5cb71c1c4021a0f77255c067d
3
  size 17641
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6577590482bd95dc8f5fc1b9185d2c76e77162ca88ae8b6a1adbd2a0c1f833f1
3
  size 17641
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ec0071e4302006204e99cd369255e47065d131099e23d2fd8502b466b28f338
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcd693cfa252394680d67f0702bcf07e039830fa5cbdfd3fc37f2bc2c7ddcfe5
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:843359b4ca576c0da051410b99ae90c5c724f142fad0be29b3fca9e912a14ce4
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb8a20482e53552d32fdf801d3bee556547de0b51c5f96c7d7318698a6a89bd8
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4,
5
- "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -144,11 +144,80 @@
144
  "eval_samples_per_second": 22.424,
145
  "eval_steps_per_second": 0.561,
146
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "max_steps": 50000,
150
  "num_train_epochs": 9223372036854775807,
151
- "total_flos": 6.313325101056e+18,
152
  "trial_name": null,
153
  "trial_params": null
154
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6,
5
+ "global_step": 30000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 22.424,
145
  "eval_steps_per_second": 0.561,
146
  "step": 20000
147
+ },
148
+ {
149
+ "epoch": 0.42,
150
+ "learning_rate": 2.011372523790927e-05,
151
+ "loss": 0.8297,
152
+ "step": 21000
153
+ },
154
+ {
155
+ "epoch": 0.44,
156
+ "learning_rate": 1.9171527138850477e-05,
157
+ "loss": 0.8453,
158
+ "step": 22000
159
+ },
160
+ {
161
+ "epoch": 0.46,
162
+ "learning_rate": 1.8210155373841292e-05,
163
+ "loss": 0.8384,
164
+ "step": 23000
165
+ },
166
+ {
167
+ "epoch": 0.48,
168
+ "learning_rate": 1.7234746416166368e-05,
169
+ "loss": 0.828,
170
+ "step": 24000
171
+ },
172
+ {
173
+ "epoch": 0.5,
174
+ "learning_rate": 1.624956548101695e-05,
175
+ "loss": 0.8226,
176
+ "step": 25000
177
+ },
178
+ {
179
+ "epoch": 0.52,
180
+ "learning_rate": 1.5259912447470205e-05,
181
+ "loss": 0.8188,
182
+ "step": 26000
183
+ },
184
+ {
185
+ "epoch": 0.54,
186
+ "learning_rate": 1.4268134252092541e-05,
187
+ "loss": 0.807,
188
+ "step": 27000
189
+ },
190
+ {
191
+ "epoch": 0.56,
192
+ "learning_rate": 1.3279556319416353e-05,
193
+ "loss": 0.8235,
194
+ "step": 28000
195
+ },
196
+ {
197
+ "epoch": 0.58,
198
+ "learning_rate": 1.2298501449209877e-05,
199
+ "loss": 0.802,
200
+ "step": 29000
201
+ },
202
+ {
203
+ "epoch": 0.6,
204
+ "learning_rate": 1.1330221470267496e-05,
205
+ "loss": 0.8327,
206
+ "step": 30000
207
+ },
208
+ {
209
+ "epoch": 0.6,
210
+ "eval_accuracy": 0.8488932773912237,
211
+ "eval_loss": 0.6712204217910767,
212
+ "eval_runtime": 182.5232,
213
+ "eval_samples_per_second": 27.394,
214
+ "eval_steps_per_second": 0.685,
215
+ "step": 30000
216
  }
217
  ],
218
  "max_steps": 50000,
219
  "num_train_epochs": 9223372036854775807,
220
+ "total_flos": 9.469987651584e+18,
221
  "trial_name": null,
222
  "trial_params": null
223
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:567827f0a9fb4e7e213e112d1698cfdc1ed84a2aa6f8e8ac3a3d6499893d619a
3
  size 834053717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf2486a9aed6db196b4c0c2d69f6b9ce2fe47a70db1927026ee80ceceaa0b78
3
  size 834053717