Nadav committed on
Commit
5df5cc0
·
1 Parent(s): 3304704

Training in progress, step 15000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef358b8abdc8b32586ade83ad626ced2c2ef553c4675a7c7082630bc53b7e271
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f32d05e67336f892684b8c1fd2d6e5abd7a19c7a3f8e60643ae3b6f25370775f
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be11fa803e6017eb3dbc01c05f9b1e5eb55d5c2a51273b624e4418c37cb17a50
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:518db281c68ab44504d89afd4194f546b333ca0c39d6413cfd75d77184014cc2
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5b84fb60f620174879deba0342a8a31da5b88033e1cc6fd9fb9fedc3bfe020d
3
  size 15459
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a8f9792121fbd04e7178f7b4e43756a5886a8af837b3120c6d50a7ef7909014
3
  size 15459
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7dea577c093f733c5db585f25caa4bcb5b57c2e6ee15524759cbddd90329af41
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61491770594419f1f65e70a85ff1045ef5f026004b37d4207f8d0e52435942a4
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d524b4cb1391ef7e50966a3eef7ac714ecb6ed976eedf165d99a07d29c73b99
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a1bbe8751f81eef53f97fffae533ac792444b305682e60f93b951d5e4e28b33
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8705493166187864,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -142,11 +142,79 @@
142
  "eval_samples_per_second": 43.472,
143
  "eval_steps_per_second": 0.687,
144
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  }
146
  ],
147
  "max_steps": 100000,
148
  "num_train_epochs": 9,
149
- "total_flos": 4.709861347295232e+20,
150
  "trial_name": null,
151
  "trial_params": null
152
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3058239749281797,
5
+ "global_step": 15000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
142
  "eval_samples_per_second": 43.472,
143
  "eval_steps_per_second": 0.687,
144
  "step": 10000
145
+ },
146
+ {
147
+ "epoch": 0.91,
148
+ "learning_rate": 9.833541625738316e-05,
149
+ "loss": 0.4543,
150
+ "step": 10500
151
+ },
152
+ {
153
+ "epoch": 0.96,
154
+ "learning_rate": 9.809924007281187e-05,
155
+ "loss": 0.4532,
156
+ "step": 11000
157
+ },
158
+ {
159
+ "epoch": 1.0,
160
+ "learning_rate": 9.78525261765341e-05,
161
+ "loss": 0.4529,
162
+ "step": 11500
163
+ },
164
+ {
165
+ "epoch": 1.04,
166
+ "learning_rate": 9.759533544151208e-05,
167
+ "loss": 0.4523,
168
+ "step": 12000
169
+ },
170
+ {
171
+ "epoch": 1.09,
172
+ "learning_rate": 9.732827688303682e-05,
173
+ "loss": 0.4506,
174
+ "step": 12500
175
+ },
176
+ {
177
+ "epoch": 1.13,
178
+ "learning_rate": 9.705034604088048e-05,
179
+ "loss": 0.4502,
180
+ "step": 13000
181
+ },
182
+ {
183
+ "epoch": 1.18,
184
+ "learning_rate": 9.676213628592508e-05,
185
+ "loss": 0.4493,
186
+ "step": 13500
187
+ },
188
+ {
189
+ "epoch": 1.22,
190
+ "learning_rate": 9.64637187296151e-05,
191
+ "loss": 0.449,
192
+ "step": 14000
193
+ },
194
+ {
195
+ "epoch": 1.26,
196
+ "learning_rate": 9.615516700201724e-05,
197
+ "loss": 0.448,
198
+ "step": 14500
199
+ },
200
+ {
201
+ "epoch": 1.31,
202
+ "learning_rate": 9.583720443927501e-05,
203
+ "loss": 0.4479,
204
+ "step": 15000
205
+ },
206
+ {
207
+ "epoch": 1.31,
208
+ "eval_loss": 0.4363159239292145,
209
+ "eval_runtime": 91.0649,
210
+ "eval_samples_per_second": 54.906,
211
+ "eval_steps_per_second": 0.868,
212
+ "step": 15000
213
  }
214
  ],
215
  "max_steps": 100000,
216
  "num_train_epochs": 9,
217
+ "total_flos": 7.064748208279152e+20,
218
  "trial_name": null,
219
  "trial_params": null
220
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be11fa803e6017eb3dbc01c05f9b1e5eb55d5c2a51273b624e4418c37cb17a50
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:518db281c68ab44504d89afd4194f546b333ca0c39d6413cfd75d77184014cc2
3
  size 449471589