leixa committed on
Commit 095e705 · verified · 1 Parent(s): 416513b

Training in progress, step 63, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14e4ad5a835a88945e0ee4038f25352c397f33c5f4adde5fca373295f998c61d
+oid sha256:770140205d2dac43d3cffa97db7b856382360e2b5d689600fb6a190f3a214871
 size 191968
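
The binary files in this checkpoint are stored through Git LFS, so the diff only swaps the pointer's sha256 oid while the byte size stays the same. A minimal sketch for checking that a locally fetched file matches the new pointer above; the local path is an assumption, and the oid and size are copied from the diff:

import hashlib
from pathlib import Path

# Values copied from the new LFS pointer in this commit.
EXPECTED_OID = "770140205d2dac43d3cffa97db7b856382360e2b5d689600fb6a190f3a214871"
EXPECTED_SIZE = 191968

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file so large checkpoints never sit fully in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

path = Path("last-checkpoint/adapter_model.safetensors")  # assumed local path
assert path.stat().st_size == EXPECTED_SIZE, "size does not match the pointer"
assert sha256_of(path) == EXPECTED_OID, "sha256 does not match the pointer"
print("adapter_model.safetensors matches its LFS pointer")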
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d3e22b37135b24d3ba3c1b6f87d393c15fea5d409e7cec9f00e2d6cb56242c35
+oid sha256:1a359e38865ee20b10b8cc286fbdbd671be6b4685839e72323ccb7768a4790a4
 size 253144
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d1f55c7fe0357563a35d240cfc7f2436b04a081cefd509d645b538cc39e7506
+oid sha256:ff00fff9cbfcd118e54c77073688cb2b84440267aaa2d4e090ca843731701c95
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17f7add06e301ad12114b22b3a83ca3a5bc8239932ceeb6478a372e3a37dacdc
+oid sha256:731f9a38a306fb54b040f8655d8ca8de9e109511292676024e32dd381563f07a
 size 1064
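
optimizer.pt, rng_state.pth, and scheduler.pt are plain PyTorch serializations, so their contents can be inspected once the LFS blobs are fetched. A rough sketch, assuming the files sit in a local last-checkpoint/ directory and come from a trusted source (weights_only=False because they hold more than bare tensors):

import torch

# Optimizer state: 'state' (per-parameter moments) and 'param_groups' (lr, etc.).
opt = torch.load("last-checkpoint/optimizer.pt", map_location="cpu", weights_only=False)
print(sorted(opt.keys()))

# LR scheduler state: a small dict, e.g. last_epoch and the last computed learning rates.
sched = torch.load("last-checkpoint/scheduler.pt", map_location="cpu", weights_only=False)
print(sched)

# RNG snapshot, saved so that resuming at step 63 continues data shuffling and
# dropout from the same random state.
rng = torch.load("last-checkpoint/rng_state.pth", map_location="cpu", weights_only=False)
print(type(rng))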
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5075528700906344,
+  "epoch": 0.7613293051359517,
   "eval_steps": 21,
-  "global_step": 42,
+  "global_step": 63,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -129,6 +129,63 @@
       "eval_samples_per_second": 535.377,
       "eval_steps_per_second": 68.834,
       "step": 42
+    },
+    {
+      "epoch": 0.5438066465256798,
+      "grad_norm": 0.15116359293460846,
+      "learning_rate": 9.480116264104011e-05,
+      "loss": 10.3382,
+      "step": 45
+    },
+    {
+      "epoch": 0.5800604229607251,
+      "grad_norm": 0.20655445754528046,
+      "learning_rate": 9.389110615965102e-05,
+      "loss": 10.3347,
+      "step": 48
+    },
+    {
+      "epoch": 0.6163141993957704,
+      "grad_norm": 0.15545906126499176,
+      "learning_rate": 9.291280532867302e-05,
+      "loss": 10.3275,
+      "step": 51
+    },
+    {
+      "epoch": 0.6525679758308157,
+      "grad_norm": 0.189162015914917,
+      "learning_rate": 9.186778126501916e-05,
+      "loss": 10.3294,
+      "step": 54
+    },
+    {
+      "epoch": 0.6888217522658611,
+      "grad_norm": 0.21338708698749542,
+      "learning_rate": 9.075765883062093e-05,
+      "loss": 10.3236,
+      "step": 57
+    },
+    {
+      "epoch": 0.7250755287009063,
+      "grad_norm": 0.23534299433231354,
+      "learning_rate": 8.958416410600187e-05,
+      "loss": 10.3183,
+      "step": 60
+    },
+    {
+      "epoch": 0.7613293051359517,
+      "grad_norm": 0.2692500054836273,
+      "learning_rate": 8.834912170647101e-05,
+      "loss": 10.3116,
+      "step": 63
+    },
+    {
+      "epoch": 0.7613293051359517,
+      "eval_loss": 10.310102462768555,
+      "eval_runtime": 0.2628,
+      "eval_samples_per_second": 532.785,
+      "eval_steps_per_second": 68.501,
+      "step": 63
     }
   ],
   "logging_steps": 3,
@@ -148,7 +205,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4685127745536.0,
+  "total_flos": 7027691618304.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null