cppgohan committed on
Commit
2e800bf
1 Parent(s): 03b64a6

End of training

Browse files
README.md CHANGED
@@ -32,7 +32,7 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [microsoft/resnet-50](https://huggingface.co/microsoft/resnet-50) on the cats_vs_dogs dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.0460
36
  - Accuracy: 0.9919
37
 
38
  ## Model description
 
32
 
33
  This model is a fine-tuned version of [microsoft/resnet-50](https://huggingface.co/microsoft/resnet-50) on the cats_vs_dogs dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.0577
36
  - Accuracy: 0.9919
37
 
38
  ## Model description
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.99,
3
+ "eval_accuracy": 0.9918838103374626,
4
+ "eval_loss": 0.05769222229719162,
5
+ "eval_runtime": 27.3788,
6
+ "eval_samples_per_second": 85.504,
7
+ "eval_steps_per_second": 2.703,
8
+ "total_flos": 1.336513820941394e+18,
9
+ "train_loss": 0.2994473668617931,
10
+ "train_runtime": 953.1608,
11
+ "train_samples_per_second": 66.313,
12
+ "train_steps_per_second": 0.516
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.99,
3
+ "eval_accuracy": 0.9918838103374626,
4
+ "eval_loss": 0.05769222229719162,
5
+ "eval_runtime": 27.3788,
6
+ "eval_samples_per_second": 85.504,
7
+ "eval_steps_per_second": 2.703
8
+ }
runs/Apr07_02-08-52_e2b0f09b53b1/events.out.tfevents.1712456719.e2b0f09b53b1.1130.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd2e139191cd4a929dc4ef2612be6b6426eb6a8b738024d7d4b4e2d2b5309f83
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.99,
3
+ "total_flos": 1.336513820941394e+18,
4
+ "train_loss": 0.2994473668617931,
5
+ "train_runtime": 953.1608,
6
+ "train_samples_per_second": 66.313,
7
+ "train_steps_per_second": 0.516
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,400 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9918838103374626,
3
+ "best_model_checkpoint": "resnet-50-finetuned-dog-vs-cat/checkpoint-329",
4
+ "epoch": 2.986342943854325,
5
+ "eval_steps": 500,
6
+ "global_step": 492,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06,
13
+ "grad_norm": 0.5951987504959106,
14
+ "learning_rate": 1e-05,
15
+ "loss": 0.693,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.12,
20
+ "grad_norm": 0.743243932723999,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.6912,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.18,
27
+ "grad_norm": 0.6900031566619873,
28
+ "learning_rate": 3e-05,
29
+ "loss": 0.6871,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.24,
34
+ "grad_norm": 0.7302324175834656,
35
+ "learning_rate": 4e-05,
36
+ "loss": 0.68,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.3,
41
+ "grad_norm": 0.9748005270957947,
42
+ "learning_rate": 5e-05,
43
+ "loss": 0.6664,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.36,
48
+ "grad_norm": 1.131984829902649,
49
+ "learning_rate": 4.8868778280542986e-05,
50
+ "loss": 0.6408,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.42,
55
+ "grad_norm": 1.0235670804977417,
56
+ "learning_rate": 4.7737556561085976e-05,
57
+ "loss": 0.617,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.49,
62
+ "grad_norm": 1.1163190603256226,
63
+ "learning_rate": 4.660633484162896e-05,
64
+ "loss": 0.5807,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.55,
69
+ "grad_norm": 1.282690405845642,
70
+ "learning_rate": 4.547511312217195e-05,
71
+ "loss": 0.5438,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.61,
76
+ "grad_norm": 1.068588137626648,
77
+ "learning_rate": 4.434389140271493e-05,
78
+ "loss": 0.502,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.67,
83
+ "grad_norm": 0.9256152510643005,
84
+ "learning_rate": 4.321266968325792e-05,
85
+ "loss": 0.4701,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.73,
90
+ "grad_norm": 0.9594171643257141,
91
+ "learning_rate": 4.2081447963800907e-05,
92
+ "loss": 0.447,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.79,
97
+ "grad_norm": 1.0906646251678467,
98
+ "learning_rate": 4.095022624434389e-05,
99
+ "loss": 0.4238,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.85,
104
+ "grad_norm": 0.9434269070625305,
105
+ "learning_rate": 3.981900452488688e-05,
106
+ "loss": 0.4026,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.91,
111
+ "grad_norm": 1.15977144241333,
112
+ "learning_rate": 3.868778280542987e-05,
113
+ "loss": 0.3807,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.97,
118
+ "grad_norm": 0.9563286304473877,
119
+ "learning_rate": 3.7556561085972854e-05,
120
+ "loss": 0.3357,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 1.0,
125
+ "eval_accuracy": 0.9867577958137548,
126
+ "eval_loss": 0.22545380890369415,
127
+ "eval_runtime": 27.2631,
128
+ "eval_samples_per_second": 85.867,
129
+ "eval_steps_per_second": 2.714,
130
+ "step": 164
131
+ },
132
+ {
133
+ "epoch": 1.03,
134
+ "grad_norm": 1.0790256261825562,
135
+ "learning_rate": 3.642533936651584e-05,
136
+ "loss": 0.3166,
137
+ "step": 170
138
+ },
139
+ {
140
+ "epoch": 1.09,
141
+ "grad_norm": 0.9720374345779419,
142
+ "learning_rate": 3.529411764705883e-05,
143
+ "loss": 0.2961,
144
+ "step": 180
145
+ },
146
+ {
147
+ "epoch": 1.15,
148
+ "grad_norm": 1.2068029642105103,
149
+ "learning_rate": 3.416289592760181e-05,
150
+ "loss": 0.2753,
151
+ "step": 190
152
+ },
153
+ {
154
+ "epoch": 1.21,
155
+ "grad_norm": 1.0407794713974,
156
+ "learning_rate": 3.3031674208144794e-05,
157
+ "loss": 0.2631,
158
+ "step": 200
159
+ },
160
+ {
161
+ "epoch": 1.27,
162
+ "grad_norm": 0.9938662052154541,
163
+ "learning_rate": 3.1900452488687784e-05,
164
+ "loss": 0.2427,
165
+ "step": 210
166
+ },
167
+ {
168
+ "epoch": 1.34,
169
+ "grad_norm": 0.8150984048843384,
170
+ "learning_rate": 3.0769230769230774e-05,
171
+ "loss": 0.2163,
172
+ "step": 220
173
+ },
174
+ {
175
+ "epoch": 1.4,
176
+ "grad_norm": 0.7988576889038086,
177
+ "learning_rate": 2.9638009049773758e-05,
178
+ "loss": 0.216,
179
+ "step": 230
180
+ },
181
+ {
182
+ "epoch": 1.46,
183
+ "grad_norm": 1.1607186794281006,
184
+ "learning_rate": 2.850678733031674e-05,
185
+ "loss": 0.2086,
186
+ "step": 240
187
+ },
188
+ {
189
+ "epoch": 1.52,
190
+ "grad_norm": 1.0582228899002075,
191
+ "learning_rate": 2.737556561085973e-05,
192
+ "loss": 0.1857,
193
+ "step": 250
194
+ },
195
+ {
196
+ "epoch": 1.58,
197
+ "grad_norm": 0.8427866697311401,
198
+ "learning_rate": 2.6244343891402718e-05,
199
+ "loss": 0.1808,
200
+ "step": 260
201
+ },
202
+ {
203
+ "epoch": 1.64,
204
+ "grad_norm": 0.8713784217834473,
205
+ "learning_rate": 2.51131221719457e-05,
206
+ "loss": 0.1689,
207
+ "step": 270
208
+ },
209
+ {
210
+ "epoch": 1.7,
211
+ "grad_norm": 0.8434356451034546,
212
+ "learning_rate": 2.3981900452488688e-05,
213
+ "loss": 0.1732,
214
+ "step": 280
215
+ },
216
+ {
217
+ "epoch": 1.76,
218
+ "grad_norm": 0.7676334977149963,
219
+ "learning_rate": 2.2850678733031675e-05,
220
+ "loss": 0.1678,
221
+ "step": 290
222
+ },
223
+ {
224
+ "epoch": 1.82,
225
+ "grad_norm": 0.6907545328140259,
226
+ "learning_rate": 2.1719457013574662e-05,
227
+ "loss": 0.1624,
228
+ "step": 300
229
+ },
230
+ {
231
+ "epoch": 1.88,
232
+ "grad_norm": 0.7248879075050354,
233
+ "learning_rate": 2.058823529411765e-05,
234
+ "loss": 0.1613,
235
+ "step": 310
236
+ },
237
+ {
238
+ "epoch": 1.94,
239
+ "grad_norm": 1.4968321323394775,
240
+ "learning_rate": 1.9457013574660635e-05,
241
+ "loss": 0.1683,
242
+ "step": 320
243
+ },
244
+ {
245
+ "epoch": 2.0,
246
+ "eval_accuracy": 0.9918838103374626,
247
+ "eval_loss": 0.05769222229719162,
248
+ "eval_runtime": 27.9863,
249
+ "eval_samples_per_second": 83.648,
250
+ "eval_steps_per_second": 2.644,
251
+ "step": 329
252
+ },
253
+ {
254
+ "epoch": 2.0,
255
+ "grad_norm": 1.3721855878829956,
256
+ "learning_rate": 1.832579185520362e-05,
257
+ "loss": 0.1635,
258
+ "step": 330
259
+ },
260
+ {
261
+ "epoch": 2.06,
262
+ "grad_norm": 1.0472960472106934,
263
+ "learning_rate": 1.7194570135746606e-05,
264
+ "loss": 0.1576,
265
+ "step": 340
266
+ },
267
+ {
268
+ "epoch": 2.12,
269
+ "grad_norm": 0.9706544280052185,
270
+ "learning_rate": 1.6063348416289596e-05,
271
+ "loss": 0.157,
272
+ "step": 350
273
+ },
274
+ {
275
+ "epoch": 2.19,
276
+ "grad_norm": 0.6168745756149292,
277
+ "learning_rate": 1.493212669683258e-05,
278
+ "loss": 0.1535,
279
+ "step": 360
280
+ },
281
+ {
282
+ "epoch": 2.25,
283
+ "grad_norm": 2.201444149017334,
284
+ "learning_rate": 1.3800904977375568e-05,
285
+ "loss": 0.1494,
286
+ "step": 370
287
+ },
288
+ {
289
+ "epoch": 2.31,
290
+ "grad_norm": 0.6486875414848328,
291
+ "learning_rate": 1.2669683257918553e-05,
292
+ "loss": 0.1502,
293
+ "step": 380
294
+ },
295
+ {
296
+ "epoch": 2.37,
297
+ "grad_norm": 0.7056523561477661,
298
+ "learning_rate": 1.153846153846154e-05,
299
+ "loss": 0.1446,
300
+ "step": 390
301
+ },
302
+ {
303
+ "epoch": 2.43,
304
+ "grad_norm": 1.3912076950073242,
305
+ "learning_rate": 1.0407239819004526e-05,
306
+ "loss": 0.1428,
307
+ "step": 400
308
+ },
309
+ {
310
+ "epoch": 2.49,
311
+ "grad_norm": 1.0273276567459106,
312
+ "learning_rate": 9.276018099547511e-06,
313
+ "loss": 0.144,
314
+ "step": 410
315
+ },
316
+ {
317
+ "epoch": 2.55,
318
+ "grad_norm": 1.1831059455871582,
319
+ "learning_rate": 8.144796380090498e-06,
320
+ "loss": 0.1572,
321
+ "step": 420
322
+ },
323
+ {
324
+ "epoch": 2.61,
325
+ "grad_norm": 1.5577871799468994,
326
+ "learning_rate": 7.013574660633485e-06,
327
+ "loss": 0.1419,
328
+ "step": 430
329
+ },
330
+ {
331
+ "epoch": 2.67,
332
+ "grad_norm": 1.2978945970535278,
333
+ "learning_rate": 5.882352941176471e-06,
334
+ "loss": 0.1562,
335
+ "step": 440
336
+ },
337
+ {
338
+ "epoch": 2.73,
339
+ "grad_norm": 0.5621709823608398,
340
+ "learning_rate": 4.751131221719457e-06,
341
+ "loss": 0.1352,
342
+ "step": 450
343
+ },
344
+ {
345
+ "epoch": 2.79,
346
+ "grad_norm": 1.0484684705734253,
347
+ "learning_rate": 3.619909502262444e-06,
348
+ "loss": 0.143,
349
+ "step": 460
350
+ },
351
+ {
352
+ "epoch": 2.85,
353
+ "grad_norm": 0.9431272149085999,
354
+ "learning_rate": 2.48868778280543e-06,
355
+ "loss": 0.1517,
356
+ "step": 470
357
+ },
358
+ {
359
+ "epoch": 2.91,
360
+ "grad_norm": 0.8055468201637268,
361
+ "learning_rate": 1.3574660633484164e-06,
362
+ "loss": 0.1455,
363
+ "step": 480
364
+ },
365
+ {
366
+ "epoch": 2.97,
367
+ "grad_norm": 1.1355047225952148,
368
+ "learning_rate": 2.2624434389140275e-07,
369
+ "loss": 0.1448,
370
+ "step": 490
371
+ },
372
+ {
373
+ "epoch": 2.99,
374
+ "eval_accuracy": 0.9918838103374626,
375
+ "eval_loss": 0.04604041948914528,
376
+ "eval_runtime": 27.4499,
377
+ "eval_samples_per_second": 85.283,
378
+ "eval_steps_per_second": 2.696,
379
+ "step": 492
380
+ },
381
+ {
382
+ "epoch": 2.99,
383
+ "step": 492,
384
+ "total_flos": 1.336513820941394e+18,
385
+ "train_loss": 0.2994473668617931,
386
+ "train_runtime": 953.1608,
387
+ "train_samples_per_second": 66.313,
388
+ "train_steps_per_second": 0.516
389
+ }
390
+ ],
391
+ "logging_steps": 10,
392
+ "max_steps": 492,
393
+ "num_input_tokens_seen": 0,
394
+ "num_train_epochs": 3,
395
+ "save_steps": 500,
396
+ "total_flos": 1.336513820941394e+18,
397
+ "train_batch_size": 32,
398
+ "trial_name": null,
399
+ "trial_params": null
400
+ }