NiharGupte committed on
Commit
bb19a0e
1 Parent(s): a1b6065

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,8 +1,13 @@
1
  {
2
- "epoch": 5.0,
3
- "total_flos": 1.575666934345728e+17,
 
 
 
 
 
4
  "train_loss": 0.0,
5
- "train_runtime": 166.8151,
6
- "train_samples_per_second": 44.48,
7
- "train_steps_per_second": 1.409
8
  }
 
1
  {
2
+ "epoch": 4.680851063829787,
3
+ "eval_accuracy": 0.4889937106918239,
4
+ "eval_loss": NaN,
5
+ "eval_runtime": 8.3571,
6
+ "eval_samples_per_second": 76.103,
7
+ "eval_steps_per_second": 2.393,
8
+ "total_flos": 1.477984078577664e+17,
9
  "train_loss": 0.0,
10
+ "train_runtime": 167.7096,
11
+ "train_samples_per_second": 44.243,
12
+ "train_steps_per_second": 0.328
13
  }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.680851063829787,
3
+ "eval_accuracy": 0.4889937106918239,
4
+ "eval_loss": NaN,
5
+ "eval_runtime": 8.3571,
6
+ "eval_samples_per_second": 76.103,
7
+ "eval_steps_per_second": 2.393
8
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:780eabfb1ede8e9e88f021a6b3eb3e36687167bee7cd7ec380ba2499c1df5c17
3
  size 94302952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83218fc677ef895342b4874306a47dd0b316a563b91ba4c24f45fc17c74d9e8e
3
  size 94302952
runs/May04_07-56-14_4f22111e1b44/events.out.tfevents.1714809564.4f22111e1b44.9006.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d76245d32abc3c7bbea1fb89684cfdb56a8834c299c3bb8418d8dffbc0355a5
3
+ size 405
runs/May04_08-00-17_4f22111e1b44/events.out.tfevents.1714809630.4f22111e1b44.9006.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1768cdd459268561b5738d0910350fcb5d2d6bcf6ecbbd89b6176f367f723268
3
+ size 5370
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
- "total_flos": 1.575666934345728e+17,
4
  "train_loss": 0.0,
5
- "train_runtime": 166.8151,
6
- "train_samples_per_second": 44.48,
7
- "train_steps_per_second": 1.409
8
  }
 
1
  {
2
+ "epoch": 4.680851063829787,
3
+ "total_flos": 1.477984078577664e+17,
4
  "train_loss": 0.0,
5
+ "train_runtime": 167.7096,
6
+ "train_samples_per_second": 44.243,
7
+ "train_steps_per_second": 0.328
8
  }
trainer_state.json CHANGED
@@ -1,235 +1,109 @@
1
  {
2
  "best_metric": 0.4889937106918239,
3
- "best_model_checkpoint": "resnet-50-finetuned-student_kaggle/checkpoint-47",
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 235,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 0.2127659574468085,
13
- "grad_norm": NaN,
14
- "learning_rate": 2.0833333333333336e-05,
15
- "loss": 0.0,
16
- "step": 10
17
- },
18
- {
19
- "epoch": 0.425531914893617,
20
- "grad_norm": NaN,
21
- "learning_rate": 4.166666666666667e-05,
22
- "loss": 0.0,
23
- "step": 20
24
- },
25
- {
26
- "epoch": 0.6382978723404256,
27
- "grad_norm": NaN,
28
- "learning_rate": 4.857819905213271e-05,
29
- "loss": 0.0,
30
- "step": 30
31
- },
32
  {
33
  "epoch": 0.851063829787234,
34
  "grad_norm": NaN,
35
- "learning_rate": 4.620853080568721e-05,
36
  "loss": 0.0,
37
- "step": 40
38
  },
39
  {
40
- "epoch": 1.0,
41
  "eval_accuracy": 0.4889937106918239,
42
  "eval_loss": NaN,
43
- "eval_runtime": 8.1507,
44
- "eval_samples_per_second": 78.03,
45
- "eval_steps_per_second": 2.454,
46
- "step": 47
47
- },
48
- {
49
- "epoch": 1.0638297872340425,
50
- "grad_norm": NaN,
51
- "learning_rate": 4.383886255924171e-05,
52
- "loss": 0.0,
53
- "step": 50
54
- },
55
- {
56
- "epoch": 1.2765957446808511,
57
- "grad_norm": NaN,
58
- "learning_rate": 4.146919431279621e-05,
59
- "loss": 0.0,
60
- "step": 60
61
- },
62
- {
63
- "epoch": 1.4893617021276595,
64
- "grad_norm": NaN,
65
- "learning_rate": 3.909952606635071e-05,
66
- "loss": 0.0,
67
- "step": 70
68
  },
69
  {
70
  "epoch": 1.702127659574468,
71
  "grad_norm": NaN,
72
- "learning_rate": 3.672985781990522e-05,
73
  "loss": 0.0,
74
- "step": 80
75
- },
76
- {
77
- "epoch": 1.9148936170212765,
78
- "grad_norm": NaN,
79
- "learning_rate": 3.4360189573459716e-05,
80
- "loss": 0.0,
81
- "step": 90
82
  },
83
  {
84
- "epoch": 2.0,
85
  "eval_accuracy": 0.4889937106918239,
86
  "eval_loss": NaN,
87
- "eval_runtime": 8.3873,
88
- "eval_samples_per_second": 75.829,
89
- "eval_steps_per_second": 2.385,
90
- "step": 94
91
- },
92
- {
93
- "epoch": 2.127659574468085,
94
- "grad_norm": NaN,
95
- "learning_rate": 3.1990521327014215e-05,
96
- "loss": 0.0,
97
- "step": 100
98
- },
99
- {
100
- "epoch": 2.3404255319148937,
101
- "grad_norm": NaN,
102
- "learning_rate": 2.962085308056872e-05,
103
- "loss": 0.0,
104
- "step": 110
105
  },
106
  {
107
  "epoch": 2.5531914893617023,
108
  "grad_norm": NaN,
109
- "learning_rate": 2.7251184834123224e-05,
110
- "loss": 0.0,
111
- "step": 120
112
- },
113
- {
114
- "epoch": 2.7659574468085104,
115
- "grad_norm": NaN,
116
- "learning_rate": 2.4881516587677726e-05,
117
  "loss": 0.0,
118
- "step": 130
119
  },
120
  {
121
  "epoch": 2.978723404255319,
122
- "grad_norm": NaN,
123
- "learning_rate": 2.251184834123223e-05,
124
- "loss": 0.0,
125
- "step": 140
126
- },
127
- {
128
- "epoch": 3.0,
129
  "eval_accuracy": 0.4889937106918239,
130
  "eval_loss": NaN,
131
- "eval_runtime": 7.8854,
132
- "eval_samples_per_second": 80.655,
133
- "eval_steps_per_second": 2.536,
134
- "step": 141
135
- },
136
- {
137
- "epoch": 3.1914893617021276,
138
- "grad_norm": NaN,
139
- "learning_rate": 2.014218009478673e-05,
140
- "loss": 0.0,
141
- "step": 150
142
  },
143
  {
144
  "epoch": 3.404255319148936,
145
  "grad_norm": NaN,
146
- "learning_rate": 1.7772511848341233e-05,
147
- "loss": 0.0,
148
- "step": 160
149
- },
150
- {
151
- "epoch": 3.617021276595745,
152
- "grad_norm": NaN,
153
- "learning_rate": 1.5402843601895736e-05,
154
- "loss": 0.0,
155
- "step": 170
156
- },
157
- {
158
- "epoch": 3.829787234042553,
159
- "grad_norm": NaN,
160
- "learning_rate": 1.3033175355450238e-05,
161
  "loss": 0.0,
162
- "step": 180
163
  },
164
  {
165
  "epoch": 4.0,
166
  "eval_accuracy": 0.4889937106918239,
167
  "eval_loss": NaN,
168
- "eval_runtime": 8.0232,
169
- "eval_samples_per_second": 79.27,
170
- "eval_steps_per_second": 2.493,
171
- "step": 188
172
- },
173
- {
174
- "epoch": 4.042553191489362,
175
- "grad_norm": NaN,
176
- "learning_rate": 1.066350710900474e-05,
177
- "loss": 0.0,
178
- "step": 190
179
  },
180
  {
181
  "epoch": 4.25531914893617,
182
  "grad_norm": NaN,
183
- "learning_rate": 8.293838862559241e-06,
184
- "loss": 0.0,
185
- "step": 200
186
- },
187
- {
188
- "epoch": 4.468085106382979,
189
- "grad_norm": NaN,
190
- "learning_rate": 5.924170616113745e-06,
191
  "loss": 0.0,
192
- "step": 210
193
  },
194
  {
195
  "epoch": 4.680851063829787,
196
- "grad_norm": NaN,
197
- "learning_rate": 3.5545023696682464e-06,
198
- "loss": 0.0,
199
- "step": 220
200
- },
201
- {
202
- "epoch": 4.8936170212765955,
203
- "grad_norm": NaN,
204
- "learning_rate": 1.1848341232227488e-06,
205
- "loss": 0.0,
206
- "step": 230
207
- },
208
- {
209
- "epoch": 5.0,
210
  "eval_accuracy": 0.4889937106918239,
211
  "eval_loss": NaN,
212
- "eval_runtime": 8.3163,
213
- "eval_samples_per_second": 76.476,
214
- "eval_steps_per_second": 2.405,
215
- "step": 235
216
  },
217
  {
218
- "epoch": 5.0,
219
- "step": 235,
220
- "total_flos": 1.575666934345728e+17,
221
  "train_loss": 0.0,
222
- "train_runtime": 166.8151,
223
- "train_samples_per_second": 44.48,
224
- "train_steps_per_second": 1.409
225
  }
226
  ],
227
  "logging_steps": 10,
228
- "max_steps": 235,
229
  "num_input_tokens_seen": 0,
230
  "num_train_epochs": 5,
231
  "save_steps": 500,
232
- "total_flos": 1.575666934345728e+17,
233
  "train_batch_size": 32,
234
  "trial_name": null,
235
  "trial_params": null
 
1
  {
2
  "best_metric": 0.4889937106918239,
3
+ "best_model_checkpoint": "resnet-50-finetuned-student_kaggle/checkpoint-11",
4
+ "epoch": 4.680851063829787,
5
  "eval_steps": 500,
6
+ "global_step": 55,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {
12
  "epoch": 0.851063829787234,
13
  "grad_norm": NaN,
14
+ "learning_rate": 4.591836734693878e-05,
15
  "loss": 0.0,
16
+ "step": 10
17
  },
18
  {
19
+ "epoch": 0.9361702127659575,
20
  "eval_accuracy": 0.4889937106918239,
21
  "eval_loss": NaN,
22
+ "eval_runtime": 8.3443,
23
+ "eval_samples_per_second": 76.22,
24
+ "eval_steps_per_second": 2.397,
25
+ "step": 11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  },
27
  {
28
  "epoch": 1.702127659574468,
29
  "grad_norm": NaN,
30
+ "learning_rate": 3.571428571428572e-05,
31
  "loss": 0.0,
32
+ "step": 20
 
 
 
 
 
 
 
33
  },
34
  {
35
+ "epoch": 1.9574468085106385,
36
  "eval_accuracy": 0.4889937106918239,
37
  "eval_loss": NaN,
38
+ "eval_runtime": 7.5463,
39
+ "eval_samples_per_second": 84.28,
40
+ "eval_steps_per_second": 2.65,
41
+ "step": 23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  },
43
  {
44
  "epoch": 2.5531914893617023,
45
  "grad_norm": NaN,
46
+ "learning_rate": 2.5510204081632654e-05,
 
 
 
 
 
 
 
47
  "loss": 0.0,
48
+ "step": 30
49
  },
50
  {
51
  "epoch": 2.978723404255319,
 
 
 
 
 
 
 
52
  "eval_accuracy": 0.4889937106918239,
53
  "eval_loss": NaN,
54
+ "eval_runtime": 8.3005,
55
+ "eval_samples_per_second": 76.621,
56
+ "eval_steps_per_second": 2.409,
57
+ "step": 35
 
 
 
 
 
 
 
58
  },
59
  {
60
  "epoch": 3.404255319148936,
61
  "grad_norm": NaN,
62
+ "learning_rate": 1.5306122448979594e-05,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  "loss": 0.0,
64
+ "step": 40
65
  },
66
  {
67
  "epoch": 4.0,
68
  "eval_accuracy": 0.4889937106918239,
69
  "eval_loss": NaN,
70
+ "eval_runtime": 14.6795,
71
+ "eval_samples_per_second": 43.326,
72
+ "eval_steps_per_second": 1.362,
73
+ "step": 47
 
 
 
 
 
 
 
74
  },
75
  {
76
  "epoch": 4.25531914893617,
77
  "grad_norm": NaN,
78
+ "learning_rate": 5.102040816326531e-06,
 
 
 
 
 
 
 
79
  "loss": 0.0,
80
+ "step": 50
81
  },
82
  {
83
  "epoch": 4.680851063829787,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  "eval_accuracy": 0.4889937106918239,
85
  "eval_loss": NaN,
86
+ "eval_runtime": 7.5742,
87
+ "eval_samples_per_second": 83.969,
88
+ "eval_steps_per_second": 2.641,
89
+ "step": 55
90
  },
91
  {
92
+ "epoch": 4.680851063829787,
93
+ "step": 55,
94
+ "total_flos": 1.477984078577664e+17,
95
  "train_loss": 0.0,
96
+ "train_runtime": 167.7096,
97
+ "train_samples_per_second": 44.243,
98
+ "train_steps_per_second": 0.328
99
  }
100
  ],
101
  "logging_steps": 10,
102
+ "max_steps": 55,
103
  "num_input_tokens_seen": 0,
104
  "num_train_epochs": 5,
105
  "save_steps": 500,
106
+ "total_flos": 1.477984078577664e+17,
107
  "train_batch_size": 32,
108
  "trial_name": null,
109
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8121ae27cdcd1b7b5c4adc8363f0af01cc99f2b977ff73e68a871ff6fbd7636
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2f07845e5a4f177d4d37279be2bdadd143d53be652dea45ac33053323beb4cc
3
  size 5048