tangg555 committed on
Commit
259df76
1 Parent(s): 4108075

Training in progress, epoch 0

Browse files
.ipynb_checkpoints/all_results-checkpoint.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.985781990521327,
3
+ "eval_accuracy": 0.23666666666666666,
4
+ "eval_loss": 2.182861566543579,
5
+ "eval_runtime": 72.7055,
6
+ "eval_samples_per_second": 82.525,
7
+ "eval_steps_per_second": 2.586,
8
+ "total_flos": 4.411646023570175e+19,
9
+ "train_loss": 2.258255742845081,
10
+ "train_runtime": 9823.5604,
11
+ "train_samples_per_second": 16.491,
12
+ "train_steps_per_second": 0.017
13
+ }
.ipynb_checkpoints/eval_results-checkpoint.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.985781990521327,
3
+ "eval_accuracy": 0.23666666666666666,
4
+ "eval_loss": 2.182861566543579,
5
+ "eval_runtime": 72.7055,
6
+ "eval_samples_per_second": 82.525,
7
+ "eval_steps_per_second": 2.586
8
+ }
.ipynb_checkpoints/train_results-checkpoint.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.985781990521327,
3
+ "total_flos": 4.411646023570175e+19,
4
+ "train_loss": 2.258255742845081,
5
+ "train_runtime": 9823.5604,
6
+ "train_samples_per_second": 16.491,
7
+ "train_steps_per_second": 0.017
8
+ }
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 2.985781990521327,
3
- "eval_accuracy": 0.43766666666666665,
4
- "eval_loss": 2.0961079597473145,
5
- "eval_runtime": 24.6417,
6
- "eval_samples_per_second": 243.49,
7
- "eval_steps_per_second": 1.907,
8
  "total_flos": 4.411646023570175e+19,
9
- "train_loss": 2.214817982628232,
10
- "train_runtime": 948.9642,
11
- "train_samples_per_second": 170.712,
12
- "train_steps_per_second": 0.332
13
  }
 
1
  {
2
  "epoch": 2.985781990521327,
3
+ "eval_accuracy": 0.23666666666666666,
4
+ "eval_loss": 2.182861566543579,
5
+ "eval_runtime": 72.7055,
6
+ "eval_samples_per_second": 82.525,
7
+ "eval_steps_per_second": 2.586,
8
  "total_flos": 4.411646023570175e+19,
9
+ "train_loss": 2.258255742845081,
10
+ "train_runtime": 9823.5604,
11
+ "train_samples_per_second": 16.491,
12
+ "train_steps_per_second": 0.017
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.985781990521327,
3
- "eval_accuracy": 0.43766666666666665,
4
- "eval_loss": 2.0961079597473145,
5
- "eval_runtime": 24.6417,
6
- "eval_samples_per_second": 243.49,
7
- "eval_steps_per_second": 1.907
8
  }
 
1
  {
2
  "epoch": 2.985781990521327,
3
+ "eval_accuracy": 0.23666666666666666,
4
+ "eval_loss": 2.182861566543579,
5
+ "eval_runtime": 72.7055,
6
+ "eval_samples_per_second": 82.525,
7
+ "eval_steps_per_second": 2.586
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a33cf281bd713df6589f0072db8517a0a874cd790d92ce52838e3a0c4303efbf
3
  size 1212808056
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8065d8c46ab562c99119416ffc302bbafeb72fe0b84f54d3baf4ec79435cc46
3
  size 1212808056
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.985781990521327,
3
  "total_flos": 4.411646023570175e+19,
4
- "train_loss": 2.214817982628232,
5
- "train_runtime": 948.9642,
6
- "train_samples_per_second": 170.712,
7
- "train_steps_per_second": 0.332
8
  }
 
1
  {
2
  "epoch": 2.985781990521327,
3
  "total_flos": 4.411646023570175e+19,
4
+ "train_loss": 2.258255742845081,
5
+ "train_runtime": 9823.5604,
6
+ "train_samples_per_second": 16.491,
7
+ "train_steps_per_second": 0.017
8
  }
trainer_state.json CHANGED
@@ -1,269 +1,164 @@
1
  {
2
- "best_metric": 0.43766666666666665,
3
- "best_model_checkpoint": "clip-vit-large-patch14-finetuned-clip-vit-large-patch14-mnist_linear_probe/checkpoint-315",
4
  "epoch": 2.985781990521327,
5
  "eval_steps": 500,
6
- "global_step": 315,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0947867298578199,
13
- "grad_norm": 2.288234233856201,
14
- "learning_rate": 1.5625e-05,
15
- "loss": 2.4094,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.1895734597156398,
20
- "grad_norm": 2.1556286811828613,
21
- "learning_rate": 3.125e-05,
22
- "loss": 2.3858,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.2843601895734597,
27
- "grad_norm": 2.1012516021728516,
28
- "learning_rate": 4.6875e-05,
29
- "loss": 2.3743,
30
  "step": 30
31
  },
32
  {
33
- "epoch": 0.3791469194312796,
34
- "grad_norm": 1.212685227394104,
35
- "learning_rate": 4.858657243816255e-05,
36
- "loss": 2.3353,
37
  "step": 40
38
  },
39
  {
40
- "epoch": 0.47393364928909953,
41
- "grad_norm": 1.0641510486602783,
42
- "learning_rate": 4.6819787985865726e-05,
43
- "loss": 2.3048,
44
  "step": 50
45
  },
46
  {
47
- "epoch": 0.5687203791469194,
48
- "grad_norm": 0.9478774070739746,
49
- "learning_rate": 4.50530035335689e-05,
50
- "loss": 2.2905,
 
 
 
 
 
 
 
 
 
51
  "step": 60
52
  },
53
  {
54
- "epoch": 0.6635071090047393,
55
- "grad_norm": 0.7033135294914246,
56
- "learning_rate": 4.328621908127209e-05,
57
- "loss": 2.2732,
58
  "step": 70
59
  },
60
  {
61
- "epoch": 0.7582938388625592,
62
- "grad_norm": 0.8783323168754578,
63
- "learning_rate": 4.1519434628975266e-05,
64
- "loss": 2.2636,
65
  "step": 80
66
  },
67
  {
68
- "epoch": 0.8530805687203792,
69
- "grad_norm": 0.9178075790405273,
70
- "learning_rate": 3.975265017667845e-05,
71
- "loss": 2.2455,
72
  "step": 90
73
  },
74
  {
75
- "epoch": 0.9478672985781991,
76
- "grad_norm": 0.6878641843795776,
77
- "learning_rate": 3.7985865724381624e-05,
78
- "loss": 2.2376,
79
  "step": 100
80
  },
81
  {
82
- "epoch": 0.995260663507109,
83
- "eval_accuracy": 0.2981666666666667,
84
- "eval_loss": 2.2017133235931396,
85
- "eval_runtime": 24.7638,
86
- "eval_samples_per_second": 242.289,
87
- "eval_steps_per_second": 1.898,
88
- "step": 105
89
- },
90
- {
91
- "epoch": 1.042654028436019,
92
- "grad_norm": 0.9457613229751587,
93
- "learning_rate": 3.621908127208481e-05,
94
- "loss": 2.2256,
95
  "step": 110
96
  },
97
  {
98
- "epoch": 1.1374407582938388,
99
- "grad_norm": 0.850648820400238,
100
- "learning_rate": 3.445229681978799e-05,
101
- "loss": 2.2144,
 
 
 
 
 
 
 
 
 
102
  "step": 120
103
  },
104
  {
105
- "epoch": 1.2322274881516588,
106
- "grad_norm": 0.8362263441085815,
107
- "learning_rate": 3.2685512367491163e-05,
108
- "loss": 2.2111,
109
  "step": 130
110
  },
111
  {
112
- "epoch": 1.3270142180094786,
113
- "grad_norm": 0.6047300100326538,
114
- "learning_rate": 3.0918727915194346e-05,
115
- "loss": 2.2002,
116
  "step": 140
117
  },
118
  {
119
- "epoch": 1.4218009478672986,
120
- "grad_norm": 0.9275925755500793,
121
- "learning_rate": 2.915194346289753e-05,
122
- "loss": 2.1941,
123
  "step": 150
124
  },
125
- {
126
- "epoch": 1.5165876777251186,
127
- "grad_norm": 0.8383633494377136,
128
- "learning_rate": 2.738515901060071e-05,
129
- "loss": 2.1916,
130
- "step": 160
131
- },
132
- {
133
- "epoch": 1.6113744075829384,
134
- "grad_norm": 0.7653341293334961,
135
- "learning_rate": 2.5618374558303885e-05,
136
- "loss": 2.1859,
137
- "step": 170
138
- },
139
- {
140
- "epoch": 1.7061611374407581,
141
- "grad_norm": 0.775774359703064,
142
- "learning_rate": 2.3851590106007068e-05,
143
- "loss": 2.1796,
144
- "step": 180
145
- },
146
- {
147
- "epoch": 1.8009478672985781,
148
- "grad_norm": 0.932090699672699,
149
- "learning_rate": 2.2084805653710246e-05,
150
- "loss": 2.1708,
151
- "step": 190
152
- },
153
- {
154
- "epoch": 1.8957345971563981,
155
- "grad_norm": 0.9772677421569824,
156
- "learning_rate": 2.031802120141343e-05,
157
- "loss": 2.1684,
158
- "step": 200
159
- },
160
- {
161
- "epoch": 1.9905213270142181,
162
- "grad_norm": 0.7905783653259277,
163
- "learning_rate": 1.855123674911661e-05,
164
- "loss": 2.1641,
165
- "step": 210
166
- },
167
- {
168
- "epoch": 2.0,
169
- "eval_accuracy": 0.4041666666666667,
170
- "eval_loss": 2.1207876205444336,
171
- "eval_runtime": 24.705,
172
- "eval_samples_per_second": 242.866,
173
- "eval_steps_per_second": 1.902,
174
- "step": 211
175
- },
176
- {
177
- "epoch": 2.085308056872038,
178
- "grad_norm": 1.0608142614364624,
179
- "learning_rate": 1.678445229681979e-05,
180
- "loss": 2.1633,
181
- "step": 220
182
- },
183
- {
184
- "epoch": 2.1800947867298577,
185
- "grad_norm": 1.0211544036865234,
186
- "learning_rate": 1.501766784452297e-05,
187
- "loss": 2.1518,
188
- "step": 230
189
- },
190
- {
191
- "epoch": 2.2748815165876777,
192
- "grad_norm": 0.9483183026313782,
193
- "learning_rate": 1.3250883392226149e-05,
194
- "loss": 2.1507,
195
- "step": 240
196
- },
197
- {
198
- "epoch": 2.3696682464454977,
199
- "grad_norm": 0.6434796452522278,
200
- "learning_rate": 1.148409893992933e-05,
201
- "loss": 2.1477,
202
- "step": 250
203
- },
204
- {
205
- "epoch": 2.4644549763033177,
206
- "grad_norm": 0.7338018417358398,
207
- "learning_rate": 9.717314487632508e-06,
208
- "loss": 2.1439,
209
- "step": 260
210
- },
211
- {
212
- "epoch": 2.5592417061611377,
213
- "grad_norm": 0.8191038966178894,
214
- "learning_rate": 7.950530035335689e-06,
215
- "loss": 2.1466,
216
- "step": 270
217
- },
218
- {
219
- "epoch": 2.654028436018957,
220
- "grad_norm": 0.8383843898773193,
221
- "learning_rate": 6.18374558303887e-06,
222
- "loss": 2.1435,
223
- "step": 280
224
- },
225
- {
226
- "epoch": 2.748815165876777,
227
- "grad_norm": 0.7335329055786133,
228
- "learning_rate": 4.41696113074205e-06,
229
- "loss": 2.1399,
230
- "step": 290
231
- },
232
  {
233
  "epoch": 2.843601895734597,
234
- "grad_norm": 0.9002769589424133,
235
- "learning_rate": 2.65017667844523e-06,
236
- "loss": 2.142,
237
- "step": 300
238
- },
239
- {
240
- "epoch": 2.938388625592417,
241
- "grad_norm": 0.8580403327941895,
242
- "learning_rate": 8.8339222614841e-07,
243
- "loss": 2.1453,
244
- "step": 310
245
  },
246
  {
247
  "epoch": 2.985781990521327,
248
- "eval_accuracy": 0.43766666666666665,
249
- "eval_loss": 2.0961079597473145,
250
- "eval_runtime": 24.9225,
251
- "eval_samples_per_second": 240.747,
252
- "eval_steps_per_second": 1.886,
253
- "step": 315
254
  },
255
  {
256
  "epoch": 2.985781990521327,
257
- "step": 315,
258
  "total_flos": 4.411646023570175e+19,
259
- "train_loss": 2.214817982628232,
260
- "train_runtime": 948.9642,
261
- "train_samples_per_second": 170.712,
262
- "train_steps_per_second": 0.332
263
  }
264
  ],
265
  "logging_steps": 10,
266
- "max_steps": 315,
267
  "num_input_tokens_seen": 0,
268
  "num_train_epochs": 3,
269
  "save_steps": 500,
@@ -280,7 +175,7 @@
280
  }
281
  },
282
  "total_flos": 4.411646023570175e+19,
283
- "train_batch_size": 128,
284
  "trial_name": null,
285
  "trial_params": null
286
  }
 
1
  {
2
+ "best_metric": 0.23666666666666666,
3
+ "best_model_checkpoint": "clip-vit-large-patch14-finetuned-clip-vit-large-patch14-mnist_linear_probe/checkpoint-168",
4
  "epoch": 2.985781990521327,
5
  "eval_steps": 500,
6
+ "global_step": 168,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.17772511848341233,
13
+ "grad_norm": 2.373176097869873,
14
+ "learning_rate": 2.9411764705882354e-05,
15
+ "loss": 2.4028,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.35545023696682465,
20
+ "grad_norm": 2.2507705688476562,
21
+ "learning_rate": 4.900662251655629e-05,
22
+ "loss": 2.3848,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 0.533175355450237,
27
+ "grad_norm": 1.5781500339508057,
28
+ "learning_rate": 4.56953642384106e-05,
29
+ "loss": 2.3427,
30
  "step": 30
31
  },
32
  {
33
+ "epoch": 0.7109004739336493,
34
+ "grad_norm": 1.5205079317092896,
35
+ "learning_rate": 4.23841059602649e-05,
36
+ "loss": 2.3118,
37
  "step": 40
38
  },
39
  {
40
+ "epoch": 0.8886255924170616,
41
+ "grad_norm": 1.119712233543396,
42
+ "learning_rate": 3.907284768211921e-05,
43
+ "loss": 2.2886,
44
  "step": 50
45
  },
46
  {
47
+ "epoch": 0.995260663507109,
48
+ "eval_accuracy": 0.157,
49
+ "eval_loss": 2.2661008834838867,
50
+ "eval_runtime": 478.4456,
51
+ "eval_samples_per_second": 12.541,
52
+ "eval_steps_per_second": 0.393,
53
+ "step": 56
54
+ },
55
+ {
56
+ "epoch": 1.066350710900474,
57
+ "grad_norm": 1.0753259658813477,
58
+ "learning_rate": 3.576158940397351e-05,
59
+ "loss": 2.2701,
60
  "step": 60
61
  },
62
  {
63
+ "epoch": 1.2440758293838863,
64
+ "grad_norm": 0.8510423302650452,
65
+ "learning_rate": 3.2450331125827816e-05,
66
+ "loss": 2.2516,
67
  "step": 70
68
  },
69
  {
70
+ "epoch": 1.4218009478672986,
71
+ "grad_norm": 0.9210019111633301,
72
+ "learning_rate": 2.913907284768212e-05,
73
+ "loss": 2.2406,
74
  "step": 80
75
  },
76
  {
77
+ "epoch": 1.599526066350711,
78
+ "grad_norm": 0.9463822245597839,
79
+ "learning_rate": 2.5827814569536424e-05,
80
+ "loss": 2.2312,
81
  "step": 90
82
  },
83
  {
84
+ "epoch": 1.7772511848341233,
85
+ "grad_norm": 0.8657775521278381,
86
+ "learning_rate": 2.2516556291390732e-05,
87
+ "loss": 2.2231,
88
  "step": 100
89
  },
90
  {
91
+ "epoch": 1.9549763033175356,
92
+ "grad_norm": 0.8078551292419434,
93
+ "learning_rate": 1.9205298013245036e-05,
94
+ "loss": 2.2153,
 
 
 
 
 
 
 
 
 
95
  "step": 110
96
  },
97
  {
98
+ "epoch": 1.9905213270142181,
99
+ "eval_accuracy": 0.1945,
100
+ "eval_loss": 2.2003655433654785,
101
+ "eval_runtime": 527.8354,
102
+ "eval_samples_per_second": 11.367,
103
+ "eval_steps_per_second": 0.356,
104
+ "step": 112
105
+ },
106
+ {
107
+ "epoch": 2.132701421800948,
108
+ "grad_norm": 0.7690733075141907,
109
+ "learning_rate": 1.589403973509934e-05,
110
+ "loss": 2.2121,
111
  "step": 120
112
  },
113
  {
114
+ "epoch": 2.31042654028436,
115
+ "grad_norm": 0.6478149890899658,
116
+ "learning_rate": 1.2582781456953644e-05,
117
+ "loss": 2.2047,
118
  "step": 130
119
  },
120
  {
121
+ "epoch": 2.4881516587677726,
122
+ "grad_norm": 0.6131073832511902,
123
+ "learning_rate": 9.271523178807948e-06,
124
+ "loss": 2.2018,
125
  "step": 140
126
  },
127
  {
128
+ "epoch": 2.665876777251185,
129
+ "grad_norm": 0.7310860753059387,
130
+ "learning_rate": 5.9602649006622515e-06,
131
+ "loss": 2.2025,
132
  "step": 150
133
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  {
135
  "epoch": 2.843601895734597,
136
+ "grad_norm": 0.6716310381889343,
137
+ "learning_rate": 2.6490066225165563e-06,
138
+ "loss": 2.1981,
139
+ "step": 160
 
 
 
 
 
 
 
140
  },
141
  {
142
  "epoch": 2.985781990521327,
143
+ "eval_accuracy": 0.23666666666666666,
144
+ "eval_loss": 2.182861566543579,
145
+ "eval_runtime": 73.356,
146
+ "eval_samples_per_second": 81.793,
147
+ "eval_steps_per_second": 2.563,
148
+ "step": 168
149
  },
150
  {
151
  "epoch": 2.985781990521327,
152
+ "step": 168,
153
  "total_flos": 4.411646023570175e+19,
154
+ "train_loss": 2.258255742845081,
155
+ "train_runtime": 9823.5604,
156
+ "train_samples_per_second": 16.491,
157
+ "train_steps_per_second": 0.017
158
  }
159
  ],
160
  "logging_steps": 10,
161
+ "max_steps": 168,
162
  "num_input_tokens_seen": 0,
163
  "num_train_epochs": 3,
164
  "save_steps": 500,
 
175
  }
176
  },
177
  "total_flos": 4.411646023570175e+19,
178
+ "train_batch_size": 32,
179
  "trial_name": null,
180
  "trial_params": null
181
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:281039b64ae59b55c230306a566c0dc20a32c69abd89b52f9831b2ba17247217
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee5db6ddb9e63cd782c8fcac21a98b1f86c7908deeafe6f83803f0eab7db084c
3
  size 5304