NiharGupte committed on
Commit
8e66653
1 Parent(s): cec259b

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,13 +1,8 @@
1
  {
2
- "epoch": 4.680851063829787,
3
- "eval_accuracy": 0.4889937106918239,
4
- "eval_loss": NaN,
5
- "eval_runtime": 8.3571,
6
- "eval_samples_per_second": 76.103,
7
- "eval_steps_per_second": 2.393,
8
- "total_flos": 1.477984078577664e+17,
9
- "train_loss": 3.320157440986396e+22,
10
- "train_runtime": 166.0473,
11
- "train_samples_per_second": 44.686,
12
- "train_steps_per_second": 0.331
13
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "total_flos": 1.5762161898233856e+17,
4
+ "train_loss": 2.3917236512990602e+24,
5
+ "train_runtime": 186.7027,
6
+ "train_samples_per_second": 39.742,
7
+ "train_steps_per_second": 1.259
 
 
 
 
 
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b653022e181754b798bdd423faee19214ceaa4a2298473870417efcb27cb1d38
3
  size 94335752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:333e2c98f70384b91224f113065a2550926cbddfee048da3333a23ca4eba8733
3
  size 94335752
runs/May04_12-13-04_c4081513bb3b/events.out.tfevents.1714824796.c4081513bb3b.415.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29ebe4a8e9b1f9317dc9aabbcdd350d3f8802040a3cf1a8ec7c0a6cd0af36d2c
3
+ size 6147
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.680851063829787,
3
- "total_flos": 1.477984078577664e+17,
4
- "train_loss": 3.320157440986396e+22,
5
- "train_runtime": 166.0473,
6
- "train_samples_per_second": 44.686,
7
- "train_steps_per_second": 0.331
8
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "total_flos": 1.5762161898233856e+17,
4
+ "train_loss": 2.3917236512990602e+24,
5
+ "train_runtime": 186.7027,
6
+ "train_samples_per_second": 39.742,
7
+ "train_steps_per_second": 1.259
8
  }
trainer_state.json CHANGED
@@ -1,109 +1,235 @@
1
  {
2
- "best_metric": 0.4889937106918239,
3
- "best_model_checkpoint": "resnet-50-finetuned-student_kaggle/checkpoint-11",
4
- "epoch": 4.680851063829787,
5
  "eval_steps": 500,
6
- "global_step": 55,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.851063829787234,
13
- "grad_norm": 492.8765869140625,
14
- "learning_rate": 4.591836734693878e-05,
15
- "loss": 3.371893679888266e+22,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.9361702127659575,
20
- "eval_accuracy": 0.4889937106918239,
21
- "eval_loss": 3.489738920977707e+22,
22
- "eval_runtime": 7.7509,
23
- "eval_samples_per_second": 82.055,
24
- "eval_steps_per_second": 2.58,
25
- "step": 11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  },
27
  {
28
  "epoch": 1.702127659574468,
29
- "grad_norm": 954.1488647460938,
30
- "learning_rate": 3.571428571428572e-05,
31
- "loss": 3.243846974994898e+22,
32
- "step": 20
 
 
 
 
 
 
 
33
  },
34
  {
35
- "epoch": 1.9574468085106385,
36
- "eval_accuracy": 0.4889937106918239,
37
- "eval_loss": 3.489738920977707e+22,
38
- "eval_runtime": 8.4777,
39
- "eval_samples_per_second": 75.021,
40
- "eval_steps_per_second": 2.359,
41
- "step": 23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  },
43
  {
44
  "epoch": 2.5531914893617023,
45
- "grad_norm": 704.236083984375,
46
- "learning_rate": 2.5510204081632654e-05,
47
- "loss": 3.336324610319264e+22,
48
- "step": 30
 
 
 
 
 
 
 
49
  },
50
  {
51
  "epoch": 2.978723404255319,
52
- "eval_accuracy": 0.4889937106918239,
53
- "eval_loss": 3.489738920977707e+22,
54
- "eval_runtime": 8.3616,
55
- "eval_samples_per_second": 76.062,
56
- "eval_steps_per_second": 2.392,
57
- "step": 35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  },
59
  {
60
  "epoch": 3.404255319148936,
61
- "grad_norm": 400.68768310546875,
62
- "learning_rate": 1.5306122448979594e-05,
63
- "loss": 3.295420756775664e+22,
64
- "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  },
66
  {
67
  "epoch": 4.0,
68
- "eval_accuracy": 0.4889937106918239,
69
- "eval_loss": 3.489738920977707e+22,
70
- "eval_runtime": 8.378,
71
- "eval_samples_per_second": 75.913,
72
  "eval_steps_per_second": 2.387,
73
- "step": 47
 
 
 
 
 
 
 
74
  },
75
  {
76
  "epoch": 4.25531914893617,
77
- "grad_norm": 1062.6368408203125,
78
- "learning_rate": 5.102040816326531e-06,
79
- "loss": 3.2794156842759295e+22,
80
- "step": 50
81
  },
82
  {
83
- "epoch": 4.680851063829787,
84
- "eval_accuracy": 0.4889937106918239,
85
- "eval_loss": 3.489738920977707e+22,
86
- "eval_runtime": 8.4114,
87
- "eval_samples_per_second": 75.612,
88
- "eval_steps_per_second": 2.378,
89
- "step": 55
90
  },
91
  {
92
  "epoch": 4.680851063829787,
93
- "step": 55,
94
- "total_flos": 1.477984078577664e+17,
95
- "train_loss": 3.320157440986396e+22,
96
- "train_runtime": 166.0473,
97
- "train_samples_per_second": 44.686,
98
- "train_steps_per_second": 0.331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  }
100
  ],
101
  "logging_steps": 10,
102
- "max_steps": 55,
103
  "num_input_tokens_seen": 0,
104
  "num_train_epochs": 5,
105
  "save_steps": 500,
106
- "total_flos": 1.477984078577664e+17,
107
  "train_batch_size": 32,
108
  "trial_name": null,
109
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.1949685534591195,
3
+ "best_model_checkpoint": "resnet-50-finetuned-student_kaggle/checkpoint-47",
4
+ "epoch": 5.0,
5
  "eval_steps": 500,
6
+ "global_step": 235,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.2127659574468085,
13
+ "grad_norm": Infinity,
14
+ "learning_rate": 2.0833333333333336e-05,
15
+ "loss": 2.498036407537461e+24,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.425531914893617,
20
+ "grad_norm": Infinity,
21
+ "learning_rate": 4.166666666666667e-05,
22
+ "loss": 2.2438091453211188e+24,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.6382978723404256,
27
+ "grad_norm": Infinity,
28
+ "learning_rate": 4.857819905213271e-05,
29
+ "loss": 2.323816594301913e+24,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.851063829787234,
34
+ "grad_norm": Infinity,
35
+ "learning_rate": 4.620853080568721e-05,
36
+ "loss": 2.2316656536973958e+24,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 1.0,
41
+ "eval_accuracy": 0.1949685534591195,
42
+ "eval_loss": 2.703493994833504e+24,
43
+ "eval_runtime": 8.5486,
44
+ "eval_samples_per_second": 74.398,
45
+ "eval_steps_per_second": 2.34,
46
+ "step": 47
47
+ },
48
+ {
49
+ "epoch": 1.0638297872340425,
50
+ "grad_norm": Infinity,
51
+ "learning_rate": 4.383886255924171e-05,
52
+ "loss": 2.2648591861522306e+24,
53
+ "step": 50
54
+ },
55
+ {
56
+ "epoch": 1.2765957446808511,
57
+ "grad_norm": Infinity,
58
+ "learning_rate": 4.146919431279621e-05,
59
+ "loss": 2.2616990283081033e+24,
60
+ "step": 60
61
+ },
62
+ {
63
+ "epoch": 1.4893617021276595,
64
+ "grad_norm": Infinity,
65
+ "learning_rate": 3.909952606635071e-05,
66
+ "loss": 2.3967580179652723e+24,
67
+ "step": 70
68
  },
69
  {
70
  "epoch": 1.702127659574468,
71
+ "grad_norm": Infinity,
72
+ "learning_rate": 3.672985781990522e-05,
73
+ "loss": 2.3675921014946312e+24,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 1.9148936170212765,
78
+ "grad_norm": Infinity,
79
+ "learning_rate": 3.4360189573459716e-05,
80
+ "loss": 2.448978214010634e+24,
81
+ "step": 90
82
  },
83
  {
84
+ "epoch": 2.0,
85
+ "eval_accuracy": 0.1949685534591195,
86
+ "eval_loss": 2.805605946653523e+24,
87
+ "eval_runtime": 8.3333,
88
+ "eval_samples_per_second": 76.32,
89
+ "eval_steps_per_second": 2.4,
90
+ "step": 94
91
+ },
92
+ {
93
+ "epoch": 2.127659574468085,
94
+ "grad_norm": Infinity,
95
+ "learning_rate": 3.1990521327014215e-05,
96
+ "loss": 2.491016037911609e+24,
97
+ "step": 100
98
+ },
99
+ {
100
+ "epoch": 2.3404255319148937,
101
+ "grad_norm": Infinity,
102
+ "learning_rate": 2.962085308056872e-05,
103
+ "loss": 2.4256373181498684e+24,
104
+ "step": 110
105
  },
106
  {
107
  "epoch": 2.5531914893617023,
108
+ "grad_norm": Infinity,
109
+ "learning_rate": 2.7251184834123224e-05,
110
+ "loss": 2.3682234413105537e+24,
111
+ "step": 120
112
+ },
113
+ {
114
+ "epoch": 2.7659574468085104,
115
+ "grad_norm": Infinity,
116
+ "learning_rate": 2.4881516587677726e-05,
117
+ "loss": 2.4713693331359047e+24,
118
+ "step": 130
119
  },
120
  {
121
  "epoch": 2.978723404255319,
122
+ "grad_norm": Infinity,
123
+ "learning_rate": 2.251184834123223e-05,
124
+ "loss": 2.3645329395743072e+24,
125
+ "step": 140
126
+ },
127
+ {
128
+ "epoch": 3.0,
129
+ "eval_accuracy": 0.1949685534591195,
130
+ "eval_loss": 2.8451802655295293e+24,
131
+ "eval_runtime": 12.7674,
132
+ "eval_samples_per_second": 49.814,
133
+ "eval_steps_per_second": 1.566,
134
+ "step": 141
135
+ },
136
+ {
137
+ "epoch": 3.1914893617021276,
138
+ "grad_norm": Infinity,
139
+ "learning_rate": 2.014218009478673e-05,
140
+ "loss": 2.495697821557516e+24,
141
+ "step": 150
142
  },
143
  {
144
  "epoch": 3.404255319148936,
145
+ "grad_norm": Infinity,
146
+ "learning_rate": 1.7772511848341233e-05,
147
+ "loss": 2.4826769566687875e+24,
148
+ "step": 160
149
+ },
150
+ {
151
+ "epoch": 3.617021276595745,
152
+ "grad_norm": Infinity,
153
+ "learning_rate": 1.5402843601895736e-05,
154
+ "loss": 2.4615111625186133e+24,
155
+ "step": 170
156
+ },
157
+ {
158
+ "epoch": 3.829787234042553,
159
+ "grad_norm": Infinity,
160
+ "learning_rate": 1.3033175355450238e-05,
161
+ "loss": 2.3318623723139621e+24,
162
+ "step": 180
163
  },
164
  {
165
  "epoch": 4.0,
166
+ "eval_accuracy": 0.1949685534591195,
167
+ "eval_loss": 3.2710429521366926e+24,
168
+ "eval_runtime": 8.3778,
169
+ "eval_samples_per_second": 75.915,
170
  "eval_steps_per_second": 2.387,
171
+ "step": 188
172
+ },
173
+ {
174
+ "epoch": 4.042553191489362,
175
+ "grad_norm": Infinity,
176
+ "learning_rate": 1.066350710900474e-05,
177
+ "loss": 2.4159188816189355e+24,
178
+ "step": 190
179
  },
180
  {
181
  "epoch": 4.25531914893617,
182
+ "grad_norm": Infinity,
183
+ "learning_rate": 8.293838862559241e-06,
184
+ "loss": 2.434787825547632e+24,
185
+ "step": 200
186
  },
187
  {
188
+ "epoch": 4.468085106382979,
189
+ "grad_norm": Infinity,
190
+ "learning_rate": 5.924170616113745e-06,
191
+ "loss": 2.4733954773881007e+24,
192
+ "step": 210
 
 
193
  },
194
  {
195
  "epoch": 4.680851063829787,
196
+ "grad_norm": Infinity,
197
+ "learning_rate": 3.5545023696682464e-06,
198
+ "loss": 2.1876454965614e+24,
199
+ "step": 220
200
+ },
201
+ {
202
+ "epoch": 4.8936170212765955,
203
+ "grad_norm": Infinity,
204
+ "learning_rate": 1.1848341232227488e-06,
205
+ "loss": 2.5842763195878584e+24,
206
+ "step": 230
207
+ },
208
+ {
209
+ "epoch": 5.0,
210
+ "eval_accuracy": 0.1949685534591195,
211
+ "eval_loss": 2.9667564146510736e+24,
212
+ "eval_runtime": 8.5512,
213
+ "eval_samples_per_second": 74.375,
214
+ "eval_steps_per_second": 2.339,
215
+ "step": 235
216
+ },
217
+ {
218
+ "epoch": 5.0,
219
+ "step": 235,
220
+ "total_flos": 1.5762161898233856e+17,
221
+ "train_loss": 2.3917236512990602e+24,
222
+ "train_runtime": 186.7027,
223
+ "train_samples_per_second": 39.742,
224
+ "train_steps_per_second": 1.259
225
  }
226
  ],
227
  "logging_steps": 10,
228
+ "max_steps": 235,
229
  "num_input_tokens_seen": 0,
230
  "num_train_epochs": 5,
231
  "save_steps": 500,
232
+ "total_flos": 1.5762161898233856e+17,
233
  "train_batch_size": 32,
234
  "trial_name": null,
235
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd42ad5527d2847bda666b89158b22342f2bf13371109a2ec2cb7d1f510ed7fd
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64d17c7a89c9eae9d31295b68b48df1a2fccb0bb14e3ee400506fa7306597a7c
3
  size 5048