Angy309 committed on
Commit
1fa0948
·
verified ·
1 Parent(s): ee25a64

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.9717868338557993,
4
+ "eval_loss": 0.11080693453550339,
5
+ "eval_runtime": 4.3469,
6
+ "eval_samples_per_second": 146.771,
7
+ "eval_steps_per_second": 4.601,
8
+ "total_flos": 7.137702770984755e+17,
9
+ "train_loss": 0.747228291299608,
10
+ "train_runtime": 6337.5569,
11
+ "train_samples_per_second": 4.529,
12
+ "train_steps_per_second": 0.036
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.9717868338557993,
4
+ "eval_loss": 0.11080693453550339,
5
+ "eval_runtime": 4.3469,
6
+ "eval_samples_per_second": 146.771,
7
+ "eval_steps_per_second": 4.601
8
+ }
runs/May13_10-18-28_6aa421215e05/events.out.tfevents.1715601877.6aa421215e05.377.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eabe40125ded7e793ffd06a627e5a0de28ce5e9df2df4db40c465582d16dd76
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "total_flos": 7.137702770984755e+17,
4
+ "train_loss": 0.747228291299608,
5
+ "train_runtime": 6337.5569,
6
+ "train_samples_per_second": 4.529,
7
+ "train_steps_per_second": 0.036
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9717868338557993,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-LEGO/checkpoint-225",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 225,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.2222222222222222,
13
+ "grad_norm": 4.567494869232178,
14
+ "learning_rate": 2.173913043478261e-05,
15
+ "loss": 2.7413,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.4444444444444444,
20
+ "grad_norm": 8.966611862182617,
21
+ "learning_rate": 4.347826086956522e-05,
22
+ "loss": 2.4945,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.6666666666666666,
27
+ "grad_norm": 16.994686126708984,
28
+ "learning_rate": 4.826732673267327e-05,
29
+ "loss": 1.8167,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.8888888888888888,
34
+ "grad_norm": 23.02696990966797,
35
+ "learning_rate": 4.57920792079208e-05,
36
+ "loss": 1.2301,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 1.0,
41
+ "eval_accuracy": 0.713166144200627,
42
+ "eval_loss": 0.7920515537261963,
43
+ "eval_runtime": 617.4774,
44
+ "eval_samples_per_second": 1.033,
45
+ "eval_steps_per_second": 0.032,
46
+ "step": 45
47
+ },
48
+ {
49
+ "epoch": 1.1111111111111112,
50
+ "grad_norm": 15.22125244140625,
51
+ "learning_rate": 4.331683168316832e-05,
52
+ "loss": 0.9193,
53
+ "step": 50
54
+ },
55
+ {
56
+ "epoch": 1.3333333333333333,
57
+ "grad_norm": 12.350472450256348,
58
+ "learning_rate": 4.0841584158415844e-05,
59
+ "loss": 0.7267,
60
+ "step": 60
61
+ },
62
+ {
63
+ "epoch": 1.5555555555555556,
64
+ "grad_norm": 20.679580688476562,
65
+ "learning_rate": 3.8366336633663367e-05,
66
+ "loss": 0.6571,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 1.7777777777777777,
71
+ "grad_norm": 11.873376846313477,
72
+ "learning_rate": 3.589108910891089e-05,
73
+ "loss": 0.591,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 2.0,
78
+ "grad_norm": 17.2018985748291,
79
+ "learning_rate": 3.341584158415842e-05,
80
+ "loss": 0.5433,
81
+ "step": 90
82
+ },
83
+ {
84
+ "epoch": 2.0,
85
+ "eval_accuracy": 0.8918495297805643,
86
+ "eval_loss": 0.30467158555984497,
87
+ "eval_runtime": 4.017,
88
+ "eval_samples_per_second": 158.823,
89
+ "eval_steps_per_second": 4.979,
90
+ "step": 90
91
+ },
92
+ {
93
+ "epoch": 2.2222222222222223,
94
+ "grad_norm": 6.819035053253174,
95
+ "learning_rate": 3.094059405940594e-05,
96
+ "loss": 0.5013,
97
+ "step": 100
98
+ },
99
+ {
100
+ "epoch": 2.4444444444444446,
101
+ "grad_norm": 9.604576110839844,
102
+ "learning_rate": 2.8465346534653464e-05,
103
+ "loss": 0.4186,
104
+ "step": 110
105
+ },
106
+ {
107
+ "epoch": 2.6666666666666665,
108
+ "grad_norm": 13.680152893066406,
109
+ "learning_rate": 2.5990099009900993e-05,
110
+ "loss": 0.4364,
111
+ "step": 120
112
+ },
113
+ {
114
+ "epoch": 2.888888888888889,
115
+ "grad_norm": 6.642324924468994,
116
+ "learning_rate": 2.3514851485148515e-05,
117
+ "loss": 0.4067,
118
+ "step": 130
119
+ },
120
+ {
121
+ "epoch": 3.0,
122
+ "eval_accuracy": 0.9278996865203761,
123
+ "eval_loss": 0.20282776653766632,
124
+ "eval_runtime": 4.147,
125
+ "eval_samples_per_second": 153.847,
126
+ "eval_steps_per_second": 4.823,
127
+ "step": 135
128
+ },
129
+ {
130
+ "epoch": 3.111111111111111,
131
+ "grad_norm": 10.98905086517334,
132
+ "learning_rate": 2.103960396039604e-05,
133
+ "loss": 0.4095,
134
+ "step": 140
135
+ },
136
+ {
137
+ "epoch": 3.3333333333333335,
138
+ "grad_norm": 10.862679481506348,
139
+ "learning_rate": 1.8564356435643564e-05,
140
+ "loss": 0.3771,
141
+ "step": 150
142
+ },
143
+ {
144
+ "epoch": 3.5555555555555554,
145
+ "grad_norm": 12.117408752441406,
146
+ "learning_rate": 1.608910891089109e-05,
147
+ "loss": 0.3815,
148
+ "step": 160
149
+ },
150
+ {
151
+ "epoch": 3.7777777777777777,
152
+ "grad_norm": 8.852999687194824,
153
+ "learning_rate": 1.3613861386138616e-05,
154
+ "loss": 0.3363,
155
+ "step": 170
156
+ },
157
+ {
158
+ "epoch": 4.0,
159
+ "grad_norm": 11.115220069885254,
160
+ "learning_rate": 1.113861386138614e-05,
161
+ "loss": 0.3297,
162
+ "step": 180
163
+ },
164
+ {
165
+ "epoch": 4.0,
166
+ "eval_accuracy": 0.957680250783699,
167
+ "eval_loss": 0.12823660671710968,
168
+ "eval_runtime": 4.1114,
169
+ "eval_samples_per_second": 155.177,
170
+ "eval_steps_per_second": 4.864,
171
+ "step": 180
172
+ },
173
+ {
174
+ "epoch": 4.222222222222222,
175
+ "grad_norm": 17.916852951049805,
176
+ "learning_rate": 8.663366336633663e-06,
177
+ "loss": 0.3482,
178
+ "step": 190
179
+ },
180
+ {
181
+ "epoch": 4.444444444444445,
182
+ "grad_norm": 7.79290771484375,
183
+ "learning_rate": 6.1881188118811885e-06,
184
+ "loss": 0.3002,
185
+ "step": 200
186
+ },
187
+ {
188
+ "epoch": 4.666666666666667,
189
+ "grad_norm": 10.635743141174316,
190
+ "learning_rate": 3.7128712871287128e-06,
191
+ "loss": 0.3364,
192
+ "step": 210
193
+ },
194
+ {
195
+ "epoch": 4.888888888888889,
196
+ "grad_norm": 9.049588203430176,
197
+ "learning_rate": 1.2376237623762377e-06,
198
+ "loss": 0.3334,
199
+ "step": 220
200
+ },
201
+ {
202
+ "epoch": 5.0,
203
+ "eval_accuracy": 0.9717868338557993,
204
+ "eval_loss": 0.11080693453550339,
205
+ "eval_runtime": 3.9373,
206
+ "eval_samples_per_second": 162.039,
207
+ "eval_steps_per_second": 5.08,
208
+ "step": 225
209
+ },
210
+ {
211
+ "epoch": 5.0,
212
+ "step": 225,
213
+ "total_flos": 7.137702770984755e+17,
214
+ "train_loss": 0.747228291299608,
215
+ "train_runtime": 6337.5569,
216
+ "train_samples_per_second": 4.529,
217
+ "train_steps_per_second": 0.036
218
+ }
219
+ ],
220
+ "logging_steps": 10,
221
+ "max_steps": 225,
222
+ "num_input_tokens_seen": 0,
223
+ "num_train_epochs": 5,
224
+ "save_steps": 500,
225
+ "total_flos": 7.137702770984755e+17,
226
+ "train_batch_size": 32,
227
+ "trial_name": null,
228
+ "trial_params": null
229
+ }