ppak10 committed on
Commit
b17e998
1 Parent(s): c36317a

test_ViT-Masked_5

Browse files
Files changed (5) hide show
  1. README.md +50 -0
  2. config.json +22 -0
  3. model.safetensors +3 -0
  4. trainer_state.json +380 -0
  5. training_args.bin +3 -0
README.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - generated_from_trainer
4
+ model-index:
5
+ - name: test_ViT-Masked_5
6
+ results: []
7
+ ---
8
+
9
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
10
+ should probably proofread and complete it, then remove this comment. -->
11
+
12
+ # test_ViT-Masked_5
13
+
14
+ This model is a `ViTForMaskedImageModeling` model trained on the ppak10/Melt-Pool-Thermal-Images dataset. No base checkpoint was recorded by the Trainer.
15
+
16
+ ## Model description
17
+
18
+ More information needed
19
+
20
+ ## Intended uses & limitations
21
+
22
+ More information needed
23
+
24
+ ## Training and evaluation data
25
+
26
+ More information needed
27
+
28
+ ## Training procedure
29
+
30
+ ### Training hyperparameters
31
+
32
+ The following hyperparameters were used during training:
33
+ - learning_rate: 1e-05
34
+ - train_batch_size: 2048
35
+ - eval_batch_size: 16
36
+ - seed: 42
37
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
38
+ - lr_scheduler_type: linear
39
+ - num_epochs: 50
40
+
41
+ ### Training results
42
+
43
+
44
+
45
+ ### Framework versions
46
+
47
+ - Transformers 4.40.1
48
+ - PyTorch 2.0.1+cu117
49
+ - Datasets 2.19.0
50
+ - Tokenizers 0.19.1
config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ViTForMaskedImageModeling"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "encoder_stride": 8,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.0,
9
+ "hidden_size": 768,
10
+ "image_size": 64,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "model_type": "vit",
15
+ "num_attention_heads": 12,
16
+ "num_channels": 1,
17
+ "num_hidden_layers": 12,
18
+ "patch_size": 8,
19
+ "qkv_bias": true,
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.40.1"
22
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddfca584a236659427dcb6cb5e42ccc8d6e47b86af70a15e217da0bbb6e0ebf9
3
+ size 340849656
trainer_state.json ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 50.0,
5
+ "eval_steps": 500,
6
+ "global_step": 12950,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.9305019305019306,
13
+ "grad_norm": 2.738110065460205,
14
+ "learning_rate": 9.613899613899614e-06,
15
+ "loss": 0.0888,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.9305019305019306,
20
+ "eval_runtime": 667.5755,
21
+ "eval_samples_per_second": 264.418,
22
+ "eval_steps_per_second": 16.527,
23
+ "step": 500
24
+ },
25
+ {
26
+ "epoch": 3.861003861003861,
27
+ "grad_norm": 4.450708389282227,
28
+ "learning_rate": 9.227799227799229e-06,
29
+ "loss": 0.0558,
30
+ "step": 1000
31
+ },
32
+ {
33
+ "epoch": 3.861003861003861,
34
+ "eval_runtime": 651.8038,
35
+ "eval_samples_per_second": 270.816,
36
+ "eval_steps_per_second": 16.927,
37
+ "step": 1000
38
+ },
39
+ {
40
+ "epoch": 5.7915057915057915,
41
+ "grad_norm": 5.049165725708008,
42
+ "learning_rate": 8.841698841698842e-06,
43
+ "loss": 0.0409,
44
+ "step": 1500
45
+ },
46
+ {
47
+ "epoch": 5.7915057915057915,
48
+ "eval_runtime": 650.4821,
49
+ "eval_samples_per_second": 271.366,
50
+ "eval_steps_per_second": 16.961,
51
+ "step": 1500
52
+ },
53
+ {
54
+ "epoch": 7.722007722007722,
55
+ "grad_norm": 3.824125051498413,
56
+ "learning_rate": 8.455598455598457e-06,
57
+ "loss": 0.0319,
58
+ "step": 2000
59
+ },
60
+ {
61
+ "epoch": 7.722007722007722,
62
+ "eval_runtime": 652.7164,
63
+ "eval_samples_per_second": 270.438,
64
+ "eval_steps_per_second": 16.903,
65
+ "step": 2000
66
+ },
67
+ {
68
+ "epoch": 9.652509652509652,
69
+ "grad_norm": 3.7455878257751465,
70
+ "learning_rate": 8.06949806949807e-06,
71
+ "loss": 0.0277,
72
+ "step": 2500
73
+ },
74
+ {
75
+ "epoch": 9.652509652509652,
76
+ "eval_runtime": 652.469,
77
+ "eval_samples_per_second": 270.54,
78
+ "eval_steps_per_second": 16.91,
79
+ "step": 2500
80
+ },
81
+ {
82
+ "epoch": 11.583011583011583,
83
+ "grad_norm": 2.825524091720581,
84
+ "learning_rate": 7.683397683397685e-06,
85
+ "loss": 0.0249,
86
+ "step": 3000
87
+ },
88
+ {
89
+ "epoch": 11.583011583011583,
90
+ "eval_runtime": 724.8262,
91
+ "eval_samples_per_second": 243.533,
92
+ "eval_steps_per_second": 15.222,
93
+ "step": 3000
94
+ },
95
+ {
96
+ "epoch": 13.513513513513514,
97
+ "grad_norm": 1.5855698585510254,
98
+ "learning_rate": 7.297297297297298e-06,
99
+ "loss": 0.0233,
100
+ "step": 3500
101
+ },
102
+ {
103
+ "epoch": 13.513513513513514,
104
+ "eval_runtime": 721.7805,
105
+ "eval_samples_per_second": 244.56,
106
+ "eval_steps_per_second": 15.286,
107
+ "step": 3500
108
+ },
109
+ {
110
+ "epoch": 15.444015444015443,
111
+ "grad_norm": 2.516690254211426,
112
+ "learning_rate": 6.911196911196911e-06,
113
+ "loss": 0.0217,
114
+ "step": 4000
115
+ },
116
+ {
117
+ "epoch": 15.444015444015443,
118
+ "eval_runtime": 723.8804,
119
+ "eval_samples_per_second": 243.851,
120
+ "eval_steps_per_second": 15.241,
121
+ "step": 4000
122
+ },
123
+ {
124
+ "epoch": 17.374517374517374,
125
+ "grad_norm": 2.6874454021453857,
126
+ "learning_rate": 6.525096525096526e-06,
127
+ "loss": 0.0207,
128
+ "step": 4500
129
+ },
130
+ {
131
+ "epoch": 17.374517374517374,
132
+ "eval_runtime": 726.2467,
133
+ "eval_samples_per_second": 243.057,
134
+ "eval_steps_per_second": 15.192,
135
+ "step": 4500
136
+ },
137
+ {
138
+ "epoch": 19.305019305019304,
139
+ "grad_norm": 2.5405662059783936,
140
+ "learning_rate": 6.13899613899614e-06,
141
+ "loss": 0.0198,
142
+ "step": 5000
143
+ },
144
+ {
145
+ "epoch": 19.305019305019304,
146
+ "eval_runtime": 725.831,
147
+ "eval_samples_per_second": 243.196,
148
+ "eval_steps_per_second": 15.201,
149
+ "step": 5000
150
+ },
151
+ {
152
+ "epoch": 21.235521235521237,
153
+ "grad_norm": 2.5149359703063965,
154
+ "learning_rate": 5.752895752895753e-06,
155
+ "loss": 0.0191,
156
+ "step": 5500
157
+ },
158
+ {
159
+ "epoch": 21.235521235521237,
160
+ "eval_runtime": 724.4353,
161
+ "eval_samples_per_second": 243.664,
162
+ "eval_steps_per_second": 15.23,
163
+ "step": 5500
164
+ },
165
+ {
166
+ "epoch": 23.166023166023166,
167
+ "grad_norm": 1.5971148014068604,
168
+ "learning_rate": 5.366795366795368e-06,
169
+ "loss": 0.0183,
170
+ "step": 6000
171
+ },
172
+ {
173
+ "epoch": 23.166023166023166,
174
+ "eval_runtime": 725.1569,
175
+ "eval_samples_per_second": 243.422,
176
+ "eval_steps_per_second": 15.215,
177
+ "step": 6000
178
+ },
179
+ {
180
+ "epoch": 25.096525096525095,
181
+ "grad_norm": 2.0284860134124756,
182
+ "learning_rate": 4.980694980694981e-06,
183
+ "loss": 0.0178,
184
+ "step": 6500
185
+ },
186
+ {
187
+ "epoch": 25.096525096525095,
188
+ "eval_runtime": 726.8832,
189
+ "eval_samples_per_second": 242.844,
190
+ "eval_steps_per_second": 15.179,
191
+ "step": 6500
192
+ },
193
+ {
194
+ "epoch": 27.027027027027028,
195
+ "grad_norm": 2.005959987640381,
196
+ "learning_rate": 4.594594594594596e-06,
197
+ "loss": 0.0174,
198
+ "step": 7000
199
+ },
200
+ {
201
+ "epoch": 27.027027027027028,
202
+ "eval_runtime": 725.3449,
203
+ "eval_samples_per_second": 243.359,
204
+ "eval_steps_per_second": 15.211,
205
+ "step": 7000
206
+ },
207
+ {
208
+ "epoch": 28.957528957528957,
209
+ "grad_norm": 1.96770441532135,
210
+ "learning_rate": 4.208494208494209e-06,
211
+ "loss": 0.0168,
212
+ "step": 7500
213
+ },
214
+ {
215
+ "epoch": 28.957528957528957,
216
+ "eval_runtime": 765.8326,
217
+ "eval_samples_per_second": 230.493,
218
+ "eval_steps_per_second": 14.407,
219
+ "step": 7500
220
+ },
221
+ {
222
+ "epoch": 30.888030888030887,
223
+ "grad_norm": 1.844897747039795,
224
+ "learning_rate": 3.822393822393823e-06,
225
+ "loss": 0.0165,
226
+ "step": 8000
227
+ },
228
+ {
229
+ "epoch": 30.888030888030887,
230
+ "eval_runtime": 819.9764,
231
+ "eval_samples_per_second": 215.273,
232
+ "eval_steps_per_second": 13.455,
233
+ "step": 8000
234
+ },
235
+ {
236
+ "epoch": 32.818532818532816,
237
+ "grad_norm": 1.615881323814392,
238
+ "learning_rate": 3.4362934362934363e-06,
239
+ "loss": 0.0162,
240
+ "step": 8500
241
+ },
242
+ {
243
+ "epoch": 32.818532818532816,
244
+ "eval_runtime": 821.2267,
245
+ "eval_samples_per_second": 214.946,
246
+ "eval_steps_per_second": 13.435,
247
+ "step": 8500
248
+ },
249
+ {
250
+ "epoch": 34.74903474903475,
251
+ "grad_norm": 1.6435213088989258,
252
+ "learning_rate": 3.0501930501930503e-06,
253
+ "loss": 0.0158,
254
+ "step": 9000
255
+ },
256
+ {
257
+ "epoch": 34.74903474903475,
258
+ "eval_runtime": 819.0429,
259
+ "eval_samples_per_second": 215.519,
260
+ "eval_steps_per_second": 13.471,
261
+ "step": 9000
262
+ },
263
+ {
264
+ "epoch": 36.67953667953668,
265
+ "grad_norm": 1.7934831380844116,
266
+ "learning_rate": 2.6640926640926647e-06,
267
+ "loss": 0.0155,
268
+ "step": 9500
269
+ },
270
+ {
271
+ "epoch": 36.67953667953668,
272
+ "eval_runtime": 820.1169,
273
+ "eval_samples_per_second": 215.236,
274
+ "eval_steps_per_second": 13.453,
275
+ "step": 9500
276
+ },
277
+ {
278
+ "epoch": 38.61003861003861,
279
+ "grad_norm": 1.1239484548568726,
280
+ "learning_rate": 2.2779922779922782e-06,
281
+ "loss": 0.0152,
282
+ "step": 10000
283
+ },
284
+ {
285
+ "epoch": 38.61003861003861,
286
+ "eval_runtime": 821.2669,
287
+ "eval_samples_per_second": 214.935,
288
+ "eval_steps_per_second": 13.434,
289
+ "step": 10000
290
+ },
291
+ {
292
+ "epoch": 40.54054054054054,
293
+ "grad_norm": 1.256516456604004,
294
+ "learning_rate": 1.8918918918918922e-06,
295
+ "loss": 0.015,
296
+ "step": 10500
297
+ },
298
+ {
299
+ "epoch": 40.54054054054054,
300
+ "eval_runtime": 818.9208,
301
+ "eval_samples_per_second": 215.551,
302
+ "eval_steps_per_second": 13.473,
303
+ "step": 10500
304
+ },
305
+ {
306
+ "epoch": 42.47104247104247,
307
+ "grad_norm": 0.4177967607975006,
308
+ "learning_rate": 1.505791505791506e-06,
309
+ "loss": 0.0148,
310
+ "step": 11000
311
+ },
312
+ {
313
+ "epoch": 42.47104247104247,
314
+ "eval_runtime": 823.9199,
315
+ "eval_samples_per_second": 214.243,
316
+ "eval_steps_per_second": 13.391,
317
+ "step": 11000
318
+ },
319
+ {
320
+ "epoch": 44.4015444015444,
321
+ "grad_norm": 0.3761753737926483,
322
+ "learning_rate": 1.1196911196911197e-06,
323
+ "loss": 0.0146,
324
+ "step": 11500
325
+ },
326
+ {
327
+ "epoch": 44.4015444015444,
328
+ "eval_runtime": 821.1987,
329
+ "eval_samples_per_second": 214.953,
330
+ "eval_steps_per_second": 13.435,
331
+ "step": 11500
332
+ },
333
+ {
334
+ "epoch": 46.33204633204633,
335
+ "grad_norm": 0.4973774552345276,
336
+ "learning_rate": 7.335907335907337e-07,
337
+ "loss": 0.0145,
338
+ "step": 12000
339
+ },
340
+ {
341
+ "epoch": 46.33204633204633,
342
+ "eval_runtime": 821.7593,
343
+ "eval_samples_per_second": 214.806,
344
+ "eval_steps_per_second": 13.426,
345
+ "step": 12000
346
+ },
347
+ {
348
+ "epoch": 48.262548262548265,
349
+ "grad_norm": 0.3773488998413086,
350
+ "learning_rate": 3.474903474903475e-07,
351
+ "loss": 0.0144,
352
+ "step": 12500
353
+ },
354
+ {
355
+ "epoch": 48.262548262548265,
356
+ "eval_runtime": 824.3743,
357
+ "eval_samples_per_second": 214.125,
358
+ "eval_steps_per_second": 13.383,
359
+ "step": 12500
360
+ },
361
+ {
362
+ "epoch": 50.0,
363
+ "step": 12950,
364
+ "total_flos": 5.544539360447693e+19,
365
+ "train_loss": 0.023562463738283135,
366
+ "train_runtime": 98807.7466,
367
+ "train_samples_per_second": 267.972,
368
+ "train_steps_per_second": 0.131
369
+ }
370
+ ],
371
+ "logging_steps": 500,
372
+ "max_steps": 12950,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 50,
375
+ "save_steps": 100,
376
+ "total_flos": 5.544539360447693e+19,
377
+ "train_batch_size": 2048,
378
+ "trial_name": null,
379
+ "trial_params": null
380
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0942d68f90d6569eea9def5463bf1355978cdba23a55c8aa7a19449584d203d3
3
+ size 4539