k4black committed on
Commit 7d7afb0
1 Parent(s): e193b61

Model save

last-checkpoint/config.json DELETED
@@ -1,38 +0,0 @@
- {
-   "_name_or_path": "roberta-base",
-   "architectures": [
-     "RobertaForSequenceClassification"
-   ],
-   "attention_probs_dropout_prob": 0.1,
-   "bos_token_id": 0,
-   "classifier_dropout": null,
-   "eos_token_id": 2,
-   "hidden_act": "gelu",
-   "hidden_dropout_prob": 0.1,
-   "hidden_size": 768,
-   "id2label": {
-     "0": "entailment",
-     "1": "neutral",
-     "2": "contradiction"
-   },
-   "initializer_range": 0.02,
-   "intermediate_size": 3072,
-   "label2id": {
-     "contradiction": 2,
-     "entailment": 0,
-     "neutral": 1
-   },
-   "layer_norm_eps": 1e-05,
-   "max_position_embeddings": 514,
-   "model_type": "roberta",
-   "num_attention_heads": 12,
-   "num_hidden_layers": 12,
-   "pad_token_id": 1,
-   "position_embedding_type": "absolute",
-   "problem_type": "single_label_classification",
-   "torch_dtype": "float32",
-   "transformers_version": "4.27.1",
-   "type_vocab_size": 1,
-   "use_cache": true,
-   "vocab_size": 50265
- }
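For orientation, the deleted config.json defines a three-way NLI classification head (entailment / neutral / contradiction) on top of roberta-base. The sketch below shows how a checkpoint carrying this config is typically loaded with the transformers library; the local checkpoint path is an assumption for illustration, not part of this commit.

    # Minimal sketch; assumes a local copy of the checkpoint directory exists.
    from transformers import AutoTokenizer, AutoModelForSequenceClassification
    import torch

    checkpoint_dir = "last-checkpoint"  # hypothetical local path
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)

    # Premise and hypothesis are encoded as one sequence pair for the classifier.
    inputs = tokenizer("A man is playing guitar.", "A person makes music.", return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits

    # id2label from config.json maps the argmax to entailment / neutral / contradiction.
    print(model.config.id2label[int(logits.argmax(dim=-1))])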
 
last-checkpoint/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:8c7978bf8054801da6f4c756ac3ee82d92c0f486af86b40dc0bf28323cf03296
- size 997349515
 
last-checkpoint/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:19f88d4ac00fa7feb29c8dd174036ce80b27f550f2e7dcaa40ee80769e3c83a2
- size 498663405
 
last-checkpoint/rng_state.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:fc559e8f3be7931753acd7e14135ef3c355c45429768068a18a2cdbd924ae84e
- size 14503
 
last-checkpoint/scaler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:0ac5f79294afe5b58702f8cfd595843c97cf8ecf3eb18e0b1f699ed162ad0894
- size 559
 
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:7ac05aa2ac2c93c43b69f88d2a334721a2027f6a3102b80f21e4e6745984e25d
- size 623
 
last-checkpoint/special_tokens_map.json DELETED
@@ -1,15 +0,0 @@
- {
-   "bos_token": "<s>",
-   "cls_token": "<s>",
-   "eos_token": "</s>",
-   "mask_token": {
-     "content": "<mask>",
-     "lstrip": true,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "pad_token": "<pad>",
-   "sep_token": "</s>",
-   "unk_token": "<unk>"
- }
 
last-checkpoint/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json DELETED
@@ -1,15 +0,0 @@
- {
-   "add_prefix_space": false,
-   "bos_token": "<s>",
-   "cls_token": "<s>",
-   "eos_token": "</s>",
-   "errors": "replace",
-   "mask_token": "<mask>",
-   "model_max_length": 512,
-   "pad_token": "<pad>",
-   "sep_token": "</s>",
-   "special_tokens_map_file": null,
-   "tokenizer_class": "RobertaTokenizer",
-   "trim_offsets": true,
-   "unk_token": "<unk>"
- }
 
last-checkpoint/trainer_state.json DELETED
@@ -1,368 +0,0 @@
- {
-   "best_metric": 0.9120778013038088,
-   "best_model_checkpoint": "/home2/s5431786/nlp-final-project/results/roberta-base-e-snli-classification-nli-base/checkpoint-8000",
-   "epoch": 1.0251630941286114,
-   "global_step": 8800,
-   "is_hyper_param_search": false,
-   "is_local_process_zero": true,
-   "is_world_process_zero": true,
-   "log_history": [
-     {
-       "epoch": 0.05,
-       "learning_rate": 3.0900621118012425e-06,
-       "loss": 1.0317,
-       "step": 400
-     },
-     {
-       "epoch": 0.05,
-       "eval_accuracy": 0.7803292013818329,
-       "eval_f1": 0.7770741838405094,
-       "eval_loss": 0.5733650326728821,
-       "eval_runtime": 5.0438,
-       "eval_samples_per_second": 1951.29,
-       "eval_steps_per_second": 30.532,
-       "step": 400
-     },
-     {
-       "epoch": 0.09,
-       "learning_rate": 6.187888198757764e-06,
-       "loss": 0.544,
-       "step": 800
-     },
-     {
-       "epoch": 0.09,
-       "eval_accuracy": 0.855517171306645,
-       "eval_f1": 0.8547612368327439,
-       "eval_loss": 0.3994133174419403,
-       "eval_runtime": 4.3487,
-       "eval_samples_per_second": 2263.221,
-       "eval_steps_per_second": 35.413,
-       "step": 800
-     },
-     {
-       "epoch": 0.14,
-       "learning_rate": 9.293478260869566e-06,
-       "loss": 0.4604,
-       "step": 1200
-     },
-     {
-       "epoch": 0.14,
-       "eval_accuracy": 0.8687258687258688,
-       "eval_f1": 0.8681066915368941,
-       "eval_loss": 0.3491659164428711,
-       "eval_runtime": 4.2453,
-       "eval_samples_per_second": 2318.326,
-       "eval_steps_per_second": 36.275,
-       "step": 1200
-     },
-     {
-       "epoch": 0.19,
-       "learning_rate": 9.87369195552649e-06,
-       "loss": 0.4235,
-       "step": 1600
-     },
-     {
-       "epoch": 0.19,
-       "eval_accuracy": 0.8776671408250356,
-       "eval_f1": 0.8764133428741641,
-       "eval_loss": 0.3322618901729584,
-       "eval_runtime": 4.4615,
-       "eval_samples_per_second": 2205.99,
-       "eval_steps_per_second": 34.518,
-       "step": 1600
-     },
-     {
-       "epoch": 0.23,
-       "learning_rate": 9.710186396337476e-06,
-       "loss": 0.3934,
-       "step": 2000
-     },
-     {
-       "epoch": 0.23,
-       "eval_accuracy": 0.884068278805121,
-       "eval_f1": 0.8831390301360367,
-       "eval_loss": 0.32249000668525696,
-       "eval_runtime": 4.2395,
-       "eval_samples_per_second": 2321.481,
-       "eval_steps_per_second": 36.325,
-       "step": 2000
-     },
-     {
-       "epoch": 0.28,
-       "learning_rate": 9.546680837148464e-06,
-       "loss": 0.3863,
-       "step": 2400
-     },
-     {
-       "epoch": 0.28,
-       "eval_accuracy": 0.8872180451127819,
-       "eval_f1": 0.887543528243619,
-       "eval_loss": 0.3085917532444,
-       "eval_runtime": 4.3491,
-       "eval_samples_per_second": 2263.01,
-       "eval_steps_per_second": 35.41,
-       "step": 2400
-     },
-     {
-       "epoch": 0.33,
-       "learning_rate": 9.38317527795945e-06,
-       "loss": 0.3767,
-       "step": 2800
-     },
-     {
-       "epoch": 0.33,
-       "eval_accuracy": 0.8897581792318634,
-       "eval_f1": 0.8891603343028002,
-       "eval_loss": 0.29720813035964966,
-       "eval_runtime": 4.3717,
-       "eval_samples_per_second": 2251.299,
-       "eval_steps_per_second": 35.227,
-       "step": 2800
-     },
-     {
-       "epoch": 0.37,
-       "learning_rate": 9.219669718770439e-06,
-       "loss": 0.3726,
-       "step": 3200
-     },
-     {
-       "epoch": 0.37,
-       "eval_accuracy": 0.8936191830928673,
-       "eval_f1": 0.8931996056385407,
-       "eval_loss": 0.29100456833839417,
-       "eval_runtime": 4.4246,
-       "eval_samples_per_second": 2224.393,
-       "eval_steps_per_second": 34.806,
-       "step": 3200
-     },
-     {
-       "epoch": 0.42,
-       "learning_rate": 9.056164159581427e-06,
-       "loss": 0.3624,
-       "step": 3600
-     },
-     {
-       "epoch": 0.42,
-       "eval_accuracy": 0.8937207884576306,
-       "eval_f1": 0.8933970762962105,
-       "eval_loss": 0.2934032082557678,
-       "eval_runtime": 4.3374,
-       "eval_samples_per_second": 2269.111,
-       "eval_steps_per_second": 35.505,
-       "step": 3600
-     },
-     {
-       "epoch": 0.47,
-       "learning_rate": 8.892658600392414e-06,
-       "loss": 0.361,
-       "step": 4000
-     },
-     {
-       "epoch": 0.47,
-       "eval_accuracy": 0.8989026620605568,
-       "eval_f1": 0.898874821280442,
-       "eval_loss": 0.28308674693107605,
-       "eval_runtime": 4.2135,
-       "eval_samples_per_second": 2335.846,
-       "eval_steps_per_second": 36.55,
-       "step": 4000
-     },
-     {
-       "epoch": 0.51,
-       "learning_rate": 8.729153041203402e-06,
-       "loss": 0.3553,
-       "step": 4400
-     },
-     {
-       "epoch": 0.51,
-       "eval_accuracy": 0.8993090835196098,
-       "eval_f1": 0.898521939461863,
-       "eval_loss": 0.29054638743400574,
-       "eval_runtime": 4.2186,
-       "eval_samples_per_second": 2332.993,
-       "eval_steps_per_second": 36.505,
-       "step": 4400
-     },
-     {
-       "epoch": 0.56,
-       "learning_rate": 8.565647482014388e-06,
-       "loss": 0.3451,
-       "step": 4800
-     },
-     {
-       "epoch": 0.56,
-       "eval_accuracy": 0.9023572444625076,
-       "eval_f1": 0.901894187198396,
-       "eval_loss": 0.27245137095451355,
-       "eval_runtime": 4.2088,
-       "eval_samples_per_second": 2338.443,
-       "eval_steps_per_second": 36.59,
-       "step": 4800
-     },
-     {
-       "epoch": 0.61,
-       "learning_rate": 8.402141922825377e-06,
-       "loss": 0.3475,
-       "step": 5200
-     },
-     {
-       "epoch": 0.61,
-       "eval_accuracy": 0.9051005893111156,
-       "eval_f1": 0.9046245607228055,
-       "eval_loss": 0.2711792588233948,
-       "eval_runtime": 4.2086,
-       "eval_samples_per_second": 2338.56,
-       "eval_steps_per_second": 36.592,
-       "step": 5200
-     },
-     {
-       "epoch": 0.65,
-       "learning_rate": 8.238636363636365e-06,
-       "loss": 0.3398,
-       "step": 5600
-     },
-     {
-       "epoch": 0.65,
-       "eval_accuracy": 0.9027636659215607,
-       "eval_f1": 0.9024225307003263,
-       "eval_loss": 0.2787366211414337,
-       "eval_runtime": 4.3854,
-       "eval_samples_per_second": 2244.277,
-       "eval_steps_per_second": 35.117,
-       "step": 5600
-     },
-     {
-       "epoch": 0.7,
-       "learning_rate": 8.075130804447351e-06,
-       "loss": 0.3322,
-       "step": 6000
-     },
-     {
-       "epoch": 0.7,
-       "eval_accuracy": 0.9045925624872994,
-       "eval_f1": 0.9043146299021979,
-       "eval_loss": 0.2696707248687744,
-       "eval_runtime": 4.2347,
-       "eval_samples_per_second": 2324.111,
-       "eval_steps_per_second": 36.366,
-       "step": 6000
-     },
-     {
-       "epoch": 0.75,
-       "learning_rate": 7.912034009156312e-06,
-       "loss": 0.3288,
-       "step": 6400
-     },
-     {
-       "epoch": 0.75,
-       "eval_accuracy": 0.9013411908148751,
-       "eval_f1": 0.90060674757625,
-       "eval_loss": 0.2721998691558838,
-       "eval_runtime": 4.1964,
-       "eval_samples_per_second": 2345.34,
-       "eval_steps_per_second": 36.698,
-       "step": 6400
-     },
-     {
-       "epoch": 0.79,
-       "learning_rate": 7.7485284499673e-06,
-       "loss": 0.324,
-       "step": 6800
-     },
-     {
-       "epoch": 0.79,
-       "eval_accuracy": 0.9066246697825645,
-       "eval_f1": 0.9065691454907331,
-       "eval_loss": 0.26766958832740784,
-       "eval_runtime": 4.5081,
-       "eval_samples_per_second": 2183.165,
-       "eval_steps_per_second": 34.16,
-       "step": 6800
-     },
-     {
-       "epoch": 0.84,
-       "learning_rate": 7.585022890778288e-06,
-       "loss": 0.3335,
-       "step": 7200
-     },
-     {
-       "epoch": 0.84,
-       "eval_accuracy": 0.9077423287949604,
-       "eval_f1": 0.907479569921263,
-       "eval_loss": 0.2629023790359497,
-       "eval_runtime": 4.168,
-       "eval_samples_per_second": 2361.347,
-       "eval_steps_per_second": 36.949,
-       "step": 7200
-     },
-     {
-       "epoch": 0.89,
-       "learning_rate": 7.421517331589274e-06,
-       "loss": 0.3309,
-       "step": 7600
-     },
-     {
-       "epoch": 0.89,
-       "eval_accuracy": 0.9061166429587483,
-       "eval_f1": 0.9058061283874342,
-       "eval_loss": 0.2577354609966278,
-       "eval_runtime": 4.4559,
-       "eval_samples_per_second": 2208.773,
-       "eval_steps_per_second": 34.561,
-       "step": 7600
-     },
-     {
-       "epoch": 0.93,
-       "learning_rate": 7.258011772400262e-06,
-       "loss": 0.3236,
-       "step": 8000
-     },
-     {
-       "epoch": 0.93,
-       "eval_accuracy": 0.9121113594797805,
-       "eval_f1": 0.9120778013038088,
-       "eval_loss": 0.25613412261009216,
-       "eval_runtime": 4.1922,
-       "eval_samples_per_second": 2347.706,
-       "eval_steps_per_second": 36.735,
-       "step": 8000
-     },
-     {
-       "epoch": 0.98,
-       "learning_rate": 7.09450621321125e-06,
-       "loss": 0.3183,
-       "step": 8400
-     },
-     {
-       "epoch": 0.98,
-       "eval_accuracy": 0.908758382442593,
-       "eval_f1": 0.9083914158392551,
-       "eval_loss": 0.2555808424949646,
-       "eval_runtime": 4.2479,
-       "eval_samples_per_second": 2316.917,
-       "eval_steps_per_second": 36.253,
-       "step": 8400
-     },
-     {
-       "epoch": 1.03,
-       "learning_rate": 6.931000654022237e-06,
-       "loss": 0.3022,
-       "step": 8800
-     },
-     {
-       "epoch": 1.03,
-       "eval_accuracy": 0.906421459053038,
-       "eval_f1": 0.9056135727779063,
-       "eval_loss": 0.2667880356311798,
-       "eval_runtime": 4.4383,
-       "eval_samples_per_second": 2217.519,
-       "eval_steps_per_second": 34.698,
-       "step": 8800
-     }
-   ],
-   "max_steps": 25752,
-   "num_train_epochs": 3,
-   "total_flos": 1.536997071500478e+16,
-   "trial_name": null,
-   "trial_params": null
- }
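The deleted trainer_state.json records the Trainer's evaluation history and best checkpoint; the best_metric above matches the eval_f1 logged at step 8000. A minimal sketch of inspecting such a file with only the standard library, assuming a local copy of the checkpoint directory:

    # Sketch: read a Trainer state file like the one deleted above (path is an assumption).
    import json

    with open("last-checkpoint/trainer_state.json") as f:
        state = json.load(f)

    print(state["best_metric"])            # 0.9120778013038088 here (eval_f1 at step 8000)
    print(state["best_model_checkpoint"])  # directory of the best checkpoint, here checkpoint-8000

    # Evaluation entries in log_history are the ones carrying eval_* keys.
    evals = [e for e in state["log_history"] if "eval_accuracy" in e]
    print(evals[-1]["step"], evals[-1]["eval_accuracy"])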
 
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:66cc9af448b4d5ffdf0aedbd94ba171b260816026a211b6bf3012033c4b680bb
- size 3695
 
last-checkpoint/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:19f88d4ac00fa7feb29c8dd174036ce80b27f550f2e7dcaa40ee80769e3c83a2
+ oid sha256:85cf3d22c5ddf332ef20a787904d9f9e91c003a948fe68bae406c94abaa9515d
  size 498663405
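The pointer files in this commit follow the Git LFS spec (a version line, a sha256 oid, and the byte size); the actual weights live in LFS storage. A small sketch, assuming pytorch_model.bin has been downloaded to the working directory, of checking it against the new pointer:

    # Sketch: verify a downloaded file against the LFS pointer's oid and size (path is an assumption).
    import hashlib
    import os

    path = "pytorch_model.bin"
    expected_oid = "85cf3d22c5ddf332ef20a787904d9f9e91c003a948fe68bae406c94abaa9515d"
    expected_size = 498663405

    sha256 = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha256.update(chunk)

    assert os.path.getsize(path) == expected_size
    assert sha256.hexdigest() == expected_oid
    print("pytorch_model.bin matches the LFS pointer")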