k4black commited on
Commit
aaf3c61
1 Parent(s): c20ed3b

Model save

Browse files
last-checkpoint/config.json DELETED
@@ -1,39 +0,0 @@
1
- {
2
- "_name_or_path": "textattack/roberta-base-MNLI",
3
- "architectures": [
4
- "RobertaForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 0,
8
- "classifier_dropout": null,
9
- "eos_token_id": 2,
10
- "finetuning_task": "mnli",
11
- "hidden_act": "gelu",
12
- "hidden_dropout_prob": 0.1,
13
- "hidden_size": 768,
14
- "id2label": {
15
- "0": "entailment",
16
- "1": "neutral",
17
- "2": "contradiction"
18
- },
19
- "initializer_range": 0.02,
20
- "intermediate_size": 3072,
21
- "label2id": {
22
- "contradiction": 2,
23
- "entailment": 0,
24
- "neutral": 1
25
- },
26
- "layer_norm_eps": 1e-05,
27
- "max_position_embeddings": 514,
28
- "model_type": "roberta",
29
- "num_attention_heads": 12,
30
- "num_hidden_layers": 12,
31
- "pad_token_id": 1,
32
- "position_embedding_type": "absolute",
33
- "problem_type": "single_label_classification",
34
- "torch_dtype": "float32",
35
- "transformers_version": "4.27.1",
36
- "type_vocab_size": 1,
37
- "use_cache": true,
38
- "vocab_size": 50265
39
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a10e9d1b17c22c8b81855d9e4b54c63fbb2c987aee21637eb45ddab0c9f1d31
3
- size 997349515
 
 
 
 
last-checkpoint/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b4f713ac5da9d171cb132cf2c74cf0f0fa401b25773e9acb38e22d818270f5c
3
- size 498663405
 
 
 
 
last-checkpoint/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc559e8f3be7931753acd7e14135ef3c355c45429768068a18a2cdbd924ae84e
3
- size 14503
 
 
 
 
last-checkpoint/scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a96da2617fd1df0df7952478b447c0cfa383a6f4b13ae4ecb6da8a8b8fde5bdf
3
- size 559
 
 
 
 
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:601c2aea291c4f39ce9473e44c77e21714bd96f90420b8d09477bbcfeffc6321
3
- size 623
 
 
 
 
last-checkpoint/special_tokens_map.json DELETED
@@ -1,15 +0,0 @@
1
- {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
5
- "mask_token": {
6
- "content": "<mask>",
7
- "lstrip": true,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false
11
- },
12
- "pad_token": "<pad>",
13
- "sep_token": "</s>",
14
- "unk_token": "<unk>"
15
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json DELETED
@@ -1,64 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "bos_token": {
4
- "__type": "AddedToken",
5
- "content": "<s>",
6
- "lstrip": false,
7
- "normalized": true,
8
- "rstrip": false,
9
- "single_word": false
10
- },
11
- "cls_token": {
12
- "__type": "AddedToken",
13
- "content": "<s>",
14
- "lstrip": false,
15
- "normalized": true,
16
- "rstrip": false,
17
- "single_word": false
18
- },
19
- "eos_token": {
20
- "__type": "AddedToken",
21
- "content": "</s>",
22
- "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": false
26
- },
27
- "errors": "replace",
28
- "mask_token": {
29
- "__type": "AddedToken",
30
- "content": "<mask>",
31
- "lstrip": true,
32
- "normalized": true,
33
- "rstrip": false,
34
- "single_word": false
35
- },
36
- "model_max_length": 512,
37
- "pad_token": {
38
- "__type": "AddedToken",
39
- "content": "<pad>",
40
- "lstrip": false,
41
- "normalized": true,
42
- "rstrip": false,
43
- "single_word": false
44
- },
45
- "sep_token": {
46
- "__type": "AddedToken",
47
- "content": "</s>",
48
- "lstrip": false,
49
- "normalized": true,
50
- "rstrip": false,
51
- "single_word": false
52
- },
53
- "special_tokens_map_file": "/home/s5431786/.cache/huggingface/hub/models--textattack--roberta-base-MNLI/snapshots/6f2e633322381bc5897405e417ec531ea3633a3f/special_tokens_map.json",
54
- "tokenizer_class": "RobertaTokenizer",
55
- "trim_offsets": true,
56
- "unk_token": {
57
- "__type": "AddedToken",
58
- "content": "<unk>",
59
- "lstrip": false,
60
- "normalized": true,
61
- "rstrip": false,
62
- "single_word": false
63
- }
64
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/trainer_state.json DELETED
@@ -1,368 +0,0 @@
1
- {
2
- "best_metric": 0.9114793850238726,
3
- "best_model_checkpoint": "/home2/s5431786/nlp-final-project/results/textattack-roberta-base-MNLI-e-snli-classification-nli-base/checkpoint-7600",
4
- "epoch": 1.0251630941286114,
5
- "global_step": 8800,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.05,
12
- "learning_rate": 3.082298136645963e-06,
13
- "loss": 1.5376,
14
- "step": 400
15
- },
16
- {
17
- "epoch": 0.05,
18
- "eval_accuracy": 0.8556187766714083,
19
- "eval_f1": 0.8556462363132852,
20
- "eval_loss": 0.40095046162605286,
21
- "eval_runtime": 4.2759,
22
- "eval_samples_per_second": 2301.749,
23
- "eval_steps_per_second": 36.016,
24
- "step": 400
25
- },
26
- {
27
- "epoch": 0.09,
28
- "learning_rate": 6.187888198757764e-06,
29
- "loss": 0.4352,
30
- "step": 800
31
- },
32
- {
33
- "epoch": 0.09,
34
- "eval_accuracy": 0.8800040642145905,
35
- "eval_f1": 0.8795301912940783,
36
- "eval_loss": 0.3349034786224365,
37
- "eval_runtime": 4.5813,
38
- "eval_samples_per_second": 2148.309,
39
- "eval_steps_per_second": 33.615,
40
- "step": 800
41
- },
42
- {
43
- "epoch": 0.14,
44
- "learning_rate": 9.293478260869566e-06,
45
- "loss": 0.4,
46
- "step": 1200
47
- },
48
- {
49
- "epoch": 0.14,
50
- "eval_accuracy": 0.8853891485470433,
51
- "eval_f1": 0.8850952248341096,
52
- "eval_loss": 0.31801462173461914,
53
- "eval_runtime": 4.595,
54
- "eval_samples_per_second": 2141.913,
55
- "eval_steps_per_second": 33.515,
56
- "step": 1200
57
- },
58
- {
59
- "epoch": 0.19,
60
- "learning_rate": 9.87369195552649e-06,
61
- "loss": 0.3801,
62
- "step": 1600
63
- },
64
- {
65
- "epoch": 0.19,
66
- "eval_accuracy": 0.8920951026214184,
67
- "eval_f1": 0.8917568158937579,
68
- "eval_loss": 0.29749178886413574,
69
- "eval_runtime": 4.426,
70
- "eval_samples_per_second": 2223.668,
71
- "eval_steps_per_second": 34.794,
72
- "step": 1600
73
- },
74
- {
75
- "epoch": 0.23,
76
- "learning_rate": 9.710186396337476e-06,
77
- "loss": 0.3599,
78
- "step": 2000
79
- },
80
- {
81
- "epoch": 0.23,
82
- "eval_accuracy": 0.8955496850233692,
83
- "eval_f1": 0.8950963090954073,
84
- "eval_loss": 0.29492226243019104,
85
- "eval_runtime": 4.2584,
86
- "eval_samples_per_second": 2311.208,
87
- "eval_steps_per_second": 36.164,
88
- "step": 2000
89
- },
90
- {
91
- "epoch": 0.28,
92
- "learning_rate": 9.546680837148464e-06,
93
- "loss": 0.3612,
94
- "step": 2400
95
- },
96
- {
97
- "epoch": 0.28,
98
- "eval_accuracy": 0.8986994513310302,
99
- "eval_f1": 0.8986773852884219,
100
- "eval_loss": 0.2801545560359955,
101
- "eval_runtime": 4.4994,
102
- "eval_samples_per_second": 2187.409,
103
- "eval_steps_per_second": 34.227,
104
- "step": 2400
105
- },
106
- {
107
- "epoch": 0.33,
108
- "learning_rate": 9.38317527795945e-06,
109
- "loss": 0.3519,
110
- "step": 2800
111
- },
112
- {
113
- "epoch": 0.33,
114
- "eval_accuracy": 0.8979882137776874,
115
- "eval_f1": 0.8976730067239987,
116
- "eval_loss": 0.2762671411037445,
117
- "eval_runtime": 4.4383,
118
- "eval_samples_per_second": 2217.515,
119
- "eval_steps_per_second": 34.698,
120
- "step": 2800
121
- },
122
- {
123
- "epoch": 0.37,
124
- "learning_rate": 9.219669718770439e-06,
125
- "loss": 0.349,
126
- "step": 3200
127
- },
128
- {
129
- "epoch": 0.37,
130
- "eval_accuracy": 0.9022556390977443,
131
- "eval_f1": 0.9020033896210373,
132
- "eval_loss": 0.27657467126846313,
133
- "eval_runtime": 4.5933,
134
- "eval_samples_per_second": 2142.703,
135
- "eval_steps_per_second": 33.527,
136
- "step": 3200
137
- },
138
- {
139
- "epoch": 0.42,
140
- "learning_rate": 9.056164159581427e-06,
141
- "loss": 0.3432,
142
- "step": 3600
143
- },
144
- {
145
- "epoch": 0.42,
146
- "eval_accuracy": 0.9001219264377159,
147
- "eval_f1": 0.8999893300639911,
148
- "eval_loss": 0.2747589945793152,
149
- "eval_runtime": 4.3172,
150
- "eval_samples_per_second": 2279.706,
151
- "eval_steps_per_second": 35.671,
152
- "step": 3600
153
- },
154
- {
155
- "epoch": 0.47,
156
- "learning_rate": 8.892658600392414e-06,
157
- "loss": 0.3435,
158
- "step": 4000
159
- },
160
- {
161
- "epoch": 0.47,
162
- "eval_accuracy": 0.9051005893111156,
163
- "eval_f1": 0.9051215853221407,
164
- "eval_loss": 0.27019360661506653,
165
- "eval_runtime": 4.5917,
166
- "eval_samples_per_second": 2143.426,
167
- "eval_steps_per_second": 33.539,
168
- "step": 4000
169
- },
170
- {
171
- "epoch": 0.51,
172
- "learning_rate": 8.729153041203402e-06,
173
- "loss": 0.3352,
174
- "step": 4400
175
- },
176
- {
177
- "epoch": 0.51,
178
- "eval_accuracy": 0.9038813249339566,
179
- "eval_f1": 0.9034061899733951,
180
- "eval_loss": 0.2727869153022766,
181
- "eval_runtime": 4.4338,
182
- "eval_samples_per_second": 2219.754,
183
- "eval_steps_per_second": 34.733,
184
- "step": 4400
185
- },
186
- {
187
- "epoch": 0.56,
188
- "learning_rate": 8.565647482014388e-06,
189
- "loss": 0.3277,
190
- "step": 4800
191
- },
192
- {
193
- "epoch": 0.56,
194
- "eval_accuracy": 0.9042877463930096,
195
- "eval_f1": 0.9039391226328383,
196
- "eval_loss": 0.2633576989173889,
197
- "eval_runtime": 4.474,
198
- "eval_samples_per_second": 2199.802,
199
- "eval_steps_per_second": 34.421,
200
- "step": 4800
201
- },
202
- {
203
- "epoch": 0.61,
204
- "learning_rate": 8.402550686723349e-06,
205
- "loss": 0.3307,
206
- "step": 5200
207
- },
208
- {
209
- "epoch": 0.61,
210
- "eval_accuracy": 0.9057102214996952,
211
- "eval_f1": 0.9050187432087493,
212
- "eval_loss": 0.26230183243751526,
213
- "eval_runtime": 4.1881,
214
- "eval_samples_per_second": 2349.966,
215
- "eval_steps_per_second": 36.77,
216
- "step": 5200
217
- },
218
- {
219
- "epoch": 0.65,
220
- "learning_rate": 8.239045127534336e-06,
221
- "loss": 0.3247,
222
- "step": 5600
223
- },
224
- {
225
- "epoch": 0.65,
226
- "eval_accuracy": 0.9063198536882747,
227
- "eval_f1": 0.9059067839331951,
228
- "eval_loss": 0.26849961280822754,
229
- "eval_runtime": 4.4315,
230
- "eval_samples_per_second": 2220.93,
231
- "eval_steps_per_second": 34.751,
232
- "step": 5600
233
- },
234
- {
235
- "epoch": 0.7,
236
- "learning_rate": 8.075539568345324e-06,
237
- "loss": 0.3175,
238
- "step": 6000
239
- },
240
- {
241
- "epoch": 0.7,
242
- "eval_accuracy": 0.90835196098354,
243
- "eval_f1": 0.9081309299681798,
244
- "eval_loss": 0.25888413190841675,
245
- "eval_runtime": 4.4315,
246
- "eval_samples_per_second": 2220.898,
247
- "eval_steps_per_second": 34.751,
248
- "step": 6000
249
- },
250
- {
251
- "epoch": 0.75,
252
- "learning_rate": 7.912034009156312e-06,
253
- "loss": 0.3144,
254
- "step": 6400
255
- },
256
- {
257
- "epoch": 0.75,
258
- "eval_accuracy": 0.9092664092664092,
259
- "eval_f1": 0.9087749519181698,
260
- "eval_loss": 0.2586296796798706,
261
- "eval_runtime": 4.4165,
262
- "eval_samples_per_second": 2228.443,
263
- "eval_steps_per_second": 34.869,
264
- "step": 6400
265
- },
266
- {
267
- "epoch": 0.79,
268
- "learning_rate": 7.7485284499673e-06,
269
- "loss": 0.3102,
270
- "step": 6800
271
- },
272
- {
273
- "epoch": 0.79,
274
- "eval_accuracy": 0.9089615931721194,
275
- "eval_f1": 0.9088222146205912,
276
- "eval_loss": 0.2547251582145691,
277
- "eval_runtime": 4.5049,
278
- "eval_samples_per_second": 2184.723,
279
- "eval_steps_per_second": 34.185,
280
- "step": 6800
281
- },
282
- {
283
- "epoch": 0.84,
284
- "learning_rate": 7.585022890778288e-06,
285
- "loss": 0.3223,
286
- "step": 7200
287
- },
288
- {
289
- "epoch": 0.84,
290
- "eval_accuracy": 0.909571225360699,
291
- "eval_f1": 0.9093158037084038,
292
- "eval_loss": 0.25255095958709717,
293
- "eval_runtime": 4.2651,
294
- "eval_samples_per_second": 2307.581,
295
- "eval_steps_per_second": 36.107,
296
- "step": 7200
297
- },
298
- {
299
- "epoch": 0.89,
300
- "learning_rate": 7.421517331589274e-06,
301
- "loss": 0.3166,
302
- "step": 7600
303
- },
304
- {
305
- "epoch": 0.89,
306
- "eval_accuracy": 0.9118065433854907,
307
- "eval_f1": 0.9114793850238726,
308
- "eval_loss": 0.24902105331420898,
309
- "eval_runtime": 4.4308,
310
- "eval_samples_per_second": 2221.264,
311
- "eval_steps_per_second": 34.757,
312
- "step": 7600
313
- },
314
- {
315
- "epoch": 0.93,
316
- "learning_rate": 7.258011772400262e-06,
317
- "loss": 0.3124,
318
- "step": 8000
319
- },
320
- {
321
- "epoch": 0.93,
322
- "eval_accuracy": 0.9106888843730949,
323
- "eval_f1": 0.9105538161740804,
324
- "eval_loss": 0.2503485083580017,
325
- "eval_runtime": 4.4709,
326
- "eval_samples_per_second": 2201.33,
327
- "eval_steps_per_second": 34.445,
328
- "step": 8000
329
- },
330
- {
331
- "epoch": 0.98,
332
- "learning_rate": 7.0949149771092216e-06,
333
- "loss": 0.3053,
334
- "step": 8400
335
- },
336
- {
337
- "epoch": 0.98,
338
- "eval_accuracy": 0.9100792521845154,
339
- "eval_f1": 0.9098862073513235,
340
- "eval_loss": 0.2452017217874527,
341
- "eval_runtime": 4.4231,
342
- "eval_samples_per_second": 2225.145,
343
- "eval_steps_per_second": 34.817,
344
- "step": 8400
345
- },
346
- {
347
- "epoch": 1.03,
348
- "learning_rate": 6.93140941792021e-06,
349
- "loss": 0.2908,
350
- "step": 8800
351
- },
352
- {
353
- "epoch": 1.03,
354
- "eval_accuracy": 0.911908148750254,
355
- "eval_f1": 0.9113463195976007,
356
- "eval_loss": 0.2575433552265167,
357
- "eval_runtime": 4.383,
358
- "eval_samples_per_second": 2245.491,
359
- "eval_steps_per_second": 35.136,
360
- "step": 8800
361
- }
362
- ],
363
- "max_steps": 25752,
364
- "num_train_epochs": 3,
365
- "total_flos": 1.536997071500478e+16,
366
- "trial_name": null,
367
- "trial_params": null
368
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1902e90a9e14b843b5170a79e45072d9242111eb8136de151581eceb0f4c7e52
3
- size 3759
 
 
 
 
last-checkpoint/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b4f713ac5da9d171cb132cf2c74cf0f0fa401b25773e9acb38e22d818270f5c
3
  size 498663405
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28f1b2bc7c2c05add017e0a368f237527ae4bdd70f14d7023a384e1ec6d2289c
3
  size 498663405