k4black commited on
Commit
f69a776
1 Parent(s): 104d921

Model save

Browse files
last-checkpoint/config.json DELETED
@@ -1,38 +0,0 @@
1
- {
2
- "_name_or_path": "roberta-large",
3
- "architectures": [
4
- "RobertaForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 0,
8
- "classifier_dropout": null,
9
- "eos_token_id": 2,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
12
- "hidden_size": 1024,
13
- "id2label": {
14
- "0": "entailment",
15
- "1": "neutral",
16
- "2": "contradiction"
17
- },
18
- "initializer_range": 0.02,
19
- "intermediate_size": 4096,
20
- "label2id": {
21
- "contradiction": 2,
22
- "entailment": 0,
23
- "neutral": 1
24
- },
25
- "layer_norm_eps": 1e-05,
26
- "max_position_embeddings": 514,
27
- "model_type": "roberta",
28
- "num_attention_heads": 16,
29
- "num_hidden_layers": 24,
30
- "pad_token_id": 1,
31
- "position_embedding_type": "absolute",
32
- "problem_type": "single_label_classification",
33
- "torch_dtype": "float32",
34
- "transformers_version": "4.27.1",
35
- "type_vocab_size": 1,
36
- "use_cache": true,
37
- "vocab_size": 50265
38
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:43b746406019bd322df48fd0f5832641bf2c23f192d1ba2c1bcf2cc6ec4661ff
3
- size 2843228158
 
 
 
 
last-checkpoint/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c70a2b61096d5fd16d601664f84ccd8cb72633596aa2bd5105a81863e667c6b3
3
- size 1421588461
 
 
 
 
last-checkpoint/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b11b41a45679a6cc286e868bec1372008710d641cbd28a995ea530dff638bd50
3
- size 14503
 
 
 
 
last-checkpoint/scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad34206adf67443db75af07189b4384fe36c9a4922d3cd2b4375c9ae9591e609
3
- size 559
 
 
 
 
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce44e4c1c23e749cd9382e42f9e5375956ba931527c483dc2fade7c594fb4a25
3
- size 623
 
 
 
 
last-checkpoint/special_tokens_map.json DELETED
@@ -1,15 +0,0 @@
1
- {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
5
- "mask_token": {
6
- "content": "<mask>",
7
- "lstrip": true,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false
11
- },
12
- "pad_token": "<pad>",
13
- "sep_token": "</s>",
14
- "unk_token": "<unk>"
15
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json DELETED
@@ -1,15 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "bos_token": "<s>",
4
- "cls_token": "<s>",
5
- "eos_token": "</s>",
6
- "errors": "replace",
7
- "mask_token": "<mask>",
8
- "model_max_length": 512,
9
- "pad_token": "<pad>",
10
- "sep_token": "</s>",
11
- "special_tokens_map_file": null,
12
- "tokenizer_class": "RobertaTokenizer",
13
- "trim_offsets": true,
14
- "unk_token": "<unk>"
15
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/trainer_state.json DELETED
@@ -1,288 +0,0 @@
1
- {
2
- "best_metric": 0.9274853061519247,
3
- "best_model_checkpoint": "/home2/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli-base/checkpoint-6000",
4
- "epoch": 0.7921714818266542,
5
- "global_step": 6800,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.05,
12
- "learning_rate": 3.082298136645963e-06,
13
- "loss": 0.9995,
14
- "step": 400
15
- },
16
- {
17
- "epoch": 0.05,
18
- "eval_accuracy": 0.8464742938427149,
19
- "eval_f1": 0.8436602565292818,
20
- "eval_loss": 0.42355620861053467,
21
- "eval_runtime": 9.7799,
22
- "eval_samples_per_second": 1006.353,
23
- "eval_steps_per_second": 15.747,
24
- "step": 400
25
- },
26
- {
27
- "epoch": 0.09,
28
- "learning_rate": 6.187888198757764e-06,
29
- "loss": 0.4089,
30
- "step": 800
31
- },
32
- {
33
- "epoch": 0.09,
34
- "eval_accuracy": 0.8933143669985776,
35
- "eval_f1": 0.8925604380368285,
36
- "eval_loss": 0.2960757315158844,
37
- "eval_runtime": 9.9483,
38
- "eval_samples_per_second": 989.314,
39
- "eval_steps_per_second": 15.48,
40
- "step": 800
41
- },
42
- {
43
- "epoch": 0.14,
44
- "learning_rate": 9.285714285714288e-06,
45
- "loss": 0.3681,
46
- "step": 1200
47
- },
48
- {
49
- "epoch": 0.14,
50
- "eval_accuracy": 0.8923999187157082,
51
- "eval_f1": 0.8914399447140089,
52
- "eval_loss": 0.2979675531387329,
53
- "eval_runtime": 11.3415,
54
- "eval_samples_per_second": 867.789,
55
- "eval_steps_per_second": 13.578,
56
- "step": 1200
57
- },
58
- {
59
- "epoch": 0.19,
60
- "learning_rate": 9.874100719424462e-06,
61
- "loss": 0.3467,
62
- "step": 1600
63
- },
64
- {
65
- "epoch": 0.19,
66
- "eval_accuracy": 0.89900426742532,
67
- "eval_f1": 0.8976972154704725,
68
- "eval_loss": 0.28719303011894226,
69
- "eval_runtime": 9.8854,
70
- "eval_samples_per_second": 995.605,
71
- "eval_steps_per_second": 15.578,
72
- "step": 1600
73
- },
74
- {
75
- "epoch": 0.23,
76
- "learning_rate": 9.71100392413342e-06,
77
- "loss": 0.324,
78
- "step": 2000
79
- },
80
- {
81
- "epoch": 0.23,
82
- "eval_accuracy": 0.9109937004673847,
83
- "eval_f1": 0.9105643338042256,
84
- "eval_loss": 0.2506079077720642,
85
- "eval_runtime": 9.9303,
86
- "eval_samples_per_second": 991.111,
87
- "eval_steps_per_second": 15.508,
88
- "step": 2000
89
- },
90
- {
91
- "epoch": 0.28,
92
- "learning_rate": 9.547498364944409e-06,
93
- "loss": 0.3222,
94
- "step": 2400
95
- },
96
- {
97
- "epoch": 0.28,
98
- "eval_accuracy": 0.9128225970331233,
99
- "eval_f1": 0.9131812993042298,
100
- "eval_loss": 0.2551884353160858,
101
- "eval_runtime": 9.9844,
102
- "eval_samples_per_second": 985.737,
103
- "eval_steps_per_second": 15.424,
104
- "step": 2400
105
- },
106
- {
107
- "epoch": 0.33,
108
- "learning_rate": 9.383992805755397e-06,
109
- "loss": 0.3138,
110
- "step": 2800
111
- },
112
- {
113
- "epoch": 0.33,
114
- "eval_accuracy": 0.9183092867303394,
115
- "eval_f1": 0.9182731413386942,
116
- "eval_loss": 0.23789888620376587,
117
- "eval_runtime": 10.0071,
118
- "eval_samples_per_second": 983.498,
119
- "eval_steps_per_second": 15.389,
120
- "step": 2800
121
- },
122
- {
123
- "epoch": 0.37,
124
- "learning_rate": 9.220487246566384e-06,
125
- "loss": 0.3107,
126
- "step": 3200
127
- },
128
- {
129
- "epoch": 0.37,
130
- "eval_accuracy": 0.9155659418817313,
131
- "eval_f1": 0.9151783750171821,
132
- "eval_loss": 0.23962362110614777,
133
- "eval_runtime": 9.9366,
134
- "eval_samples_per_second": 990.484,
135
- "eval_steps_per_second": 15.498,
136
- "step": 3200
137
- },
138
- {
139
- "epoch": 0.42,
140
- "learning_rate": 9.056981687377372e-06,
141
- "loss": 0.304,
142
- "step": 3600
143
- },
144
- {
145
- "epoch": 0.42,
146
- "eval_accuracy": 0.9176996545417598,
147
- "eval_f1": 0.9174168238144101,
148
- "eval_loss": 0.23535017669200897,
149
- "eval_runtime": 9.936,
150
- "eval_samples_per_second": 990.544,
151
- "eval_steps_per_second": 15.499,
152
- "step": 3600
153
- },
154
- {
155
- "epoch": 0.47,
156
- "learning_rate": 8.893476128188358e-06,
157
- "loss": 0.3027,
158
- "step": 4000
159
- },
160
- {
161
- "epoch": 0.47,
162
- "eval_accuracy": 0.9191221296484454,
163
- "eval_f1": 0.9190571565466592,
164
- "eval_loss": 0.23602379858493805,
165
- "eval_runtime": 10.6438,
166
- "eval_samples_per_second": 924.674,
167
- "eval_steps_per_second": 14.469,
168
- "step": 4000
169
- },
170
- {
171
- "epoch": 0.51,
172
- "learning_rate": 8.729970568999347e-06,
173
- "loss": 0.2968,
174
- "step": 4400
175
- },
176
- {
177
- "epoch": 0.51,
178
- "eval_accuracy": 0.9187157081893924,
179
- "eval_f1": 0.9182261319193824,
180
- "eval_loss": 0.23287305235862732,
181
- "eval_runtime": 9.9312,
182
- "eval_samples_per_second": 991.014,
183
- "eval_steps_per_second": 15.507,
184
- "step": 4400
185
- },
186
- {
187
- "epoch": 0.56,
188
- "learning_rate": 8.566465009810335e-06,
189
- "loss": 0.2888,
190
- "step": 4800
191
- },
192
- {
193
- "epoch": 0.56,
194
- "eval_accuracy": 0.9196301564722618,
195
- "eval_f1": 0.9189361658403055,
196
- "eval_loss": 0.24621723592281342,
197
- "eval_runtime": 9.9085,
198
- "eval_samples_per_second": 993.291,
199
- "eval_steps_per_second": 15.542,
200
- "step": 4800
201
- },
202
- {
203
- "epoch": 0.61,
204
- "learning_rate": 8.402959450621321e-06,
205
- "loss": 0.2898,
206
- "step": 5200
207
- },
208
- {
209
- "epoch": 0.61,
210
- "eval_accuracy": 0.9211542369437107,
211
- "eval_f1": 0.9206360860900841,
212
- "eval_loss": 0.23345668613910675,
213
- "eval_runtime": 10.0954,
214
- "eval_samples_per_second": 974.901,
215
- "eval_steps_per_second": 15.254,
216
- "step": 5200
217
- },
218
- {
219
- "epoch": 0.65,
220
- "learning_rate": 8.23945389143231e-06,
221
- "loss": 0.288,
222
- "step": 5600
223
- },
224
- {
225
- "epoch": 0.65,
226
- "eval_accuracy": 0.9222718959561065,
227
- "eval_f1": 0.9220252130752401,
228
- "eval_loss": 0.23495520651340485,
229
- "eval_runtime": 9.9904,
230
- "eval_samples_per_second": 985.146,
231
- "eval_steps_per_second": 15.415,
232
- "step": 5600
233
- },
234
- {
235
- "epoch": 0.7,
236
- "learning_rate": 8.07635709614127e-06,
237
- "loss": 0.2746,
238
- "step": 6000
239
- },
240
- {
241
- "epoch": 0.7,
242
- "eval_accuracy": 0.9277585856533225,
243
- "eval_f1": 0.9274853061519247,
244
- "eval_loss": 0.22077496349811554,
245
- "eval_runtime": 9.9415,
246
- "eval_samples_per_second": 989.988,
247
- "eval_steps_per_second": 15.491,
248
- "step": 6000
249
- },
250
- {
251
- "epoch": 0.75,
252
- "learning_rate": 7.912851536952257e-06,
253
- "loss": 0.2756,
254
- "step": 6400
255
- },
256
- {
257
- "epoch": 0.75,
258
- "eval_accuracy": 0.9215606584027637,
259
- "eval_f1": 0.9208559714907353,
260
- "eval_loss": 0.23040013015270233,
261
- "eval_runtime": 9.916,
262
- "eval_samples_per_second": 992.538,
263
- "eval_steps_per_second": 15.53,
264
- "step": 6400
265
- },
266
- {
267
- "epoch": 0.79,
268
- "learning_rate": 7.749345977763243e-06,
269
- "loss": 0.272,
270
- "step": 6800
271
- },
272
- {
273
- "epoch": 0.79,
274
- "eval_accuracy": 0.9237959764275554,
275
- "eval_f1": 0.9236808650336354,
276
- "eval_loss": 0.2243068963289261,
277
- "eval_runtime": 11.5455,
278
- "eval_samples_per_second": 852.451,
279
- "eval_steps_per_second": 13.338,
280
- "step": 6800
281
- }
282
- ],
283
- "max_steps": 25752,
284
- "num_train_epochs": 3,
285
- "total_flos": 4.208639555132851e+16,
286
- "trial_name": null,
287
- "trial_params": null
288
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c226ff8a79fc8c43647efdd29cbdbc642d1c7fa8c770b0a81535e6a61aee411
3
- size 3695
 
 
 
 
last-checkpoint/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c70a2b61096d5fd16d601664f84ccd8cb72633596aa2bd5105a81863e667c6b3
3
  size 1421588461
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d27bd0c8fce811ee26a2630d332b495ceabc992b88ed7e0afd7cdda82dc97b4
3
  size 1421588461