abhishek HF staff commited on
Commit
ed49a0c
1 Parent(s): bb399f7

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - text-regression
6
+ widget:
7
+ - text: "I love AutoTrain"
8
+ datasets:
9
+ - autotrain-m1b56-8ger6/autotrain-data
10
+ ---
11
+
12
+ # Model Trained Using AutoTrain
13
+
14
+ - Problem type: Text Regression
15
+
16
+ ## Validation Metrics
17
+ loss: 0.3305796980857849
18
+
19
+ mse: 0.3305796980857849
20
+
21
+ mae: 0.44259902834892273
22
+
23
+ r2: 0.7003744220771353
24
+
25
+ rmse: 0.5749605894088745
26
+
27
+ explained_variance: 0.700911283493042
checkpoint-3462/config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "FacebookAI/roberta-base",
3
+ "_num_labels": 1,
4
+ "architectures": [
5
+ "RobertaForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "bos_token_id": 0,
9
+ "classifier_dropout": null,
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "target"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "target": 0
21
+ },
22
+ "layer_norm_eps": 1e-05,
23
+ "max_position_embeddings": 514,
24
+ "model_type": "roberta",
25
+ "num_attention_heads": 12,
26
+ "num_hidden_layers": 12,
27
+ "pad_token_id": 1,
28
+ "position_embedding_type": "absolute",
29
+ "problem_type": "regression",
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.40.1",
32
+ "type_vocab_size": 1,
33
+ "use_cache": true,
34
+ "vocab_size": 50265
35
+ }
checkpoint-3462/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:775d65fd7488be3a614de8f7c06ab4e48f197ada2653a63cc0fbc5b2c12d4db5
3
+ size 498609748
checkpoint-3462/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8618b735e09b52d75a226b518f028976605ce57b2bfaf6f1516f5c6fa50fad06
3
+ size 997339386
checkpoint-3462/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:378fff54753e0b270452883b1f2e7eac45752acd9d0ee301b67ff6ed7b4d8a67
3
+ size 14244
checkpoint-3462/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3799d2969b06463edfb0d77525690e6c403209eb7f7d6bcc2471e87a5296959
3
+ size 1064
checkpoint-3462/trainer_state.json ADDED
@@ -0,0 +1,1013 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.3305796980857849,
3
+ "best_model_checkpoint": "autotrain-m1b56-8ger6/checkpoint-3462",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 3462,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.014442518775274409,
13
+ "grad_norm": 52.66768264770508,
14
+ "learning_rate": 2.0192307692307692e-06,
15
+ "loss": 8.6614,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.028885037550548817,
20
+ "grad_norm": 70.3343276977539,
21
+ "learning_rate": 4.423076923076924e-06,
22
+ "loss": 7.3266,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.043327556325823226,
27
+ "grad_norm": 64.09215545654297,
28
+ "learning_rate": 6.730769230769231e-06,
29
+ "loss": 1.6514,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.057770075101097634,
34
+ "grad_norm": 29.524045944213867,
35
+ "learning_rate": 9.134615384615384e-06,
36
+ "loss": 1.0548,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.07221259387637204,
41
+ "grad_norm": 14.635738372802734,
42
+ "learning_rate": 1.153846153846154e-05,
43
+ "loss": 0.9082,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.08665511265164645,
48
+ "grad_norm": 19.21646499633789,
49
+ "learning_rate": 1.3942307692307693e-05,
50
+ "loss": 0.5165,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.10109763142692085,
55
+ "grad_norm": 34.39830017089844,
56
+ "learning_rate": 1.6346153846153847e-05,
57
+ "loss": 0.5166,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.11554015020219527,
62
+ "grad_norm": 12.46200180053711,
63
+ "learning_rate": 1.8750000000000002e-05,
64
+ "loss": 0.6898,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.12998266897746968,
69
+ "grad_norm": 44.481101989746094,
70
+ "learning_rate": 2.1153846153846154e-05,
71
+ "loss": 0.5929,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.14442518775274407,
76
+ "grad_norm": 14.731172561645508,
77
+ "learning_rate": 2.355769230769231e-05,
78
+ "loss": 0.5474,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.1588677065280185,
83
+ "grad_norm": 27.139829635620117,
84
+ "learning_rate": 2.586538461538462e-05,
85
+ "loss": 0.7523,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.1733102253032929,
90
+ "grad_norm": 13.601739883422852,
91
+ "learning_rate": 2.826923076923077e-05,
92
+ "loss": 0.6244,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.1877527440785673,
97
+ "grad_norm": 18.18332862854004,
98
+ "learning_rate": 3.0673076923076926e-05,
99
+ "loss": 0.618,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.2021952628538417,
104
+ "grad_norm": 6.123499870300293,
105
+ "learning_rate": 3.307692307692308e-05,
106
+ "loss": 0.7482,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.21663778162911612,
111
+ "grad_norm": 10.999884605407715,
112
+ "learning_rate": 3.548076923076924e-05,
113
+ "loss": 0.5383,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.23108030040439054,
118
+ "grad_norm": 22.577754974365234,
119
+ "learning_rate": 3.788461538461538e-05,
120
+ "loss": 0.5854,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.24552281917966493,
125
+ "grad_norm": 31.469331741333008,
126
+ "learning_rate": 4.028846153846154e-05,
127
+ "loss": 0.5403,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 0.25996533795493937,
132
+ "grad_norm": 8.64493465423584,
133
+ "learning_rate": 4.269230769230769e-05,
134
+ "loss": 0.6795,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 0.27440785673021373,
139
+ "grad_norm": 7.3773393630981445,
140
+ "learning_rate": 4.509615384615385e-05,
141
+ "loss": 0.5801,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 0.28885037550548814,
146
+ "grad_norm": 11.987143516540527,
147
+ "learning_rate": 4.75e-05,
148
+ "loss": 0.508,
149
+ "step": 500
150
+ },
151
+ {
152
+ "epoch": 0.30329289428076256,
153
+ "grad_norm": 7.039488792419434,
154
+ "learning_rate": 4.9903846153846154e-05,
155
+ "loss": 0.5415,
156
+ "step": 525
157
+ },
158
+ {
159
+ "epoch": 0.317735413056037,
160
+ "grad_norm": 13.371904373168945,
161
+ "learning_rate": 4.9743205649475715e-05,
162
+ "loss": 0.5646,
163
+ "step": 550
164
+ },
165
+ {
166
+ "epoch": 0.3321779318313114,
167
+ "grad_norm": 32.50100326538086,
168
+ "learning_rate": 4.947571153434625e-05,
169
+ "loss": 0.7043,
170
+ "step": 575
171
+ },
172
+ {
173
+ "epoch": 0.3466204506065858,
174
+ "grad_norm": 25.765533447265625,
175
+ "learning_rate": 4.920821741921678e-05,
176
+ "loss": 0.5854,
177
+ "step": 600
178
+ },
179
+ {
180
+ "epoch": 0.3610629693818602,
181
+ "grad_norm": 33.29277420043945,
182
+ "learning_rate": 4.894072330408731e-05,
183
+ "loss": 0.7418,
184
+ "step": 625
185
+ },
186
+ {
187
+ "epoch": 0.3755054881571346,
188
+ "grad_norm": 23.995582580566406,
189
+ "learning_rate": 4.8673229188957844e-05,
190
+ "loss": 0.7084,
191
+ "step": 650
192
+ },
193
+ {
194
+ "epoch": 0.389948006932409,
195
+ "grad_norm": 24.641578674316406,
196
+ "learning_rate": 4.840573507382838e-05,
197
+ "loss": 0.6657,
198
+ "step": 675
199
+ },
200
+ {
201
+ "epoch": 0.4043905257076834,
202
+ "grad_norm": 16.46844482421875,
203
+ "learning_rate": 4.813824095869891e-05,
204
+ "loss": 0.4334,
205
+ "step": 700
206
+ },
207
+ {
208
+ "epoch": 0.41883304448295783,
209
+ "grad_norm": 15.955053329467773,
210
+ "learning_rate": 4.7870746843569445e-05,
211
+ "loss": 0.4989,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 0.43327556325823224,
216
+ "grad_norm": 20.922630310058594,
217
+ "learning_rate": 4.760325272843998e-05,
218
+ "loss": 0.5395,
219
+ "step": 750
220
+ },
221
+ {
222
+ "epoch": 0.44771808203350666,
223
+ "grad_norm": 33.461490631103516,
224
+ "learning_rate": 4.733575861331051e-05,
225
+ "loss": 0.7912,
226
+ "step": 775
227
+ },
228
+ {
229
+ "epoch": 0.4621606008087811,
230
+ "grad_norm": 28.123592376708984,
231
+ "learning_rate": 4.7068264498181045e-05,
232
+ "loss": 0.7634,
233
+ "step": 800
234
+ },
235
+ {
236
+ "epoch": 0.47660311958405543,
237
+ "grad_norm": 6.648211479187012,
238
+ "learning_rate": 4.6800770383051574e-05,
239
+ "loss": 0.5912,
240
+ "step": 825
241
+ },
242
+ {
243
+ "epoch": 0.49104563835932985,
244
+ "grad_norm": 6.500886917114258,
245
+ "learning_rate": 4.653327626792211e-05,
246
+ "loss": 0.595,
247
+ "step": 850
248
+ },
249
+ {
250
+ "epoch": 0.5054881571346043,
251
+ "grad_norm": 17.182079315185547,
252
+ "learning_rate": 4.626578215279264e-05,
253
+ "loss": 0.6416,
254
+ "step": 875
255
+ },
256
+ {
257
+ "epoch": 0.5199306759098787,
258
+ "grad_norm": 20.7849063873291,
259
+ "learning_rate": 4.5998288037663175e-05,
260
+ "loss": 0.4613,
261
+ "step": 900
262
+ },
263
+ {
264
+ "epoch": 0.5343731946851531,
265
+ "grad_norm": 6.231338977813721,
266
+ "learning_rate": 4.5730793922533704e-05,
267
+ "loss": 0.4601,
268
+ "step": 925
269
+ },
270
+ {
271
+ "epoch": 0.5488157134604275,
272
+ "grad_norm": 37.750099182128906,
273
+ "learning_rate": 4.546329980740424e-05,
274
+ "loss": 0.6588,
275
+ "step": 950
276
+ },
277
+ {
278
+ "epoch": 0.5632582322357019,
279
+ "grad_norm": 13.106170654296875,
280
+ "learning_rate": 4.5195805692274775e-05,
281
+ "loss": 0.6352,
282
+ "step": 975
283
+ },
284
+ {
285
+ "epoch": 0.5777007510109763,
286
+ "grad_norm": 12.079797744750977,
287
+ "learning_rate": 4.4928311577145304e-05,
288
+ "loss": 0.6447,
289
+ "step": 1000
290
+ },
291
+ {
292
+ "epoch": 0.5921432697862508,
293
+ "grad_norm": 14.380491256713867,
294
+ "learning_rate": 4.466081746201583e-05,
295
+ "loss": 0.4655,
296
+ "step": 1025
297
+ },
298
+ {
299
+ "epoch": 0.6065857885615251,
300
+ "grad_norm": 18.562135696411133,
301
+ "learning_rate": 4.4393323346886376e-05,
302
+ "loss": 0.4547,
303
+ "step": 1050
304
+ },
305
+ {
306
+ "epoch": 0.6210283073367996,
307
+ "grad_norm": 22.869455337524414,
308
+ "learning_rate": 4.4125829231756905e-05,
309
+ "loss": 0.5574,
310
+ "step": 1075
311
+ },
312
+ {
313
+ "epoch": 0.635470826112074,
314
+ "grad_norm": 15.650333404541016,
315
+ "learning_rate": 4.3858335116627433e-05,
316
+ "loss": 0.5149,
317
+ "step": 1100
318
+ },
319
+ {
320
+ "epoch": 0.6499133448873483,
321
+ "grad_norm": 19.41951560974121,
322
+ "learning_rate": 4.359084100149797e-05,
323
+ "loss": 0.5395,
324
+ "step": 1125
325
+ },
326
+ {
327
+ "epoch": 0.6643558636626228,
328
+ "grad_norm": 14.825141906738281,
329
+ "learning_rate": 4.3323346886368505e-05,
330
+ "loss": 0.3763,
331
+ "step": 1150
332
+ },
333
+ {
334
+ "epoch": 0.6787983824378971,
335
+ "grad_norm": 10.986475944519043,
336
+ "learning_rate": 4.3055852771239034e-05,
337
+ "loss": 0.383,
338
+ "step": 1175
339
+ },
340
+ {
341
+ "epoch": 0.6932409012131716,
342
+ "grad_norm": 6.805363655090332,
343
+ "learning_rate": 4.278835865610957e-05,
344
+ "loss": 0.4967,
345
+ "step": 1200
346
+ },
347
+ {
348
+ "epoch": 0.707683419988446,
349
+ "grad_norm": 9.07087230682373,
350
+ "learning_rate": 4.25208645409801e-05,
351
+ "loss": 0.4451,
352
+ "step": 1225
353
+ },
354
+ {
355
+ "epoch": 0.7221259387637204,
356
+ "grad_norm": 15.876053810119629,
357
+ "learning_rate": 4.2253370425850634e-05,
358
+ "loss": 0.4178,
359
+ "step": 1250
360
+ },
361
+ {
362
+ "epoch": 0.7365684575389948,
363
+ "grad_norm": 10.015765190124512,
364
+ "learning_rate": 4.198587631072117e-05,
365
+ "loss": 0.5196,
366
+ "step": 1275
367
+ },
368
+ {
369
+ "epoch": 0.7510109763142692,
370
+ "grad_norm": 14.343164443969727,
371
+ "learning_rate": 4.17183821955917e-05,
372
+ "loss": 0.3987,
373
+ "step": 1300
374
+ },
375
+ {
376
+ "epoch": 0.7654534950895436,
377
+ "grad_norm": 7.555254936218262,
378
+ "learning_rate": 4.145088808046223e-05,
379
+ "loss": 0.4268,
380
+ "step": 1325
381
+ },
382
+ {
383
+ "epoch": 0.779896013864818,
384
+ "grad_norm": 19.02402114868164,
385
+ "learning_rate": 4.1183393965332764e-05,
386
+ "loss": 0.5197,
387
+ "step": 1350
388
+ },
389
+ {
390
+ "epoch": 0.7943385326400925,
391
+ "grad_norm": 17.702478408813477,
392
+ "learning_rate": 4.09158998502033e-05,
393
+ "loss": 0.3667,
394
+ "step": 1375
395
+ },
396
+ {
397
+ "epoch": 0.8087810514153668,
398
+ "grad_norm": 14.175180435180664,
399
+ "learning_rate": 4.064840573507383e-05,
400
+ "loss": 0.4353,
401
+ "step": 1400
402
+ },
403
+ {
404
+ "epoch": 0.8232235701906413,
405
+ "grad_norm": 11.903855323791504,
406
+ "learning_rate": 4.0380911619944364e-05,
407
+ "loss": 0.5338,
408
+ "step": 1425
409
+ },
410
+ {
411
+ "epoch": 0.8376660889659157,
412
+ "grad_norm": 13.651313781738281,
413
+ "learning_rate": 4.01134175048149e-05,
414
+ "loss": 0.4009,
415
+ "step": 1450
416
+ },
417
+ {
418
+ "epoch": 0.85210860774119,
419
+ "grad_norm": 5.367980480194092,
420
+ "learning_rate": 3.984592338968543e-05,
421
+ "loss": 0.5129,
422
+ "step": 1475
423
+ },
424
+ {
425
+ "epoch": 0.8665511265164645,
426
+ "grad_norm": 18.119848251342773,
427
+ "learning_rate": 3.957842927455596e-05,
428
+ "loss": 0.4251,
429
+ "step": 1500
430
+ },
431
+ {
432
+ "epoch": 0.8809936452917388,
433
+ "grad_norm": 8.821969032287598,
434
+ "learning_rate": 3.9310935159426494e-05,
435
+ "loss": 0.3965,
436
+ "step": 1525
437
+ },
438
+ {
439
+ "epoch": 0.8954361640670133,
440
+ "grad_norm": 5.4203782081604,
441
+ "learning_rate": 3.904344104429703e-05,
442
+ "loss": 0.427,
443
+ "step": 1550
444
+ },
445
+ {
446
+ "epoch": 0.9098786828422877,
447
+ "grad_norm": 10.101963996887207,
448
+ "learning_rate": 3.877594692916756e-05,
449
+ "loss": 0.3891,
450
+ "step": 1575
451
+ },
452
+ {
453
+ "epoch": 0.9243212016175621,
454
+ "grad_norm": 5.893563747406006,
455
+ "learning_rate": 3.8508452814038094e-05,
456
+ "loss": 0.4165,
457
+ "step": 1600
458
+ },
459
+ {
460
+ "epoch": 0.9387637203928365,
461
+ "grad_norm": 6.127294540405273,
462
+ "learning_rate": 3.8240958698908623e-05,
463
+ "loss": 0.5194,
464
+ "step": 1625
465
+ },
466
+ {
467
+ "epoch": 0.9532062391681109,
468
+ "grad_norm": 13.313063621520996,
469
+ "learning_rate": 3.797346458377916e-05,
470
+ "loss": 0.4108,
471
+ "step": 1650
472
+ },
473
+ {
474
+ "epoch": 0.9676487579433853,
475
+ "grad_norm": 14.13697624206543,
476
+ "learning_rate": 3.7705970468649695e-05,
477
+ "loss": 0.4612,
478
+ "step": 1675
479
+ },
480
+ {
481
+ "epoch": 0.9820912767186597,
482
+ "grad_norm": 8.540029525756836,
483
+ "learning_rate": 3.7438476353520224e-05,
484
+ "loss": 0.4081,
485
+ "step": 1700
486
+ },
487
+ {
488
+ "epoch": 0.9965337954939342,
489
+ "grad_norm": 15.01349925994873,
490
+ "learning_rate": 3.717098223839075e-05,
491
+ "loss": 0.4559,
492
+ "step": 1725
493
+ },
494
+ {
495
+ "epoch": 1.0,
496
+ "eval_explained_variance": 0.6565504670143127,
497
+ "eval_loss": 0.5362390279769897,
498
+ "eval_mae": 0.5734534859657288,
499
+ "eval_mse": 0.5362390279769897,
500
+ "eval_r2": 0.5139721769346014,
501
+ "eval_rmse": 0.7322834134101868,
502
+ "eval_runtime": 21.5346,
503
+ "eval_samples_per_second": 160.765,
504
+ "eval_steps_per_second": 10.077,
505
+ "step": 1731
506
+ },
507
+ {
508
+ "epoch": 1.0109763142692085,
509
+ "grad_norm": 21.412073135375977,
510
+ "learning_rate": 3.6903488123261295e-05,
511
+ "loss": 0.4508,
512
+ "step": 1750
513
+ },
514
+ {
515
+ "epoch": 1.025418833044483,
516
+ "grad_norm": 8.56926155090332,
517
+ "learning_rate": 3.6635994008131824e-05,
518
+ "loss": 0.4829,
519
+ "step": 1775
520
+ },
521
+ {
522
+ "epoch": 1.0398613518197575,
523
+ "grad_norm": 9.545137405395508,
524
+ "learning_rate": 3.6368499893002353e-05,
525
+ "loss": 0.4499,
526
+ "step": 1800
527
+ },
528
+ {
529
+ "epoch": 1.0543038705950318,
530
+ "grad_norm": 27.51245880126953,
531
+ "learning_rate": 3.610100577787289e-05,
532
+ "loss": 0.4109,
533
+ "step": 1825
534
+ },
535
+ {
536
+ "epoch": 1.0687463893703062,
537
+ "grad_norm": 12.80823802947998,
538
+ "learning_rate": 3.583351166274342e-05,
539
+ "loss": 0.3662,
540
+ "step": 1850
541
+ },
542
+ {
543
+ "epoch": 1.0831889081455806,
544
+ "grad_norm": 7.883637428283691,
545
+ "learning_rate": 3.5566017547613954e-05,
546
+ "loss": 0.4184,
547
+ "step": 1875
548
+ },
549
+ {
550
+ "epoch": 1.097631426920855,
551
+ "grad_norm": 7.406681537628174,
552
+ "learning_rate": 3.529852343248449e-05,
553
+ "loss": 0.3439,
554
+ "step": 1900
555
+ },
556
+ {
557
+ "epoch": 1.1120739456961295,
558
+ "grad_norm": 19.036779403686523,
559
+ "learning_rate": 3.503102931735502e-05,
560
+ "loss": 0.4009,
561
+ "step": 1925
562
+ },
563
+ {
564
+ "epoch": 1.1265164644714039,
565
+ "grad_norm": 8.52979850769043,
566
+ "learning_rate": 3.476353520222555e-05,
567
+ "loss": 0.4397,
568
+ "step": 1950
569
+ },
570
+ {
571
+ "epoch": 1.1409589832466782,
572
+ "grad_norm": 16.250051498413086,
573
+ "learning_rate": 3.449604108709609e-05,
574
+ "loss": 0.4651,
575
+ "step": 1975
576
+ },
577
+ {
578
+ "epoch": 1.1554015020219526,
579
+ "grad_norm": 5.9189300537109375,
580
+ "learning_rate": 3.422854697196662e-05,
581
+ "loss": 0.4419,
582
+ "step": 2000
583
+ },
584
+ {
585
+ "epoch": 1.169844020797227,
586
+ "grad_norm": 11.332290649414062,
587
+ "learning_rate": 3.396105285683715e-05,
588
+ "loss": 0.3755,
589
+ "step": 2025
590
+ },
591
+ {
592
+ "epoch": 1.1842865395725015,
593
+ "grad_norm": 9.792673110961914,
594
+ "learning_rate": 3.3693558741707684e-05,
595
+ "loss": 0.358,
596
+ "step": 2050
597
+ },
598
+ {
599
+ "epoch": 1.1987290583477759,
600
+ "grad_norm": 14.335423469543457,
601
+ "learning_rate": 3.342606462657822e-05,
602
+ "loss": 0.3512,
603
+ "step": 2075
604
+ },
605
+ {
606
+ "epoch": 1.2131715771230502,
607
+ "grad_norm": 9.749696731567383,
608
+ "learning_rate": 3.315857051144875e-05,
609
+ "loss": 0.4072,
610
+ "step": 2100
611
+ },
612
+ {
613
+ "epoch": 1.2276140958983246,
614
+ "grad_norm": 9.317971229553223,
615
+ "learning_rate": 3.2891076396319284e-05,
616
+ "loss": 0.4359,
617
+ "step": 2125
618
+ },
619
+ {
620
+ "epoch": 1.242056614673599,
621
+ "grad_norm": 14.866842269897461,
622
+ "learning_rate": 3.262358228118981e-05,
623
+ "loss": 0.4314,
624
+ "step": 2150
625
+ },
626
+ {
627
+ "epoch": 1.2564991334488735,
628
+ "grad_norm": 6.312429428100586,
629
+ "learning_rate": 3.235608816606035e-05,
630
+ "loss": 0.3819,
631
+ "step": 2175
632
+ },
633
+ {
634
+ "epoch": 1.270941652224148,
635
+ "grad_norm": 5.175512313842773,
636
+ "learning_rate": 3.208859405093088e-05,
637
+ "loss": 0.4469,
638
+ "step": 2200
639
+ },
640
+ {
641
+ "epoch": 1.2853841709994223,
642
+ "grad_norm": 16.6768856048584,
643
+ "learning_rate": 3.1821099935801414e-05,
644
+ "loss": 0.4156,
645
+ "step": 2225
646
+ },
647
+ {
648
+ "epoch": 1.2998266897746968,
649
+ "grad_norm": 5.419372081756592,
650
+ "learning_rate": 3.155360582067194e-05,
651
+ "loss": 0.3966,
652
+ "step": 2250
653
+ },
654
+ {
655
+ "epoch": 1.314269208549971,
656
+ "grad_norm": 9.6641263961792,
657
+ "learning_rate": 3.128611170554248e-05,
658
+ "loss": 0.3622,
659
+ "step": 2275
660
+ },
661
+ {
662
+ "epoch": 1.3287117273252456,
663
+ "grad_norm": 11.433446884155273,
664
+ "learning_rate": 3.1018617590413014e-05,
665
+ "loss": 0.5056,
666
+ "step": 2300
667
+ },
668
+ {
669
+ "epoch": 1.34315424610052,
670
+ "grad_norm": 8.54787540435791,
671
+ "learning_rate": 3.075112347528354e-05,
672
+ "loss": 0.3228,
673
+ "step": 2325
674
+ },
675
+ {
676
+ "epoch": 1.3575967648757943,
677
+ "grad_norm": 24.707653045654297,
678
+ "learning_rate": 3.0483629360154076e-05,
679
+ "loss": 0.3755,
680
+ "step": 2350
681
+ },
682
+ {
683
+ "epoch": 1.3720392836510689,
684
+ "grad_norm": 13.02287483215332,
685
+ "learning_rate": 3.021613524502461e-05,
686
+ "loss": 0.313,
687
+ "step": 2375
688
+ },
689
+ {
690
+ "epoch": 1.3864818024263432,
691
+ "grad_norm": 7.0347771644592285,
692
+ "learning_rate": 2.9948641129895144e-05,
693
+ "loss": 0.3618,
694
+ "step": 2400
695
+ },
696
+ {
697
+ "epoch": 1.4009243212016176,
698
+ "grad_norm": 3.799116611480713,
699
+ "learning_rate": 2.9681147014765676e-05,
700
+ "loss": 0.3338,
701
+ "step": 2425
702
+ },
703
+ {
704
+ "epoch": 1.415366839976892,
705
+ "grad_norm": 10.154156684875488,
706
+ "learning_rate": 2.941365289963621e-05,
707
+ "loss": 0.3184,
708
+ "step": 2450
709
+ },
710
+ {
711
+ "epoch": 1.4298093587521663,
712
+ "grad_norm": 22.4088191986084,
713
+ "learning_rate": 2.9146158784506744e-05,
714
+ "loss": 0.3218,
715
+ "step": 2475
716
+ },
717
+ {
718
+ "epoch": 1.4442518775274409,
719
+ "grad_norm": 5.571261405944824,
720
+ "learning_rate": 2.8878664669377277e-05,
721
+ "loss": 0.4167,
722
+ "step": 2500
723
+ },
724
+ {
725
+ "epoch": 1.4586943963027152,
726
+ "grad_norm": 10.851147651672363,
727
+ "learning_rate": 2.8611170554247806e-05,
728
+ "loss": 0.3881,
729
+ "step": 2525
730
+ },
731
+ {
732
+ "epoch": 1.4731369150779896,
733
+ "grad_norm": 30.00836181640625,
734
+ "learning_rate": 2.8343676439118338e-05,
735
+ "loss": 0.3706,
736
+ "step": 2550
737
+ },
738
+ {
739
+ "epoch": 1.487579433853264,
740
+ "grad_norm": 12.909472465515137,
741
+ "learning_rate": 2.8076182323988874e-05,
742
+ "loss": 0.4325,
743
+ "step": 2575
744
+ },
745
+ {
746
+ "epoch": 1.5020219526285383,
747
+ "grad_norm": 10.231127738952637,
748
+ "learning_rate": 2.7808688208859406e-05,
749
+ "loss": 0.3357,
750
+ "step": 2600
751
+ },
752
+ {
753
+ "epoch": 1.516464471403813,
754
+ "grad_norm": 7.157652378082275,
755
+ "learning_rate": 2.754119409372994e-05,
756
+ "loss": 0.3927,
757
+ "step": 2625
758
+ },
759
+ {
760
+ "epoch": 1.5309069901790873,
761
+ "grad_norm": 13.10181999206543,
762
+ "learning_rate": 2.727369997860047e-05,
763
+ "loss": 0.3189,
764
+ "step": 2650
765
+ },
766
+ {
767
+ "epoch": 1.5453495089543616,
768
+ "grad_norm": 12.194095611572266,
769
+ "learning_rate": 2.7006205863471007e-05,
770
+ "loss": 0.4226,
771
+ "step": 2675
772
+ },
773
+ {
774
+ "epoch": 1.5597920277296362,
775
+ "grad_norm": 18.289899826049805,
776
+ "learning_rate": 2.673871174834154e-05,
777
+ "loss": 0.2865,
778
+ "step": 2700
779
+ },
780
+ {
781
+ "epoch": 1.5742345465049103,
782
+ "grad_norm": 4.3070068359375,
783
+ "learning_rate": 2.647121763321207e-05,
784
+ "loss": 0.3107,
785
+ "step": 2725
786
+ },
787
+ {
788
+ "epoch": 1.588677065280185,
789
+ "grad_norm": 26.21879768371582,
790
+ "learning_rate": 2.62037235180826e-05,
791
+ "loss": 0.3243,
792
+ "step": 2750
793
+ },
794
+ {
795
+ "epoch": 1.6031195840554593,
796
+ "grad_norm": 8.495038986206055,
797
+ "learning_rate": 2.593622940295314e-05,
798
+ "loss": 0.3285,
799
+ "step": 2775
800
+ },
801
+ {
802
+ "epoch": 1.6175621028307337,
803
+ "grad_norm": 17.74176788330078,
804
+ "learning_rate": 2.566873528782367e-05,
805
+ "loss": 0.3819,
806
+ "step": 2800
807
+ },
808
+ {
809
+ "epoch": 1.6320046216060082,
810
+ "grad_norm": 8.67226505279541,
811
+ "learning_rate": 2.54012411726942e-05,
812
+ "loss": 0.4388,
813
+ "step": 2825
814
+ },
815
+ {
816
+ "epoch": 1.6464471403812824,
817
+ "grad_norm": 9.305310249328613,
818
+ "learning_rate": 2.5133747057564733e-05,
819
+ "loss": 0.3801,
820
+ "step": 2850
821
+ },
822
+ {
823
+ "epoch": 1.660889659156557,
824
+ "grad_norm": 16.156944274902344,
825
+ "learning_rate": 2.4866252942435266e-05,
826
+ "loss": 0.337,
827
+ "step": 2875
828
+ },
829
+ {
830
+ "epoch": 1.6753321779318313,
831
+ "grad_norm": 18.950183868408203,
832
+ "learning_rate": 2.45987588273058e-05,
833
+ "loss": 0.3913,
834
+ "step": 2900
835
+ },
836
+ {
837
+ "epoch": 1.6897746967071057,
838
+ "grad_norm": 4.8534321784973145,
839
+ "learning_rate": 2.4331264712176334e-05,
840
+ "loss": 0.3677,
841
+ "step": 2925
842
+ },
843
+ {
844
+ "epoch": 1.7042172154823803,
845
+ "grad_norm": 7.860241413116455,
846
+ "learning_rate": 2.4063770597046866e-05,
847
+ "loss": 0.3495,
848
+ "step": 2950
849
+ },
850
+ {
851
+ "epoch": 1.7186597342576544,
852
+ "grad_norm": 10.027009010314941,
853
+ "learning_rate": 2.37962764819174e-05,
854
+ "loss": 0.3787,
855
+ "step": 2975
856
+ },
857
+ {
858
+ "epoch": 1.733102253032929,
859
+ "grad_norm": 13.39957046508789,
860
+ "learning_rate": 2.352878236678793e-05,
861
+ "loss": 0.3064,
862
+ "step": 3000
863
+ },
864
+ {
865
+ "epoch": 1.7475447718082033,
866
+ "grad_norm": 8.104743957519531,
867
+ "learning_rate": 2.3261288251658463e-05,
868
+ "loss": 0.3703,
869
+ "step": 3025
870
+ },
871
+ {
872
+ "epoch": 1.7619872905834777,
873
+ "grad_norm": 7.085102558135986,
874
+ "learning_rate": 2.2993794136529e-05,
875
+ "loss": 0.3488,
876
+ "step": 3050
877
+ },
878
+ {
879
+ "epoch": 1.7764298093587523,
880
+ "grad_norm": 8.273953437805176,
881
+ "learning_rate": 2.272630002139953e-05,
882
+ "loss": 0.3574,
883
+ "step": 3075
884
+ },
885
+ {
886
+ "epoch": 1.7908723281340264,
887
+ "grad_norm": 5.399058818817139,
888
+ "learning_rate": 2.2458805906270064e-05,
889
+ "loss": 0.3699,
890
+ "step": 3100
891
+ },
892
+ {
893
+ "epoch": 1.805314846909301,
894
+ "grad_norm": 25.818262100219727,
895
+ "learning_rate": 2.2191311791140596e-05,
896
+ "loss": 0.2714,
897
+ "step": 3125
898
+ },
899
+ {
900
+ "epoch": 1.8197573656845754,
901
+ "grad_norm": 8.441669464111328,
902
+ "learning_rate": 2.192381767601113e-05,
903
+ "loss": 0.3225,
904
+ "step": 3150
905
+ },
906
+ {
907
+ "epoch": 1.8341998844598497,
908
+ "grad_norm": 3.318145751953125,
909
+ "learning_rate": 2.165632356088166e-05,
910
+ "loss": 0.3359,
911
+ "step": 3175
912
+ },
913
+ {
914
+ "epoch": 1.8486424032351243,
915
+ "grad_norm": 3.700218439102173,
916
+ "learning_rate": 2.1388829445752197e-05,
917
+ "loss": 0.3138,
918
+ "step": 3200
919
+ },
920
+ {
921
+ "epoch": 1.8630849220103987,
922
+ "grad_norm": 4.9609246253967285,
923
+ "learning_rate": 2.1121335330622726e-05,
924
+ "loss": 0.3429,
925
+ "step": 3225
926
+ },
927
+ {
928
+ "epoch": 1.877527440785673,
929
+ "grad_norm": 11.287262916564941,
930
+ "learning_rate": 2.085384121549326e-05,
931
+ "loss": 0.3926,
932
+ "step": 3250
933
+ },
934
+ {
935
+ "epoch": 1.8919699595609474,
936
+ "grad_norm": 13.642833709716797,
937
+ "learning_rate": 2.0586347100363794e-05,
938
+ "loss": 0.3484,
939
+ "step": 3275
940
+ },
941
+ {
942
+ "epoch": 1.9064124783362217,
943
+ "grad_norm": 5.669510364532471,
944
+ "learning_rate": 2.0318852985234326e-05,
945
+ "loss": 0.3307,
946
+ "step": 3300
947
+ },
948
+ {
949
+ "epoch": 1.9208549971114963,
950
+ "grad_norm": 11.987211227416992,
951
+ "learning_rate": 2.005135887010486e-05,
952
+ "loss": 0.2979,
953
+ "step": 3325
954
+ },
955
+ {
956
+ "epoch": 1.9352975158867707,
957
+ "grad_norm": 8.090258598327637,
958
+ "learning_rate": 1.978386475497539e-05,
959
+ "loss": 0.2786,
960
+ "step": 3350
961
+ },
962
+ {
963
+ "epoch": 1.949740034662045,
964
+ "grad_norm": 5.356060028076172,
965
+ "learning_rate": 1.9516370639845923e-05,
966
+ "loss": 0.281,
967
+ "step": 3375
968
+ },
969
+ {
970
+ "epoch": 1.9641825534373196,
971
+ "grad_norm": 9.354238510131836,
972
+ "learning_rate": 1.924887652471646e-05,
973
+ "loss": 0.2761,
974
+ "step": 3400
975
+ },
976
+ {
977
+ "epoch": 1.9786250722125938,
978
+ "grad_norm": 12.076613426208496,
979
+ "learning_rate": 1.8981382409586988e-05,
980
+ "loss": 0.3394,
981
+ "step": 3425
982
+ },
983
+ {
984
+ "epoch": 1.9930675909878683,
985
+ "grad_norm": 6.9947428703308105,
986
+ "learning_rate": 1.8713888294457524e-05,
987
+ "loss": 0.3652,
988
+ "step": 3450
989
+ },
990
+ {
991
+ "epoch": 2.0,
992
+ "eval_explained_variance": 0.700911283493042,
993
+ "eval_loss": 0.3305796980857849,
994
+ "eval_mae": 0.44259902834892273,
995
+ "eval_mse": 0.3305796980857849,
996
+ "eval_r2": 0.7003744220771353,
997
+ "eval_rmse": 0.5749605894088745,
998
+ "eval_runtime": 21.4982,
999
+ "eval_samples_per_second": 161.037,
1000
+ "eval_steps_per_second": 10.094,
1001
+ "step": 3462
1002
+ }
1003
+ ],
1004
+ "logging_steps": 25,
1005
+ "max_steps": 5193,
1006
+ "num_input_tokens_seen": 0,
1007
+ "num_train_epochs": 3,
1008
+ "save_steps": 500,
1009
+ "total_flos": 7285479708948480.0,
1010
+ "train_batch_size": 8,
1011
+ "trial_name": null,
1012
+ "trial_params": null
1013
+ }
checkpoint-3462/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:686af3a4982678cc23b8424229c94be57bb84a776c74836677e7bde5ad5ee125
3
+ size 5048
config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "FacebookAI/roberta-base",
3
+ "_num_labels": 1,
4
+ "architectures": [
5
+ "RobertaForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "bos_token_id": 0,
9
+ "classifier_dropout": null,
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "target"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "target": 0
21
+ },
22
+ "layer_norm_eps": 1e-05,
23
+ "max_position_embeddings": 514,
24
+ "model_type": "roberta",
25
+ "num_attention_heads": 12,
26
+ "num_hidden_layers": 12,
27
+ "pad_token_id": 1,
28
+ "position_embedding_type": "absolute",
29
+ "problem_type": "regression",
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.40.1",
32
+ "type_vocab_size": 1,
33
+ "use_cache": true,
34
+ "vocab_size": 50265
35
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:775d65fd7488be3a614de8f7c06ab4e48f197ada2653a63cc0fbc5b2c12d4db5
3
+ size 498609748
runs/Apr30_11-30-35_r-abhishek-autotrain-yu8daxhp-663a5-olct4/events.out.tfevents.1714476635.r-abhishek-autotrain-yu8daxhp-663a5-olct4.150.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9de6dee95bd69081a094d2061897077a0c0081dd946de57ca405fa82a0ba56df
3
- size 34959
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a31300fc20d14b92f93e9a777afc30b148928b2e6aa0d674f559742e7d37972
3
+ size 50393
runs/Apr30_11-30-35_r-abhishek-autotrain-yu8daxhp-663a5-olct4/events.out.tfevents.1714477596.r-abhishek-autotrain-yu8daxhp-663a5-olct4.150.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6557043cd9acc094a0619bbcbee73518e42823ca5c682a7f6f8de09621c7ec0d
3
+ size 609
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "mask_token": "<mask>",
51
+ "model_max_length": 512,
52
+ "pad_token": "<pad>",
53
+ "sep_token": "</s>",
54
+ "tokenizer_class": "RobertaTokenizer",
55
+ "trim_offsets": true,
56
+ "unk_token": "<unk>"
57
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:686af3a4982678cc23b8424229c94be57bb84a776c74836677e7bde5ad5ee125
3
+ size 5048
training_params.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "autotrain-m1b56-8ger6/autotrain-data",
3
+ "model": "FacebookAI/roberta-base",
4
+ "lr": 5e-05,
5
+ "epochs": 3,
6
+ "max_seq_length": 512,
7
+ "batch_size": 8,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "text_column": "autotrain_text",
18
+ "target_column": "autotrain_label",
19
+ "logging_steps": -1,
20
+ "project_name": "autotrain-m1b56-8ger6",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": "fp16",
23
+ "save_total_limit": 1,
24
+ "push_to_hub": true,
25
+ "evaluation_strategy": "epoch",
26
+ "username": "abhishek",
27
+ "log": "tensorboard"
28
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff