abhishek HF staff commited on
Commit
5b1b2ff
1 Parent(s): c1f0366

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - text-regression
6
+ widget:
7
+ - text: "I love AutoTrain"
8
+ datasets:
9
+ - autotrain-99xmg-r4pqr/autotrain-data
10
+ ---
11
+
12
+ # Model Trained Using AutoTrain
13
+
14
+ - Problem type: Text Regression
15
+
16
+ ## Validation Metrics
17
+ loss: 0.34081029891967773
18
+
19
+ mse: 0.3408103287220001
20
+
21
+ mae: 0.4530330300331116
22
+
23
+ r2: 0.6911017288906998
24
+
25
+ rmse: 0.5837896466255188
26
+
27
+ explained_variance: 0.6929686665534973
checkpoint-3462/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google-bert/bert-base-uncased",
3
+ "_num_labels": 1,
4
+ "architectures": [
5
+ "BertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "classifier_dropout": null,
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "target"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "target": 0
20
+ },
21
+ "layer_norm_eps": 1e-12,
22
+ "max_position_embeddings": 512,
23
+ "model_type": "bert",
24
+ "num_attention_heads": 12,
25
+ "num_hidden_layers": 12,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "problem_type": "regression",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.40.1",
31
+ "type_vocab_size": 2,
32
+ "use_cache": true,
33
+ "vocab_size": 30522
34
+ }
checkpoint-3462/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1cf357382ee41f38190a8ca66e1f4a0c816d0b3701854f3d2302ba19008693f
3
+ size 437955572
checkpoint-3462/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad2b9351b6f2fb519c4c46c83d5f426c583e7735d52cd8de23c784b5ab5abd40
3
+ size 876032250
checkpoint-3462/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5ffdbd255c3df6d107cf1e3a717edf3894da4da60169af398fe37318f89aebf
3
+ size 14244
checkpoint-3462/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7452fd8ce30d6700fbd23005f34b9d0b89bca167b9b1accf8cd27329d9fcf98
3
+ size 1064
checkpoint-3462/trainer_state.json ADDED
@@ -0,0 +1,1013 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.34081029891967773,
3
+ "best_model_checkpoint": "autotrain-99xmg-r4pqr/checkpoint-3462",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 3462,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.014442518775274409,
13
+ "grad_norm": 109.87998962402344,
14
+ "learning_rate": 2.0192307692307692e-06,
15
+ "loss": 6.024,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.028885037550548817,
20
+ "grad_norm": 42.21332550048828,
21
+ "learning_rate": 4.423076923076924e-06,
22
+ "loss": 4.159,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.043327556325823226,
27
+ "grad_norm": 73.7143325805664,
28
+ "learning_rate": 6.826923076923076e-06,
29
+ "loss": 1.5598,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.057770075101097634,
34
+ "grad_norm": 22.827505111694336,
35
+ "learning_rate": 9.230769230769232e-06,
36
+ "loss": 1.0843,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.07221259387637204,
41
+ "grad_norm": 10.978221893310547,
42
+ "learning_rate": 1.1634615384615386e-05,
43
+ "loss": 0.8028,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.08665511265164645,
48
+ "grad_norm": 18.381181716918945,
49
+ "learning_rate": 1.403846153846154e-05,
50
+ "loss": 0.5757,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.10109763142692085,
55
+ "grad_norm": 26.028202056884766,
56
+ "learning_rate": 1.6442307692307695e-05,
57
+ "loss": 0.5391,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.11554015020219527,
62
+ "grad_norm": 11.943533897399902,
63
+ "learning_rate": 1.8846153846153846e-05,
64
+ "loss": 0.7264,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.12998266897746968,
69
+ "grad_norm": 27.598697662353516,
70
+ "learning_rate": 2.125e-05,
71
+ "loss": 0.5976,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.14442518775274407,
76
+ "grad_norm": 2.4630095958709717,
77
+ "learning_rate": 2.3653846153846153e-05,
78
+ "loss": 0.436,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.1588677065280185,
83
+ "grad_norm": 24.022457122802734,
84
+ "learning_rate": 2.6057692307692312e-05,
85
+ "loss": 0.7163,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.1733102253032929,
90
+ "grad_norm": 9.978269577026367,
91
+ "learning_rate": 2.846153846153846e-05,
92
+ "loss": 0.6614,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.1877527440785673,
97
+ "grad_norm": 23.359638214111328,
98
+ "learning_rate": 3.0865384615384616e-05,
99
+ "loss": 0.6557,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.2021952628538417,
104
+ "grad_norm": 22.752307891845703,
105
+ "learning_rate": 3.326923076923077e-05,
106
+ "loss": 0.6954,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.21663778162911612,
111
+ "grad_norm": 4.690310001373291,
112
+ "learning_rate": 3.5673076923076926e-05,
113
+ "loss": 0.7825,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.23108030040439054,
118
+ "grad_norm": 20.00984764099121,
119
+ "learning_rate": 3.807692307692308e-05,
120
+ "loss": 0.836,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.24552281917966493,
125
+ "grad_norm": 25.91275978088379,
126
+ "learning_rate": 4.0480769230769236e-05,
127
+ "loss": 0.569,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 0.25996533795493937,
132
+ "grad_norm": 15.184056282043457,
133
+ "learning_rate": 4.288461538461538e-05,
134
+ "loss": 0.6616,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 0.27440785673021373,
139
+ "grad_norm": 4.631957530975342,
140
+ "learning_rate": 4.528846153846154e-05,
141
+ "loss": 0.5425,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 0.28885037550548814,
146
+ "grad_norm": 6.141411781311035,
147
+ "learning_rate": 4.76923076923077e-05,
148
+ "loss": 0.5024,
149
+ "step": 500
150
+ },
151
+ {
152
+ "epoch": 0.30329289428076256,
153
+ "grad_norm": 4.359855651855469,
154
+ "learning_rate": 4.9989300235394826e-05,
155
+ "loss": 0.603,
156
+ "step": 525
157
+ },
158
+ {
159
+ "epoch": 0.317735413056037,
160
+ "grad_norm": 10.599994659423828,
161
+ "learning_rate": 4.9721806120265355e-05,
162
+ "loss": 0.5994,
163
+ "step": 550
164
+ },
165
+ {
166
+ "epoch": 0.3321779318313114,
167
+ "grad_norm": 44.90914535522461,
168
+ "learning_rate": 4.945431200513589e-05,
169
+ "loss": 0.8399,
170
+ "step": 575
171
+ },
172
+ {
173
+ "epoch": 0.3466204506065858,
174
+ "grad_norm": 5.233628273010254,
175
+ "learning_rate": 4.918681789000642e-05,
176
+ "loss": 0.5791,
177
+ "step": 600
178
+ },
179
+ {
180
+ "epoch": 0.3610629693818602,
181
+ "grad_norm": 21.001466751098633,
182
+ "learning_rate": 4.8919323774876955e-05,
183
+ "loss": 0.6869,
184
+ "step": 625
185
+ },
186
+ {
187
+ "epoch": 0.3755054881571346,
188
+ "grad_norm": 17.496326446533203,
189
+ "learning_rate": 4.865182965974749e-05,
190
+ "loss": 0.5309,
191
+ "step": 650
192
+ },
193
+ {
194
+ "epoch": 0.389948006932409,
195
+ "grad_norm": 16.32991600036621,
196
+ "learning_rate": 4.838433554461802e-05,
197
+ "loss": 0.7183,
198
+ "step": 675
199
+ },
200
+ {
201
+ "epoch": 0.4043905257076834,
202
+ "grad_norm": 7.430429935455322,
203
+ "learning_rate": 4.811684142948855e-05,
204
+ "loss": 0.3932,
205
+ "step": 700
206
+ },
207
+ {
208
+ "epoch": 0.41883304448295783,
209
+ "grad_norm": 7.165083885192871,
210
+ "learning_rate": 4.784934731435909e-05,
211
+ "loss": 0.5113,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 0.43327556325823224,
216
+ "grad_norm": 10.602783203125,
217
+ "learning_rate": 4.758185319922962e-05,
218
+ "loss": 0.5259,
219
+ "step": 750
220
+ },
221
+ {
222
+ "epoch": 0.44771808203350666,
223
+ "grad_norm": 22.993024826049805,
224
+ "learning_rate": 4.731435908410015e-05,
225
+ "loss": 0.678,
226
+ "step": 775
227
+ },
228
+ {
229
+ "epoch": 0.4621606008087811,
230
+ "grad_norm": 17.77105712890625,
231
+ "learning_rate": 4.7046864968970685e-05,
232
+ "loss": 0.6262,
233
+ "step": 800
234
+ },
235
+ {
236
+ "epoch": 0.47660311958405543,
237
+ "grad_norm": 6.010850429534912,
238
+ "learning_rate": 4.677937085384122e-05,
239
+ "loss": 0.5537,
240
+ "step": 825
241
+ },
242
+ {
243
+ "epoch": 0.49104563835932985,
244
+ "grad_norm": 10.288942337036133,
245
+ "learning_rate": 4.651187673871175e-05,
246
+ "loss": 0.5963,
247
+ "step": 850
248
+ },
249
+ {
250
+ "epoch": 0.5054881571346043,
251
+ "grad_norm": 20.071813583374023,
252
+ "learning_rate": 4.6244382623582286e-05,
253
+ "loss": 0.5735,
254
+ "step": 875
255
+ },
256
+ {
257
+ "epoch": 0.5199306759098787,
258
+ "grad_norm": 5.898088455200195,
259
+ "learning_rate": 4.5976888508452815e-05,
260
+ "loss": 0.4843,
261
+ "step": 900
262
+ },
263
+ {
264
+ "epoch": 0.5343731946851531,
265
+ "grad_norm": 3.5588929653167725,
266
+ "learning_rate": 4.570939439332335e-05,
267
+ "loss": 0.4247,
268
+ "step": 925
269
+ },
270
+ {
271
+ "epoch": 0.5488157134604275,
272
+ "grad_norm": 37.71608352661133,
273
+ "learning_rate": 4.544190027819388e-05,
274
+ "loss": 0.6862,
275
+ "step": 950
276
+ },
277
+ {
278
+ "epoch": 0.5632582322357019,
279
+ "grad_norm": 11.169629096984863,
280
+ "learning_rate": 4.5174406163064415e-05,
281
+ "loss": 0.5932,
282
+ "step": 975
283
+ },
284
+ {
285
+ "epoch": 0.5777007510109763,
286
+ "grad_norm": 6.53275203704834,
287
+ "learning_rate": 4.4906912047934944e-05,
288
+ "loss": 0.5161,
289
+ "step": 1000
290
+ },
291
+ {
292
+ "epoch": 0.5921432697862508,
293
+ "grad_norm": 21.197736740112305,
294
+ "learning_rate": 4.463941793280548e-05,
295
+ "loss": 0.4312,
296
+ "step": 1025
297
+ },
298
+ {
299
+ "epoch": 0.6065857885615251,
300
+ "grad_norm": 16.153993606567383,
301
+ "learning_rate": 4.4371923817676016e-05,
302
+ "loss": 0.438,
303
+ "step": 1050
304
+ },
305
+ {
306
+ "epoch": 0.6210283073367996,
307
+ "grad_norm": 21.46822166442871,
308
+ "learning_rate": 4.4104429702546545e-05,
309
+ "loss": 0.4343,
310
+ "step": 1075
311
+ },
312
+ {
313
+ "epoch": 0.635470826112074,
314
+ "grad_norm": 18.194324493408203,
315
+ "learning_rate": 4.3836935587417074e-05,
316
+ "loss": 0.4935,
317
+ "step": 1100
318
+ },
319
+ {
320
+ "epoch": 0.6499133448873483,
321
+ "grad_norm": 11.8253173828125,
322
+ "learning_rate": 4.3569441472287616e-05,
323
+ "loss": 0.509,
324
+ "step": 1125
325
+ },
326
+ {
327
+ "epoch": 0.6643558636626228,
328
+ "grad_norm": 22.477100372314453,
329
+ "learning_rate": 4.3301947357158145e-05,
330
+ "loss": 0.3698,
331
+ "step": 1150
332
+ },
333
+ {
334
+ "epoch": 0.6787983824378971,
335
+ "grad_norm": 10.39338493347168,
336
+ "learning_rate": 4.3034453242028674e-05,
337
+ "loss": 0.4084,
338
+ "step": 1175
339
+ },
340
+ {
341
+ "epoch": 0.6932409012131716,
342
+ "grad_norm": 4.328056335449219,
343
+ "learning_rate": 4.276695912689921e-05,
344
+ "loss": 0.4137,
345
+ "step": 1200
346
+ },
347
+ {
348
+ "epoch": 0.707683419988446,
349
+ "grad_norm": 7.7573628425598145,
350
+ "learning_rate": 4.2499465011769746e-05,
351
+ "loss": 0.4974,
352
+ "step": 1225
353
+ },
354
+ {
355
+ "epoch": 0.7221259387637204,
356
+ "grad_norm": 27.289525985717773,
357
+ "learning_rate": 4.2231970896640275e-05,
358
+ "loss": 0.4999,
359
+ "step": 1250
360
+ },
361
+ {
362
+ "epoch": 0.7365684575389948,
363
+ "grad_norm": 9.000728607177734,
364
+ "learning_rate": 4.196447678151081e-05,
365
+ "loss": 0.4854,
366
+ "step": 1275
367
+ },
368
+ {
369
+ "epoch": 0.7510109763142692,
370
+ "grad_norm": 9.249234199523926,
371
+ "learning_rate": 4.169698266638134e-05,
372
+ "loss": 0.43,
373
+ "step": 1300
374
+ },
375
+ {
376
+ "epoch": 0.7654534950895436,
377
+ "grad_norm": 10.179132461547852,
378
+ "learning_rate": 4.1429488551251875e-05,
379
+ "loss": 0.3916,
380
+ "step": 1325
381
+ },
382
+ {
383
+ "epoch": 0.779896013864818,
384
+ "grad_norm": 8.030287742614746,
385
+ "learning_rate": 4.116199443612241e-05,
386
+ "loss": 0.5957,
387
+ "step": 1350
388
+ },
389
+ {
390
+ "epoch": 0.7943385326400925,
391
+ "grad_norm": 10.439188003540039,
392
+ "learning_rate": 4.089450032099294e-05,
393
+ "loss": 0.3744,
394
+ "step": 1375
395
+ },
396
+ {
397
+ "epoch": 0.8087810514153668,
398
+ "grad_norm": 14.759039878845215,
399
+ "learning_rate": 4.062700620586347e-05,
400
+ "loss": 0.406,
401
+ "step": 1400
402
+ },
403
+ {
404
+ "epoch": 0.8232235701906413,
405
+ "grad_norm": 5.672801494598389,
406
+ "learning_rate": 4.035951209073401e-05,
407
+ "loss": 0.5122,
408
+ "step": 1425
409
+ },
410
+ {
411
+ "epoch": 0.8376660889659157,
412
+ "grad_norm": 5.283393383026123,
413
+ "learning_rate": 4.009201797560454e-05,
414
+ "loss": 0.3474,
415
+ "step": 1450
416
+ },
417
+ {
418
+ "epoch": 0.85210860774119,
419
+ "grad_norm": 4.710180282592773,
420
+ "learning_rate": 3.982452386047507e-05,
421
+ "loss": 0.4959,
422
+ "step": 1475
423
+ },
424
+ {
425
+ "epoch": 0.8665511265164645,
426
+ "grad_norm": 13.148910522460938,
427
+ "learning_rate": 3.9557029745345605e-05,
428
+ "loss": 0.4234,
429
+ "step": 1500
430
+ },
431
+ {
432
+ "epoch": 0.8809936452917388,
433
+ "grad_norm": 2.738633155822754,
434
+ "learning_rate": 3.9289535630216134e-05,
435
+ "loss": 0.4548,
436
+ "step": 1525
437
+ },
438
+ {
439
+ "epoch": 0.8954361640670133,
440
+ "grad_norm": 5.08731746673584,
441
+ "learning_rate": 3.902204151508667e-05,
442
+ "loss": 0.486,
443
+ "step": 1550
444
+ },
445
+ {
446
+ "epoch": 0.9098786828422877,
447
+ "grad_norm": 16.38525390625,
448
+ "learning_rate": 3.8754547399957206e-05,
449
+ "loss": 0.4189,
450
+ "step": 1575
451
+ },
452
+ {
453
+ "epoch": 0.9243212016175621,
454
+ "grad_norm": 8.546934127807617,
455
+ "learning_rate": 3.8487053284827735e-05,
456
+ "loss": 0.4164,
457
+ "step": 1600
458
+ },
459
+ {
460
+ "epoch": 0.9387637203928365,
461
+ "grad_norm": 16.895854949951172,
462
+ "learning_rate": 3.8219559169698264e-05,
463
+ "loss": 0.5782,
464
+ "step": 1625
465
+ },
466
+ {
467
+ "epoch": 0.9532062391681109,
468
+ "grad_norm": 18.25130271911621,
469
+ "learning_rate": 3.79520650545688e-05,
470
+ "loss": 0.3899,
471
+ "step": 1650
472
+ },
473
+ {
474
+ "epoch": 0.9676487579433853,
475
+ "grad_norm": 8.933642387390137,
476
+ "learning_rate": 3.7684570939439335e-05,
477
+ "loss": 0.4484,
478
+ "step": 1675
479
+ },
480
+ {
481
+ "epoch": 0.9820912767186597,
482
+ "grad_norm": 2.895547389984131,
483
+ "learning_rate": 3.7417076824309864e-05,
484
+ "loss": 0.3807,
485
+ "step": 1700
486
+ },
487
+ {
488
+ "epoch": 0.9965337954939342,
489
+ "grad_norm": 6.855337619781494,
490
+ "learning_rate": 3.71495827091804e-05,
491
+ "loss": 0.5451,
492
+ "step": 1725
493
+ },
494
+ {
495
+ "epoch": 1.0,
496
+ "eval_explained_variance": 0.642084002494812,
497
+ "eval_loss": 0.4517485201358795,
498
+ "eval_mae": 0.5261725783348083,
499
+ "eval_mse": 0.4517485201358795,
500
+ "eval_r2": 0.590551334956607,
501
+ "eval_rmse": 0.6721224188804626,
502
+ "eval_runtime": 22.0997,
503
+ "eval_samples_per_second": 156.654,
504
+ "eval_steps_per_second": 9.819,
505
+ "step": 1731
506
+ },
507
+ {
508
+ "epoch": 1.0109763142692085,
509
+ "grad_norm": 20.85631561279297,
510
+ "learning_rate": 3.6882088594050936e-05,
511
+ "loss": 0.4845,
512
+ "step": 1750
513
+ },
514
+ {
515
+ "epoch": 1.025418833044483,
516
+ "grad_norm": 14.387726783752441,
517
+ "learning_rate": 3.6614594478921465e-05,
518
+ "loss": 0.3949,
519
+ "step": 1775
520
+ },
521
+ {
522
+ "epoch": 1.0398613518197575,
523
+ "grad_norm": 12.336610794067383,
524
+ "learning_rate": 3.6347100363791994e-05,
525
+ "loss": 0.3903,
526
+ "step": 1800
527
+ },
528
+ {
529
+ "epoch": 1.0543038705950318,
530
+ "grad_norm": 12.857579231262207,
531
+ "learning_rate": 3.607960624866253e-05,
532
+ "loss": 0.3625,
533
+ "step": 1825
534
+ },
535
+ {
536
+ "epoch": 1.0687463893703062,
537
+ "grad_norm": 3.4398610591888428,
538
+ "learning_rate": 3.5812112133533065e-05,
539
+ "loss": 0.3276,
540
+ "step": 1850
541
+ },
542
+ {
543
+ "epoch": 1.0831889081455806,
544
+ "grad_norm": 18.700855255126953,
545
+ "learning_rate": 3.5544618018403594e-05,
546
+ "loss": 0.4301,
547
+ "step": 1875
548
+ },
549
+ {
550
+ "epoch": 1.097631426920855,
551
+ "grad_norm": 3.7766644954681396,
552
+ "learning_rate": 3.527712390327413e-05,
553
+ "loss": 0.3571,
554
+ "step": 1900
555
+ },
556
+ {
557
+ "epoch": 1.1120739456961295,
558
+ "grad_norm": 13.447446823120117,
559
+ "learning_rate": 3.500962978814466e-05,
560
+ "loss": 0.3691,
561
+ "step": 1925
562
+ },
563
+ {
564
+ "epoch": 1.1265164644714039,
565
+ "grad_norm": 6.039691925048828,
566
+ "learning_rate": 3.4742135673015195e-05,
567
+ "loss": 0.3966,
568
+ "step": 1950
569
+ },
570
+ {
571
+ "epoch": 1.1409589832466782,
572
+ "grad_norm": 9.641484260559082,
573
+ "learning_rate": 3.447464155788573e-05,
574
+ "loss": 0.4171,
575
+ "step": 1975
576
+ },
577
+ {
578
+ "epoch": 1.1554015020219526,
579
+ "grad_norm": 8.735544204711914,
580
+ "learning_rate": 3.420714744275626e-05,
581
+ "loss": 0.4154,
582
+ "step": 2000
583
+ },
584
+ {
585
+ "epoch": 1.169844020797227,
586
+ "grad_norm": 10.355854034423828,
587
+ "learning_rate": 3.393965332762679e-05,
588
+ "loss": 0.3968,
589
+ "step": 2025
590
+ },
591
+ {
592
+ "epoch": 1.1842865395725015,
593
+ "grad_norm": 10.740571022033691,
594
+ "learning_rate": 3.367215921249733e-05,
595
+ "loss": 0.343,
596
+ "step": 2050
597
+ },
598
+ {
599
+ "epoch": 1.1987290583477759,
600
+ "grad_norm": 3.9777560234069824,
601
+ "learning_rate": 3.340466509736786e-05,
602
+ "loss": 0.3848,
603
+ "step": 2075
604
+ },
605
+ {
606
+ "epoch": 1.2131715771230502,
607
+ "grad_norm": 8.958410263061523,
608
+ "learning_rate": 3.313717098223839e-05,
609
+ "loss": 0.4381,
610
+ "step": 2100
611
+ },
612
+ {
613
+ "epoch": 1.2276140958983246,
614
+ "grad_norm": 7.341928005218506,
615
+ "learning_rate": 3.2869676867108925e-05,
616
+ "loss": 0.4134,
617
+ "step": 2125
618
+ },
619
+ {
620
+ "epoch": 1.242056614673599,
621
+ "grad_norm": 8.02366828918457,
622
+ "learning_rate": 3.260218275197946e-05,
623
+ "loss": 0.4799,
624
+ "step": 2150
625
+ },
626
+ {
627
+ "epoch": 1.2564991334488735,
628
+ "grad_norm": 5.73162841796875,
629
+ "learning_rate": 3.233468863684999e-05,
630
+ "loss": 0.3479,
631
+ "step": 2175
632
+ },
633
+ {
634
+ "epoch": 1.270941652224148,
635
+ "grad_norm": 10.025665283203125,
636
+ "learning_rate": 3.2067194521720525e-05,
637
+ "loss": 0.4987,
638
+ "step": 2200
639
+ },
640
+ {
641
+ "epoch": 1.2853841709994223,
642
+ "grad_norm": 15.222429275512695,
643
+ "learning_rate": 3.1799700406591054e-05,
644
+ "loss": 0.4249,
645
+ "step": 2225
646
+ },
647
+ {
648
+ "epoch": 1.2998266897746968,
649
+ "grad_norm": 6.472146987915039,
650
+ "learning_rate": 3.153220629146159e-05,
651
+ "loss": 0.4069,
652
+ "step": 2250
653
+ },
654
+ {
655
+ "epoch": 1.314269208549971,
656
+ "grad_norm": 2.59798264503479,
657
+ "learning_rate": 3.1264712176332126e-05,
658
+ "loss": 0.4369,
659
+ "step": 2275
660
+ },
661
+ {
662
+ "epoch": 1.3287117273252456,
663
+ "grad_norm": 6.89563512802124,
664
+ "learning_rate": 3.0997218061202655e-05,
665
+ "loss": 0.4497,
666
+ "step": 2300
667
+ },
668
+ {
669
+ "epoch": 1.34315424610052,
670
+ "grad_norm": 7.411588668823242,
671
+ "learning_rate": 3.0729723946073184e-05,
672
+ "loss": 0.3562,
673
+ "step": 2325
674
+ },
675
+ {
676
+ "epoch": 1.3575967648757943,
677
+ "grad_norm": 21.604717254638672,
678
+ "learning_rate": 3.0462229830943723e-05,
679
+ "loss": 0.3741,
680
+ "step": 2350
681
+ },
682
+ {
683
+ "epoch": 1.3720392836510689,
684
+ "grad_norm": 6.267892360687256,
685
+ "learning_rate": 3.0194735715814255e-05,
686
+ "loss": 0.3237,
687
+ "step": 2375
688
+ },
689
+ {
690
+ "epoch": 1.3864818024263432,
691
+ "grad_norm": 17.0023250579834,
692
+ "learning_rate": 2.9927241600684784e-05,
693
+ "loss": 0.3928,
694
+ "step": 2400
695
+ },
696
+ {
697
+ "epoch": 1.4009243212016176,
698
+ "grad_norm": 3.766000509262085,
699
+ "learning_rate": 2.9659747485555317e-05,
700
+ "loss": 0.3341,
701
+ "step": 2425
702
+ },
703
+ {
704
+ "epoch": 1.415366839976892,
705
+ "grad_norm": 16.134517669677734,
706
+ "learning_rate": 2.9392253370425852e-05,
707
+ "loss": 0.3264,
708
+ "step": 2450
709
+ },
710
+ {
711
+ "epoch": 1.4298093587521663,
712
+ "grad_norm": 15.79883861541748,
713
+ "learning_rate": 2.9124759255296385e-05,
714
+ "loss": 0.3201,
715
+ "step": 2475
716
+ },
717
+ {
718
+ "epoch": 1.4442518775274409,
719
+ "grad_norm": 10.560098648071289,
720
+ "learning_rate": 2.8857265140166917e-05,
721
+ "loss": 0.3848,
722
+ "step": 2500
723
+ },
724
+ {
725
+ "epoch": 1.4586943963027152,
726
+ "grad_norm": 24.152523040771484,
727
+ "learning_rate": 2.858977102503745e-05,
728
+ "loss": 0.4073,
729
+ "step": 2525
730
+ },
731
+ {
732
+ "epoch": 1.4731369150779896,
733
+ "grad_norm": 15.787476539611816,
734
+ "learning_rate": 2.8322276909907985e-05,
735
+ "loss": 0.4058,
736
+ "step": 2550
737
+ },
738
+ {
739
+ "epoch": 1.487579433853264,
740
+ "grad_norm": 5.4662604331970215,
741
+ "learning_rate": 2.8054782794778518e-05,
742
+ "loss": 0.4019,
743
+ "step": 2575
744
+ },
745
+ {
746
+ "epoch": 1.5020219526285383,
747
+ "grad_norm": 5.6592698097229,
748
+ "learning_rate": 2.778728867964905e-05,
749
+ "loss": 0.3481,
750
+ "step": 2600
751
+ },
752
+ {
753
+ "epoch": 1.516464471403813,
754
+ "grad_norm": 2.2615883350372314,
755
+ "learning_rate": 2.751979456451958e-05,
756
+ "loss": 0.3628,
757
+ "step": 2625
758
+ },
759
+ {
760
+ "epoch": 1.5309069901790873,
761
+ "grad_norm": 12.71334457397461,
762
+ "learning_rate": 2.7252300449390118e-05,
763
+ "loss": 0.3562,
764
+ "step": 2650
765
+ },
766
+ {
767
+ "epoch": 1.5453495089543616,
768
+ "grad_norm": 6.8426408767700195,
769
+ "learning_rate": 2.6984806334260647e-05,
770
+ "loss": 0.4108,
771
+ "step": 2675
772
+ },
773
+ {
774
+ "epoch": 1.5597920277296362,
775
+ "grad_norm": 28.521926879882812,
776
+ "learning_rate": 2.671731221913118e-05,
777
+ "loss": 0.3277,
778
+ "step": 2700
779
+ },
780
+ {
781
+ "epoch": 1.5742345465049103,
782
+ "grad_norm": 5.261911869049072,
783
+ "learning_rate": 2.6449818104001712e-05,
784
+ "loss": 0.3287,
785
+ "step": 2725
786
+ },
787
+ {
788
+ "epoch": 1.588677065280185,
789
+ "grad_norm": 31.970890045166016,
790
+ "learning_rate": 2.6182323988872248e-05,
791
+ "loss": 0.3222,
792
+ "step": 2750
793
+ },
794
+ {
795
+ "epoch": 1.6031195840554593,
796
+ "grad_norm": 11.813735961914062,
797
+ "learning_rate": 2.591482987374278e-05,
798
+ "loss": 0.3125,
799
+ "step": 2775
800
+ },
801
+ {
802
+ "epoch": 1.6175621028307337,
803
+ "grad_norm": 6.268784523010254,
804
+ "learning_rate": 2.5647335758613312e-05,
805
+ "loss": 0.3741,
806
+ "step": 2800
807
+ },
808
+ {
809
+ "epoch": 1.6320046216060082,
810
+ "grad_norm": 7.095576286315918,
811
+ "learning_rate": 2.537984164348384e-05,
812
+ "loss": 0.3723,
813
+ "step": 2825
814
+ },
815
+ {
816
+ "epoch": 1.6464471403812824,
817
+ "grad_norm": 10.156100273132324,
818
+ "learning_rate": 2.511234752835438e-05,
819
+ "loss": 0.3499,
820
+ "step": 2850
821
+ },
822
+ {
823
+ "epoch": 1.660889659156557,
824
+ "grad_norm": 11.29510498046875,
825
+ "learning_rate": 2.484485341322491e-05,
826
+ "loss": 0.29,
827
+ "step": 2875
828
+ },
829
+ {
830
+ "epoch": 1.6753321779318313,
831
+ "grad_norm": 15.906551361083984,
832
+ "learning_rate": 2.4577359298095442e-05,
833
+ "loss": 0.37,
834
+ "step": 2900
835
+ },
836
+ {
837
+ "epoch": 1.6897746967071057,
838
+ "grad_norm": 6.896731853485107,
839
+ "learning_rate": 2.4309865182965978e-05,
840
+ "loss": 0.3577,
841
+ "step": 2925
842
+ },
843
+ {
844
+ "epoch": 1.7042172154823803,
845
+ "grad_norm": 3.6533708572387695,
846
+ "learning_rate": 2.4042371067836507e-05,
847
+ "loss": 0.3122,
848
+ "step": 2950
849
+ },
850
+ {
851
+ "epoch": 1.7186597342576544,
852
+ "grad_norm": 11.97821044921875,
853
+ "learning_rate": 2.3774876952707042e-05,
854
+ "loss": 0.3725,
855
+ "step": 2975
856
+ },
857
+ {
858
+ "epoch": 1.733102253032929,
859
+ "grad_norm": 12.72447395324707,
860
+ "learning_rate": 2.3507382837577575e-05,
861
+ "loss": 0.3379,
862
+ "step": 3000
863
+ },
864
+ {
865
+ "epoch": 1.7475447718082033,
866
+ "grad_norm": 8.284575462341309,
867
+ "learning_rate": 2.3239888722448107e-05,
868
+ "loss": 0.3303,
869
+ "step": 3025
870
+ },
871
+ {
872
+ "epoch": 1.7619872905834777,
873
+ "grad_norm": 7.413320064544678,
874
+ "learning_rate": 2.297239460731864e-05,
875
+ "loss": 0.3731,
876
+ "step": 3050
877
+ },
878
+ {
879
+ "epoch": 1.7764298093587523,
880
+ "grad_norm": 5.650282382965088,
881
+ "learning_rate": 2.2704900492189175e-05,
882
+ "loss": 0.3892,
883
+ "step": 3075
884
+ },
885
+ {
886
+ "epoch": 1.7908723281340264,
887
+ "grad_norm": 4.727357387542725,
888
+ "learning_rate": 2.2437406377059704e-05,
889
+ "loss": 0.3721,
890
+ "step": 3100
891
+ },
892
+ {
893
+ "epoch": 1.805314846909301,
894
+ "grad_norm": 11.350446701049805,
895
+ "learning_rate": 2.216991226193024e-05,
896
+ "loss": 0.3263,
897
+ "step": 3125
898
+ },
899
+ {
900
+ "epoch": 1.8197573656845754,
901
+ "grad_norm": 7.404331684112549,
902
+ "learning_rate": 2.1902418146800772e-05,
903
+ "loss": 0.3016,
904
+ "step": 3150
905
+ },
906
+ {
907
+ "epoch": 1.8341998844598497,
908
+ "grad_norm": 4.940402984619141,
909
+ "learning_rate": 2.1634924031671305e-05,
910
+ "loss": 0.3623,
911
+ "step": 3175
912
+ },
913
+ {
914
+ "epoch": 1.8486424032351243,
915
+ "grad_norm": 5.797356128692627,
916
+ "learning_rate": 2.1367429916541837e-05,
917
+ "loss": 0.3448,
918
+ "step": 3200
919
+ },
920
+ {
921
+ "epoch": 1.8630849220103987,
922
+ "grad_norm": 2.8347394466400146,
923
+ "learning_rate": 2.109993580141237e-05,
924
+ "loss": 0.3467,
925
+ "step": 3225
926
+ },
927
+ {
928
+ "epoch": 1.877527440785673,
929
+ "grad_norm": 6.575459003448486,
930
+ "learning_rate": 2.0832441686282902e-05,
931
+ "loss": 0.3782,
932
+ "step": 3250
933
+ },
934
+ {
935
+ "epoch": 1.8919699595609474,
936
+ "grad_norm": 11.3712797164917,
937
+ "learning_rate": 2.0564947571153438e-05,
938
+ "loss": 0.3759,
939
+ "step": 3275
940
+ },
941
+ {
942
+ "epoch": 1.9064124783362217,
943
+ "grad_norm": 11.227645874023438,
944
+ "learning_rate": 2.0297453456023967e-05,
945
+ "loss": 0.3864,
946
+ "step": 3300
947
+ },
948
+ {
949
+ "epoch": 1.9208549971114963,
950
+ "grad_norm": 24.690820693969727,
951
+ "learning_rate": 2.0029959340894502e-05,
952
+ "loss": 0.3361,
953
+ "step": 3325
954
+ },
955
+ {
956
+ "epoch": 1.9352975158867707,
957
+ "grad_norm": 6.431740760803223,
958
+ "learning_rate": 1.9762465225765035e-05,
959
+ "loss": 0.3138,
960
+ "step": 3350
961
+ },
962
+ {
963
+ "epoch": 1.949740034662045,
964
+ "grad_norm": 6.420506954193115,
965
+ "learning_rate": 1.9494971110635567e-05,
966
+ "loss": 0.282,
967
+ "step": 3375
968
+ },
969
+ {
970
+ "epoch": 1.9641825534373196,
971
+ "grad_norm": 15.711064338684082,
972
+ "learning_rate": 1.92274769955061e-05,
973
+ "loss": 0.3228,
974
+ "step": 3400
975
+ },
976
+ {
977
+ "epoch": 1.9786250722125938,
978
+ "grad_norm": 10.628539085388184,
979
+ "learning_rate": 1.8959982880376635e-05,
980
+ "loss": 0.3787,
981
+ "step": 3425
982
+ },
983
+ {
984
+ "epoch": 1.9930675909878683,
985
+ "grad_norm": 6.874371528625488,
986
+ "learning_rate": 1.8692488765247164e-05,
987
+ "loss": 0.3496,
988
+ "step": 3450
989
+ },
990
+ {
991
+ "epoch": 2.0,
992
+ "eval_explained_variance": 0.6929686665534973,
993
+ "eval_loss": 0.34081029891967773,
994
+ "eval_mae": 0.4530330300331116,
995
+ "eval_mse": 0.3408103287220001,
996
+ "eval_r2": 0.6911017288906998,
997
+ "eval_rmse": 0.5837896466255188,
998
+ "eval_runtime": 22.0954,
999
+ "eval_samples_per_second": 156.684,
1000
+ "eval_steps_per_second": 9.821,
1001
+ "step": 3462
1002
+ }
1003
+ ],
1004
+ "logging_steps": 25,
1005
+ "max_steps": 5193,
1006
+ "num_input_tokens_seen": 0,
1007
+ "num_train_epochs": 3,
1008
+ "save_steps": 500,
1009
+ "total_flos": 7285479708948480.0,
1010
+ "train_batch_size": 8,
1011
+ "trial_name": null,
1012
+ "trial_params": null
1013
+ }
checkpoint-3462/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaad2019aafc631be278d6b739195b1913233b6e829d21c819b09f5aacdc4307
3
+ size 5048
config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google-bert/bert-base-uncased",
3
+ "_num_labels": 1,
4
+ "architectures": [
5
+ "BertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "classifier_dropout": null,
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "target"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "target": 0
20
+ },
21
+ "layer_norm_eps": 1e-12,
22
+ "max_position_embeddings": 512,
23
+ "model_type": "bert",
24
+ "num_attention_heads": 12,
25
+ "num_hidden_layers": 12,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "problem_type": "regression",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.40.1",
31
+ "type_vocab_size": 2,
32
+ "use_cache": true,
33
+ "vocab_size": 30522
34
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1cf357382ee41f38190a8ca66e1f4a0c816d0b3701854f3d2302ba19008693f
3
+ size 437955572
runs/Apr30_11-09-01_r-abhishek-autotrain-cbd6adsx-4a7db-kjsdj/events.out.tfevents.1714475341.r-abhishek-autotrain-cbd6adsx-4a7db-kjsdj.150.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:318c82614cb16168d747e16473c85e58186f5632c5cfcdf23e0acc082223b8c2
3
- size 34952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7f933fa3df8fdee79e869b68a23f82daa1b7a09d7c5e5d3c6b304e793b0958e
3
+ size 50386
runs/Apr30_11-09-01_r-abhishek-autotrain-cbd6adsx-4a7db-kjsdj/events.out.tfevents.1714476294.r-abhishek-autotrain-cbd6adsx-4a7db-kjsdj.150.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29f282edff2098c9a9b21901aad5e01d51223095d298ec32217129efd7a9c788
3
+ size 609
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "BertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaad2019aafc631be278d6b739195b1913233b6e829d21c819b09f5aacdc4307
3
+ size 5048
training_params.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "autotrain-99xmg-r4pqr/autotrain-data",
3
+ "model": "google-bert/bert-base-uncased",
4
+ "lr": 5e-05,
5
+ "epochs": 3,
6
+ "max_seq_length": 512,
7
+ "batch_size": 8,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "text_column": "autotrain_text",
18
+ "target_column": "autotrain_label",
19
+ "logging_steps": -1,
20
+ "project_name": "autotrain-99xmg-r4pqr",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": "fp16",
23
+ "save_total_limit": 1,
24
+ "push_to_hub": true,
25
+ "evaluation_strategy": "epoch",
26
+ "username": "abhishek",
27
+ "log": "tensorboard"
28
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff