abhishek HF staff commited on
Commit
039932b
1 Parent(s): 01560f0

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - text-regression
6
+ widget:
7
+ - text: "I love AutoTrain"
8
+ datasets:
9
+ - autotrain-m96nh-snymb/autotrain-data
10
+ ---
11
+
12
+ # Model Trained Using AutoTrain
13
+
14
+ - Problem type: Text Regression
15
+
16
+ ## Validation Metrics
17
+ loss: 0.282262921333313
18
+
19
+ mse: 0.2820460796356201
20
+
21
+ mae: 0.4189736545085907
22
+
23
+ r2: 0.74436353679844
24
+
25
+ rmse: 0.5310801267623901
26
+
27
+ explained_variance: 0.7570163011550903
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-5193/config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-large",
3
+ "_num_labels": 1,
4
+ "architectures": [
5
+ "DebertaV2ForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 1024,
11
+ "id2label": {
12
+ "0": "target"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 4096,
16
+ "label2id": {
17
+ "target": 0
18
+ },
19
+ "layer_norm_eps": 1e-07,
20
+ "max_position_embeddings": 512,
21
+ "max_relative_positions": -1,
22
+ "model_type": "deberta-v2",
23
+ "norm_rel_ebd": "layer_norm",
24
+ "num_attention_heads": 16,
25
+ "num_hidden_layers": 24,
26
+ "pad_token_id": 0,
27
+ "pooler_dropout": 0,
28
+ "pooler_hidden_act": "gelu",
29
+ "pooler_hidden_size": 1024,
30
+ "pos_att_type": [
31
+ "p2c",
32
+ "c2p"
33
+ ],
34
+ "position_biased_input": false,
35
+ "position_buckets": 256,
36
+ "relative_attention": true,
37
+ "share_att_key": true,
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.40.1",
40
+ "type_vocab_size": 0,
41
+ "vocab_size": 128100
42
+ }
checkpoint-5193/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d68d3238d7e01448edfe7b1983dcf19cc58c6bfbb7628f33a97a5690b6e914d
3
+ size 1740300340
checkpoint-5193/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f82c9fa96d35c5b639fda51433bc915f1712871b20849a7d91b7c30ad3c8545
3
+ size 3480832048
checkpoint-5193/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9692e0df6d5b78451d0faa94b4317f3c3cd1a1bdea676ec7954514c02b9cfcdc
3
+ size 15024
checkpoint-5193/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e60d67b3848a732be46492193f28d5c098caad9f51a52ca97768faf56dca6421
3
+ size 15024
checkpoint-5193/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02d7e3cf0198b07ae0e4e7c0e7b2fd5824805aaf7c0ec58feda8b2ef7a83c7ef
3
+ size 15024
checkpoint-5193/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5c52c0b9880b9a6b76baf795175159ea7ee4f04a9d7ffb3cd483a38f6311db7
3
+ size 15024
checkpoint-5193/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ccb544db0a45b77c836df5bb36152996c97ff1a522be11d72b179afc3d5ba75
3
+ size 1064
checkpoint-5193/trainer_state.json ADDED
@@ -0,0 +1,1509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.282262921333313,
3
+ "best_model_checkpoint": "autotrain-m96nh-snymb/checkpoint-5193",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 5193,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.014442518775274409,
13
+ "grad_norm": 212.5823516845703,
14
+ "learning_rate": 7.692307692307692e-08,
15
+ "loss": 10.235,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.028885037550548817,
20
+ "grad_norm": 81.24781799316406,
21
+ "learning_rate": 1.7307692307692305e-07,
22
+ "loss": 10.1291,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.043327556325823226,
27
+ "grad_norm": 302.68389892578125,
28
+ "learning_rate": 2.692307692307692e-07,
29
+ "loss": 9.6898,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.057770075101097634,
34
+ "grad_norm": 327.724853515625,
35
+ "learning_rate": 3.615384615384615e-07,
36
+ "loss": 10.4787,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.07221259387637204,
41
+ "grad_norm": 135.6590576171875,
42
+ "learning_rate": 4.538461538461538e-07,
43
+ "loss": 9.5916,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.08665511265164645,
48
+ "grad_norm": 108.32626342773438,
49
+ "learning_rate": 5.5e-07,
50
+ "loss": 8.8043,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.10109763142692085,
55
+ "grad_norm": 73.20294189453125,
56
+ "learning_rate": 6.461538461538462e-07,
57
+ "loss": 7.1787,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.11554015020219527,
62
+ "grad_norm": 73.47607421875,
63
+ "learning_rate": 7.423076923076923e-07,
64
+ "loss": 5.024,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.12998266897746968,
69
+ "grad_norm": 73.16763305664062,
70
+ "learning_rate": 8.384615384615384e-07,
71
+ "loss": 2.586,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.14442518775274407,
76
+ "grad_norm": 42.77627182006836,
77
+ "learning_rate": 9.346153846153846e-07,
78
+ "loss": 0.9176,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.1588677065280185,
83
+ "grad_norm": 73.87055969238281,
84
+ "learning_rate": 1.0307692307692306e-06,
85
+ "loss": 1.0465,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.1733102253032929,
90
+ "grad_norm": 29.49479103088379,
91
+ "learning_rate": 1.1269230769230768e-06,
92
+ "loss": 0.6761,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.1877527440785673,
97
+ "grad_norm": 78.30584716796875,
98
+ "learning_rate": 1.2230769230769231e-06,
99
+ "loss": 0.5363,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.2021952628538417,
104
+ "grad_norm": 23.788848876953125,
105
+ "learning_rate": 1.3192307692307692e-06,
106
+ "loss": 0.437,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.21663778162911612,
111
+ "grad_norm": 5.925709247589111,
112
+ "learning_rate": 1.4153846153846155e-06,
113
+ "loss": 0.5172,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.23108030040439054,
118
+ "grad_norm": 32.11520767211914,
119
+ "learning_rate": 1.5115384615384615e-06,
120
+ "loss": 0.509,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.24552281917966493,
125
+ "grad_norm": 14.887007713317871,
126
+ "learning_rate": 1.6076923076923076e-06,
127
+ "loss": 0.6094,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 0.25996533795493937,
132
+ "grad_norm": 31.848352432250977,
133
+ "learning_rate": 1.7038461538461536e-06,
134
+ "loss": 0.5056,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 0.27440785673021373,
139
+ "grad_norm": 72.35002136230469,
140
+ "learning_rate": 1.8e-06,
141
+ "loss": 0.4752,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 0.28885037550548814,
146
+ "grad_norm": 12.196418762207031,
147
+ "learning_rate": 1.896153846153846e-06,
148
+ "loss": 0.375,
149
+ "step": 500
150
+ },
151
+ {
152
+ "epoch": 0.30329289428076256,
153
+ "grad_norm": 25.701213836669922,
154
+ "learning_rate": 1.9923076923076923e-06,
155
+ "loss": 0.4961,
156
+ "step": 525
157
+ },
158
+ {
159
+ "epoch": 0.317735413056037,
160
+ "grad_norm": 41.281166076660156,
161
+ "learning_rate": 1.9901562165632354e-06,
162
+ "loss": 0.4455,
163
+ "step": 550
164
+ },
165
+ {
166
+ "epoch": 0.3321779318313114,
167
+ "grad_norm": 19.428579330444336,
168
+ "learning_rate": 1.979456451958057e-06,
169
+ "loss": 0.6306,
170
+ "step": 575
171
+ },
172
+ {
173
+ "epoch": 0.3466204506065858,
174
+ "grad_norm": 18.564756393432617,
175
+ "learning_rate": 1.9687566873528784e-06,
176
+ "loss": 0.399,
177
+ "step": 600
178
+ },
179
+ {
180
+ "epoch": 0.3610629693818602,
181
+ "grad_norm": 41.66643524169922,
182
+ "learning_rate": 1.9580569227476994e-06,
183
+ "loss": 0.4581,
184
+ "step": 625
185
+ },
186
+ {
187
+ "epoch": 0.3755054881571346,
188
+ "grad_norm": 41.7965202331543,
189
+ "learning_rate": 1.947357158142521e-06,
190
+ "loss": 0.4799,
191
+ "step": 650
192
+ },
193
+ {
194
+ "epoch": 0.389948006932409,
195
+ "grad_norm": 9.624334335327148,
196
+ "learning_rate": 1.936657393537342e-06,
197
+ "loss": 0.4632,
198
+ "step": 675
199
+ },
200
+ {
201
+ "epoch": 0.4043905257076834,
202
+ "grad_norm": 11.227400779724121,
203
+ "learning_rate": 1.9259576289321635e-06,
204
+ "loss": 0.3521,
205
+ "step": 700
206
+ },
207
+ {
208
+ "epoch": 0.41883304448295783,
209
+ "grad_norm": 22.89958953857422,
210
+ "learning_rate": 1.915257864326985e-06,
211
+ "loss": 0.4414,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 0.43327556325823224,
216
+ "grad_norm": 26.13202476501465,
217
+ "learning_rate": 1.904558099721806e-06,
218
+ "loss": 0.3836,
219
+ "step": 750
220
+ },
221
+ {
222
+ "epoch": 0.44771808203350666,
223
+ "grad_norm": 55.14106750488281,
224
+ "learning_rate": 1.8938583351166273e-06,
225
+ "loss": 0.5554,
226
+ "step": 775
227
+ },
228
+ {
229
+ "epoch": 0.4621606008087811,
230
+ "grad_norm": 32.7313346862793,
231
+ "learning_rate": 1.8831585705114485e-06,
232
+ "loss": 0.4613,
233
+ "step": 800
234
+ },
235
+ {
236
+ "epoch": 0.47660311958405543,
237
+ "grad_norm": 14.63770866394043,
238
+ "learning_rate": 1.87245880590627e-06,
239
+ "loss": 0.3672,
240
+ "step": 825
241
+ },
242
+ {
243
+ "epoch": 0.49104563835932985,
244
+ "grad_norm": 39.93565368652344,
245
+ "learning_rate": 1.8617590413010913e-06,
246
+ "loss": 0.495,
247
+ "step": 850
248
+ },
249
+ {
250
+ "epoch": 0.5054881571346043,
251
+ "grad_norm": 13.94717025756836,
252
+ "learning_rate": 1.8510592766959126e-06,
253
+ "loss": 0.3883,
254
+ "step": 875
255
+ },
256
+ {
257
+ "epoch": 0.5199306759098787,
258
+ "grad_norm": 19.33357810974121,
259
+ "learning_rate": 1.8403595120907338e-06,
260
+ "loss": 0.3607,
261
+ "step": 900
262
+ },
263
+ {
264
+ "epoch": 0.5343731946851531,
265
+ "grad_norm": 9.50666618347168,
266
+ "learning_rate": 1.8296597474855553e-06,
267
+ "loss": 0.4309,
268
+ "step": 925
269
+ },
270
+ {
271
+ "epoch": 0.5488157134604275,
272
+ "grad_norm": 99.26518249511719,
273
+ "learning_rate": 1.8189599828803766e-06,
274
+ "loss": 0.4734,
275
+ "step": 950
276
+ },
277
+ {
278
+ "epoch": 0.5632582322357019,
279
+ "grad_norm": 25.336233139038086,
280
+ "learning_rate": 1.8082602182751978e-06,
281
+ "loss": 0.5486,
282
+ "step": 975
283
+ },
284
+ {
285
+ "epoch": 0.5777007510109763,
286
+ "grad_norm": 41.74341583251953,
287
+ "learning_rate": 1.7975604536700191e-06,
288
+ "loss": 0.378,
289
+ "step": 1000
290
+ },
291
+ {
292
+ "epoch": 0.5921432697862508,
293
+ "grad_norm": 37.5980224609375,
294
+ "learning_rate": 1.7868606890648406e-06,
295
+ "loss": 0.3274,
296
+ "step": 1025
297
+ },
298
+ {
299
+ "epoch": 0.6065857885615251,
300
+ "grad_norm": 46.92686080932617,
301
+ "learning_rate": 1.7761609244596619e-06,
302
+ "loss": 0.3263,
303
+ "step": 1050
304
+ },
305
+ {
306
+ "epoch": 0.6210283073367996,
307
+ "grad_norm": 20.776403427124023,
308
+ "learning_rate": 1.7654611598544831e-06,
309
+ "loss": 0.3563,
310
+ "step": 1075
311
+ },
312
+ {
313
+ "epoch": 0.635470826112074,
314
+ "grad_norm": 26.412818908691406,
315
+ "learning_rate": 1.7547613952493044e-06,
316
+ "loss": 0.354,
317
+ "step": 1100
318
+ },
319
+ {
320
+ "epoch": 0.6499133448873483,
321
+ "grad_norm": 35.234580993652344,
322
+ "learning_rate": 1.7440616306441259e-06,
323
+ "loss": 0.4225,
324
+ "step": 1125
325
+ },
326
+ {
327
+ "epoch": 0.6643558636626228,
328
+ "grad_norm": 38.81793212890625,
329
+ "learning_rate": 1.7333618660389472e-06,
330
+ "loss": 0.3189,
331
+ "step": 1150
332
+ },
333
+ {
334
+ "epoch": 0.6787983824378971,
335
+ "grad_norm": 26.322595596313477,
336
+ "learning_rate": 1.7226621014337684e-06,
337
+ "loss": 0.3566,
338
+ "step": 1175
339
+ },
340
+ {
341
+ "epoch": 0.6932409012131716,
342
+ "grad_norm": 19.16035270690918,
343
+ "learning_rate": 1.7119623368285897e-06,
344
+ "loss": 0.3212,
345
+ "step": 1200
346
+ },
347
+ {
348
+ "epoch": 0.707683419988446,
349
+ "grad_norm": 26.820486068725586,
350
+ "learning_rate": 1.7012625722234112e-06,
351
+ "loss": 0.3318,
352
+ "step": 1225
353
+ },
354
+ {
355
+ "epoch": 0.7221259387637204,
356
+ "grad_norm": 9.583172798156738,
357
+ "learning_rate": 1.6905628076182322e-06,
358
+ "loss": 0.3306,
359
+ "step": 1250
360
+ },
361
+ {
362
+ "epoch": 0.7365684575389948,
363
+ "grad_norm": 67.81623840332031,
364
+ "learning_rate": 1.6798630430130535e-06,
365
+ "loss": 0.3503,
366
+ "step": 1275
367
+ },
368
+ {
369
+ "epoch": 0.7510109763142692,
370
+ "grad_norm": 26.901622772216797,
371
+ "learning_rate": 1.6691632784078748e-06,
372
+ "loss": 0.3112,
373
+ "step": 1300
374
+ },
375
+ {
376
+ "epoch": 0.7654534950895436,
377
+ "grad_norm": 19.61265754699707,
378
+ "learning_rate": 1.6584635138026963e-06,
379
+ "loss": 0.3354,
380
+ "step": 1325
381
+ },
382
+ {
383
+ "epoch": 0.779896013864818,
384
+ "grad_norm": 26.634790420532227,
385
+ "learning_rate": 1.6477637491975175e-06,
386
+ "loss": 0.4247,
387
+ "step": 1350
388
+ },
389
+ {
390
+ "epoch": 0.7943385326400925,
391
+ "grad_norm": 12.382301330566406,
392
+ "learning_rate": 1.6370639845923388e-06,
393
+ "loss": 0.3341,
394
+ "step": 1375
395
+ },
396
+ {
397
+ "epoch": 0.8087810514153668,
398
+ "grad_norm": 17.31427574157715,
399
+ "learning_rate": 1.62636421998716e-06,
400
+ "loss": 0.3274,
401
+ "step": 1400
402
+ },
403
+ {
404
+ "epoch": 0.8232235701906413,
405
+ "grad_norm": 11.828612327575684,
406
+ "learning_rate": 1.6156644553819815e-06,
407
+ "loss": 0.3575,
408
+ "step": 1425
409
+ },
410
+ {
411
+ "epoch": 0.8376660889659157,
412
+ "grad_norm": 12.682332038879395,
413
+ "learning_rate": 1.6049646907768028e-06,
414
+ "loss": 0.2944,
415
+ "step": 1450
416
+ },
417
+ {
418
+ "epoch": 0.85210860774119,
419
+ "grad_norm": 14.496199607849121,
420
+ "learning_rate": 1.594264926171624e-06,
421
+ "loss": 0.3447,
422
+ "step": 1475
423
+ },
424
+ {
425
+ "epoch": 0.8665511265164645,
426
+ "grad_norm": 20.546905517578125,
427
+ "learning_rate": 1.5835651615664453e-06,
428
+ "loss": 0.3162,
429
+ "step": 1500
430
+ },
431
+ {
432
+ "epoch": 0.8809936452917388,
433
+ "grad_norm": 12.139562606811523,
434
+ "learning_rate": 1.5728653969612668e-06,
435
+ "loss": 0.3553,
436
+ "step": 1525
437
+ },
438
+ {
439
+ "epoch": 0.8954361640670133,
440
+ "grad_norm": 38.39575958251953,
441
+ "learning_rate": 1.562165632356088e-06,
442
+ "loss": 0.3929,
443
+ "step": 1550
444
+ },
445
+ {
446
+ "epoch": 0.9098786828422877,
447
+ "grad_norm": 9.470243453979492,
448
+ "learning_rate": 1.5514658677509094e-06,
449
+ "loss": 0.2997,
450
+ "step": 1575
451
+ },
452
+ {
453
+ "epoch": 0.9243212016175621,
454
+ "grad_norm": 14.675395965576172,
455
+ "learning_rate": 1.5407661031457306e-06,
456
+ "loss": 0.3275,
457
+ "step": 1600
458
+ },
459
+ {
460
+ "epoch": 0.9387637203928365,
461
+ "grad_norm": 3.3243517875671387,
462
+ "learning_rate": 1.5300663385405521e-06,
463
+ "loss": 0.426,
464
+ "step": 1625
465
+ },
466
+ {
467
+ "epoch": 0.9532062391681109,
468
+ "grad_norm": 12.924434661865234,
469
+ "learning_rate": 1.5193665739353734e-06,
470
+ "loss": 0.2976,
471
+ "step": 1650
472
+ },
473
+ {
474
+ "epoch": 0.9676487579433853,
475
+ "grad_norm": 46.80762481689453,
476
+ "learning_rate": 1.5086668093301947e-06,
477
+ "loss": 0.3499,
478
+ "step": 1675
479
+ },
480
+ {
481
+ "epoch": 0.9820912767186597,
482
+ "grad_norm": 15.062408447265625,
483
+ "learning_rate": 1.497967044725016e-06,
484
+ "loss": 0.3225,
485
+ "step": 1700
486
+ },
487
+ {
488
+ "epoch": 0.9965337954939342,
489
+ "grad_norm": 22.281009674072266,
490
+ "learning_rate": 1.4872672801198374e-06,
491
+ "loss": 0.4113,
492
+ "step": 1725
493
+ },
494
+ {
495
+ "epoch": 1.0,
496
+ "eval_explained_variance": 0.7363581657409668,
497
+ "eval_loss": 0.3119768798351288,
498
+ "eval_mae": 0.4405648112297058,
499
+ "eval_mse": 0.31180134415626526,
500
+ "eval_r2": 0.7173944470629221,
501
+ "eval_rmse": 0.5583917498588562,
502
+ "eval_runtime": 80.5336,
503
+ "eval_samples_per_second": 42.988,
504
+ "eval_steps_per_second": 2.695,
505
+ "step": 1731
506
+ },
507
+ {
508
+ "epoch": 1.0109763142692085,
509
+ "grad_norm": 27.233137130737305,
510
+ "learning_rate": 1.4765675155146587e-06,
511
+ "loss": 0.3579,
512
+ "step": 1750
513
+ },
514
+ {
515
+ "epoch": 1.025418833044483,
516
+ "grad_norm": 23.662677764892578,
517
+ "learning_rate": 1.46586775090948e-06,
518
+ "loss": 0.3746,
519
+ "step": 1775
520
+ },
521
+ {
522
+ "epoch": 1.0398613518197575,
523
+ "grad_norm": 27.650678634643555,
524
+ "learning_rate": 1.4551679863043012e-06,
525
+ "loss": 0.3723,
526
+ "step": 1800
527
+ },
528
+ {
529
+ "epoch": 1.0543038705950318,
530
+ "grad_norm": 38.274314880371094,
531
+ "learning_rate": 1.4444682216991227e-06,
532
+ "loss": 0.3705,
533
+ "step": 1825
534
+ },
535
+ {
536
+ "epoch": 1.0687463893703062,
537
+ "grad_norm": 43.43634796142578,
538
+ "learning_rate": 1.433768457093944e-06,
539
+ "loss": 0.2841,
540
+ "step": 1850
541
+ },
542
+ {
543
+ "epoch": 1.0831889081455806,
544
+ "grad_norm": 19.07291030883789,
545
+ "learning_rate": 1.4230686924887652e-06,
546
+ "loss": 0.4436,
547
+ "step": 1875
548
+ },
549
+ {
550
+ "epoch": 1.097631426920855,
551
+ "grad_norm": 36.660614013671875,
552
+ "learning_rate": 1.4123689278835865e-06,
553
+ "loss": 0.3031,
554
+ "step": 1900
555
+ },
556
+ {
557
+ "epoch": 1.1120739456961295,
558
+ "grad_norm": 10.8666353225708,
559
+ "learning_rate": 1.401669163278408e-06,
560
+ "loss": 0.2895,
561
+ "step": 1925
562
+ },
563
+ {
564
+ "epoch": 1.1265164644714039,
565
+ "grad_norm": 34.650394439697266,
566
+ "learning_rate": 1.3909693986732293e-06,
567
+ "loss": 0.3506,
568
+ "step": 1950
569
+ },
570
+ {
571
+ "epoch": 1.1409589832466782,
572
+ "grad_norm": 43.247623443603516,
573
+ "learning_rate": 1.3802696340680503e-06,
574
+ "loss": 0.3555,
575
+ "step": 1975
576
+ },
577
+ {
578
+ "epoch": 1.1554015020219526,
579
+ "grad_norm": 18.716602325439453,
580
+ "learning_rate": 1.3695698694628716e-06,
581
+ "loss": 0.3931,
582
+ "step": 2000
583
+ },
584
+ {
585
+ "epoch": 1.169844020797227,
586
+ "grad_norm": 31.356761932373047,
587
+ "learning_rate": 1.358870104857693e-06,
588
+ "loss": 0.2976,
589
+ "step": 2025
590
+ },
591
+ {
592
+ "epoch": 1.1842865395725015,
593
+ "grad_norm": 18.609111785888672,
594
+ "learning_rate": 1.3481703402525143e-06,
595
+ "loss": 0.3163,
596
+ "step": 2050
597
+ },
598
+ {
599
+ "epoch": 1.1987290583477759,
600
+ "grad_norm": 31.023008346557617,
601
+ "learning_rate": 1.3374705756473356e-06,
602
+ "loss": 0.3454,
603
+ "step": 2075
604
+ },
605
+ {
606
+ "epoch": 1.2131715771230502,
607
+ "grad_norm": 27.93479347229004,
608
+ "learning_rate": 1.3267708110421569e-06,
609
+ "loss": 0.3452,
610
+ "step": 2100
611
+ },
612
+ {
613
+ "epoch": 1.2276140958983246,
614
+ "grad_norm": 23.254547119140625,
615
+ "learning_rate": 1.3160710464369784e-06,
616
+ "loss": 0.3486,
617
+ "step": 2125
618
+ },
619
+ {
620
+ "epoch": 1.242056614673599,
621
+ "grad_norm": 45.776458740234375,
622
+ "learning_rate": 1.3053712818317996e-06,
623
+ "loss": 0.3586,
624
+ "step": 2150
625
+ },
626
+ {
627
+ "epoch": 1.2564991334488735,
628
+ "grad_norm": 14.92525863647461,
629
+ "learning_rate": 1.294671517226621e-06,
630
+ "loss": 0.3338,
631
+ "step": 2175
632
+ },
633
+ {
634
+ "epoch": 1.270941652224148,
635
+ "grad_norm": 20.12270736694336,
636
+ "learning_rate": 1.2839717526214422e-06,
637
+ "loss": 0.3437,
638
+ "step": 2200
639
+ },
640
+ {
641
+ "epoch": 1.2853841709994223,
642
+ "grad_norm": 41.65699005126953,
643
+ "learning_rate": 1.2732719880162636e-06,
644
+ "loss": 0.3264,
645
+ "step": 2225
646
+ },
647
+ {
648
+ "epoch": 1.2998266897746968,
649
+ "grad_norm": 32.03495788574219,
650
+ "learning_rate": 1.262572223411085e-06,
651
+ "loss": 0.3404,
652
+ "step": 2250
653
+ },
654
+ {
655
+ "epoch": 1.314269208549971,
656
+ "grad_norm": 4.864631175994873,
657
+ "learning_rate": 1.2518724588059062e-06,
658
+ "loss": 0.296,
659
+ "step": 2275
660
+ },
661
+ {
662
+ "epoch": 1.3287117273252456,
663
+ "grad_norm": 10.562322616577148,
664
+ "learning_rate": 1.2411726942007275e-06,
665
+ "loss": 0.3442,
666
+ "step": 2300
667
+ },
668
+ {
669
+ "epoch": 1.34315424610052,
670
+ "grad_norm": 33.48724365234375,
671
+ "learning_rate": 1.230472929595549e-06,
672
+ "loss": 0.257,
673
+ "step": 2325
674
+ },
675
+ {
676
+ "epoch": 1.3575967648757943,
677
+ "grad_norm": 19.912137985229492,
678
+ "learning_rate": 1.2197731649903702e-06,
679
+ "loss": 0.2968,
680
+ "step": 2350
681
+ },
682
+ {
683
+ "epoch": 1.3720392836510689,
684
+ "grad_norm": 22.246639251708984,
685
+ "learning_rate": 1.2090734003851915e-06,
686
+ "loss": 0.2793,
687
+ "step": 2375
688
+ },
689
+ {
690
+ "epoch": 1.3864818024263432,
691
+ "grad_norm": 18.22015380859375,
692
+ "learning_rate": 1.1983736357800127e-06,
693
+ "loss": 0.3079,
694
+ "step": 2400
695
+ },
696
+ {
697
+ "epoch": 1.4009243212016176,
698
+ "grad_norm": 15.965062141418457,
699
+ "learning_rate": 1.1876738711748342e-06,
700
+ "loss": 0.335,
701
+ "step": 2425
702
+ },
703
+ {
704
+ "epoch": 1.415366839976892,
705
+ "grad_norm": 20.45452117919922,
706
+ "learning_rate": 1.1769741065696555e-06,
707
+ "loss": 0.3061,
708
+ "step": 2450
709
+ },
710
+ {
711
+ "epoch": 1.4298093587521663,
712
+ "grad_norm": 13.89696216583252,
713
+ "learning_rate": 1.1662743419644768e-06,
714
+ "loss": 0.275,
715
+ "step": 2475
716
+ },
717
+ {
718
+ "epoch": 1.4442518775274409,
719
+ "grad_norm": 35.64567947387695,
720
+ "learning_rate": 1.155574577359298e-06,
721
+ "loss": 0.3471,
722
+ "step": 2500
723
+ },
724
+ {
725
+ "epoch": 1.4586943963027152,
726
+ "grad_norm": 14.65186882019043,
727
+ "learning_rate": 1.1448748127541195e-06,
728
+ "loss": 0.314,
729
+ "step": 2525
730
+ },
731
+ {
732
+ "epoch": 1.4731369150779896,
733
+ "grad_norm": 14.541102409362793,
734
+ "learning_rate": 1.1341750481489408e-06,
735
+ "loss": 0.2916,
736
+ "step": 2550
737
+ },
738
+ {
739
+ "epoch": 1.487579433853264,
740
+ "grad_norm": 37.96781539916992,
741
+ "learning_rate": 1.123475283543762e-06,
742
+ "loss": 0.3849,
743
+ "step": 2575
744
+ },
745
+ {
746
+ "epoch": 1.5020219526285383,
747
+ "grad_norm": 16.675336837768555,
748
+ "learning_rate": 1.1127755189385833e-06,
749
+ "loss": 0.2672,
750
+ "step": 2600
751
+ },
752
+ {
753
+ "epoch": 1.516464471403813,
754
+ "grad_norm": 28.15886116027832,
755
+ "learning_rate": 1.1020757543334048e-06,
756
+ "loss": 0.3031,
757
+ "step": 2625
758
+ },
759
+ {
760
+ "epoch": 1.5309069901790873,
761
+ "grad_norm": 28.914554595947266,
762
+ "learning_rate": 1.091375989728226e-06,
763
+ "loss": 0.2585,
764
+ "step": 2650
765
+ },
766
+ {
767
+ "epoch": 1.5453495089543616,
768
+ "grad_norm": 31.952404022216797,
769
+ "learning_rate": 1.0806762251230471e-06,
770
+ "loss": 0.3405,
771
+ "step": 2675
772
+ },
773
+ {
774
+ "epoch": 1.5597920277296362,
775
+ "grad_norm": 62.01006317138672,
776
+ "learning_rate": 1.0699764605178684e-06,
777
+ "loss": 0.2892,
778
+ "step": 2700
779
+ },
780
+ {
781
+ "epoch": 1.5742345465049103,
782
+ "grad_norm": 15.054553985595703,
783
+ "learning_rate": 1.0592766959126899e-06,
784
+ "loss": 0.2902,
785
+ "step": 2725
786
+ },
787
+ {
788
+ "epoch": 1.588677065280185,
789
+ "grad_norm": 39.178443908691406,
790
+ "learning_rate": 1.0485769313075112e-06,
791
+ "loss": 0.2743,
792
+ "step": 2750
793
+ },
794
+ {
795
+ "epoch": 1.6031195840554593,
796
+ "grad_norm": 43.06193923950195,
797
+ "learning_rate": 1.0378771667023324e-06,
798
+ "loss": 0.2982,
799
+ "step": 2775
800
+ },
801
+ {
802
+ "epoch": 1.6175621028307337,
803
+ "grad_norm": 43.87297821044922,
804
+ "learning_rate": 1.0271774020971537e-06,
805
+ "loss": 0.2902,
806
+ "step": 2800
807
+ },
808
+ {
809
+ "epoch": 1.6320046216060082,
810
+ "grad_norm": 21.78912925720215,
811
+ "learning_rate": 1.0164776374919752e-06,
812
+ "loss": 0.3465,
813
+ "step": 2825
814
+ },
815
+ {
816
+ "epoch": 1.6464471403812824,
817
+ "grad_norm": 15.053204536437988,
818
+ "learning_rate": 1.0057778728867964e-06,
819
+ "loss": 0.3213,
820
+ "step": 2850
821
+ },
822
+ {
823
+ "epoch": 1.660889659156557,
824
+ "grad_norm": 21.79863166809082,
825
+ "learning_rate": 9.950781082816177e-07,
826
+ "loss": 0.3278,
827
+ "step": 2875
828
+ },
829
+ {
830
+ "epoch": 1.6753321779318313,
831
+ "grad_norm": 58.025299072265625,
832
+ "learning_rate": 9.843783436764392e-07,
833
+ "loss": 0.2828,
834
+ "step": 2900
835
+ },
836
+ {
837
+ "epoch": 1.6897746967071057,
838
+ "grad_norm": 22.137096405029297,
839
+ "learning_rate": 9.736785790712605e-07,
840
+ "loss": 0.3023,
841
+ "step": 2925
842
+ },
843
+ {
844
+ "epoch": 1.7042172154823803,
845
+ "grad_norm": 19.531232833862305,
846
+ "learning_rate": 9.629788144660817e-07,
847
+ "loss": 0.3039,
848
+ "step": 2950
849
+ },
850
+ {
851
+ "epoch": 1.7186597342576544,
852
+ "grad_norm": 59.77436065673828,
853
+ "learning_rate": 9.52279049860903e-07,
854
+ "loss": 0.3376,
855
+ "step": 2975
856
+ },
857
+ {
858
+ "epoch": 1.733102253032929,
859
+ "grad_norm": 27.803564071655273,
860
+ "learning_rate": 9.415792852557243e-07,
861
+ "loss": 0.2839,
862
+ "step": 3000
863
+ },
864
+ {
865
+ "epoch": 1.7475447718082033,
866
+ "grad_norm": 21.773244857788086,
867
+ "learning_rate": 9.308795206505456e-07,
868
+ "loss": 0.3372,
869
+ "step": 3025
870
+ },
871
+ {
872
+ "epoch": 1.7619872905834777,
873
+ "grad_norm": 11.421875953674316,
874
+ "learning_rate": 9.201797560453669e-07,
875
+ "loss": 0.3754,
876
+ "step": 3050
877
+ },
878
+ {
879
+ "epoch": 1.7764298093587523,
880
+ "grad_norm": 14.211411476135254,
881
+ "learning_rate": 9.094799914401883e-07,
882
+ "loss": 0.3214,
883
+ "step": 3075
884
+ },
885
+ {
886
+ "epoch": 1.7908723281340264,
887
+ "grad_norm": 43.777278900146484,
888
+ "learning_rate": 8.987802268350096e-07,
889
+ "loss": 0.3508,
890
+ "step": 3100
891
+ },
892
+ {
893
+ "epoch": 1.805314846909301,
894
+ "grad_norm": 38.14100646972656,
895
+ "learning_rate": 8.880804622298309e-07,
896
+ "loss": 0.2535,
897
+ "step": 3125
898
+ },
899
+ {
900
+ "epoch": 1.8197573656845754,
901
+ "grad_norm": 15.347945213317871,
902
+ "learning_rate": 8.773806976246522e-07,
903
+ "loss": 0.3121,
904
+ "step": 3150
905
+ },
906
+ {
907
+ "epoch": 1.8341998844598497,
908
+ "grad_norm": 8.05485725402832,
909
+ "learning_rate": 8.666809330194736e-07,
910
+ "loss": 0.3227,
911
+ "step": 3175
912
+ },
913
+ {
914
+ "epoch": 1.8486424032351243,
915
+ "grad_norm": 11.664706230163574,
916
+ "learning_rate": 8.559811684142948e-07,
917
+ "loss": 0.3061,
918
+ "step": 3200
919
+ },
920
+ {
921
+ "epoch": 1.8630849220103987,
922
+ "grad_norm": 7.515502452850342,
923
+ "learning_rate": 8.452814038091161e-07,
924
+ "loss": 0.2753,
925
+ "step": 3225
926
+ },
927
+ {
928
+ "epoch": 1.877527440785673,
929
+ "grad_norm": 30.233638763427734,
930
+ "learning_rate": 8.345816392039374e-07,
931
+ "loss": 0.3518,
932
+ "step": 3250
933
+ },
934
+ {
935
+ "epoch": 1.8919699595609474,
936
+ "grad_norm": 16.609712600708008,
937
+ "learning_rate": 8.238818745987588e-07,
938
+ "loss": 0.3087,
939
+ "step": 3275
940
+ },
941
+ {
942
+ "epoch": 1.9064124783362217,
943
+ "grad_norm": 12.235444068908691,
944
+ "learning_rate": 8.1318210999358e-07,
945
+ "loss": 0.3224,
946
+ "step": 3300
947
+ },
948
+ {
949
+ "epoch": 1.9208549971114963,
950
+ "grad_norm": 36.453224182128906,
951
+ "learning_rate": 8.024823453884014e-07,
952
+ "loss": 0.3311,
953
+ "step": 3325
954
+ },
955
+ {
956
+ "epoch": 1.9352975158867707,
957
+ "grad_norm": 21.512168884277344,
958
+ "learning_rate": 7.917825807832227e-07,
959
+ "loss": 0.2857,
960
+ "step": 3350
961
+ },
962
+ {
963
+ "epoch": 1.949740034662045,
964
+ "grad_norm": 9.703317642211914,
965
+ "learning_rate": 7.81082816178044e-07,
966
+ "loss": 0.2662,
967
+ "step": 3375
968
+ },
969
+ {
970
+ "epoch": 1.9641825534373196,
971
+ "grad_norm": 17.714481353759766,
972
+ "learning_rate": 7.703830515728653e-07,
973
+ "loss": 0.291,
974
+ "step": 3400
975
+ },
976
+ {
977
+ "epoch": 1.9786250722125938,
978
+ "grad_norm": 22.379777908325195,
979
+ "learning_rate": 7.596832869676867e-07,
980
+ "loss": 0.3267,
981
+ "step": 3425
982
+ },
983
+ {
984
+ "epoch": 1.9930675909878683,
985
+ "grad_norm": 8.563464164733887,
986
+ "learning_rate": 7.48983522362508e-07,
987
+ "loss": 0.2939,
988
+ "step": 3450
989
+ },
990
+ {
991
+ "epoch": 2.0,
992
+ "eval_explained_variance": 0.7469815611839294,
993
+ "eval_loss": 0.31648534536361694,
994
+ "eval_mae": 0.4420657455921173,
995
+ "eval_mse": 0.3162277936935425,
996
+ "eval_r2": 0.7133824489512686,
997
+ "eval_rmse": 0.5623413324356079,
998
+ "eval_runtime": 80.5259,
999
+ "eval_samples_per_second": 42.992,
1000
+ "eval_steps_per_second": 2.695,
1001
+ "step": 3462
1002
+ },
1003
+ {
1004
+ "epoch": 2.0075101097631425,
1005
+ "grad_norm": 31.694887161254883,
1006
+ "learning_rate": 7.382837577573293e-07,
1007
+ "loss": 0.2527,
1008
+ "step": 3475
1009
+ },
1010
+ {
1011
+ "epoch": 2.021952628538417,
1012
+ "grad_norm": 24.721397399902344,
1013
+ "learning_rate": 7.275839931521506e-07,
1014
+ "loss": 0.2815,
1015
+ "step": 3500
1016
+ },
1017
+ {
1018
+ "epoch": 2.0363951473136916,
1019
+ "grad_norm": 33.44636917114258,
1020
+ "learning_rate": 7.16884228546972e-07,
1021
+ "loss": 0.3586,
1022
+ "step": 3525
1023
+ },
1024
+ {
1025
+ "epoch": 2.050837666088966,
1026
+ "grad_norm": 44.72824478149414,
1027
+ "learning_rate": 7.061844639417933e-07,
1028
+ "loss": 0.3163,
1029
+ "step": 3550
1030
+ },
1031
+ {
1032
+ "epoch": 2.0652801848642404,
1033
+ "grad_norm": 22.65967559814453,
1034
+ "learning_rate": 6.954846993366146e-07,
1035
+ "loss": 0.2523,
1036
+ "step": 3575
1037
+ },
1038
+ {
1039
+ "epoch": 2.079722703639515,
1040
+ "grad_norm": 9.611360549926758,
1041
+ "learning_rate": 6.847849347314358e-07,
1042
+ "loss": 0.2609,
1043
+ "step": 3600
1044
+ },
1045
+ {
1046
+ "epoch": 2.094165222414789,
1047
+ "grad_norm": 19.328899383544922,
1048
+ "learning_rate": 6.740851701262572e-07,
1049
+ "loss": 0.3327,
1050
+ "step": 3625
1051
+ },
1052
+ {
1053
+ "epoch": 2.1086077411900637,
1054
+ "grad_norm": 17.564197540283203,
1055
+ "learning_rate": 6.633854055210784e-07,
1056
+ "loss": 0.2777,
1057
+ "step": 3650
1058
+ },
1059
+ {
1060
+ "epoch": 2.123050259965338,
1061
+ "grad_norm": 35.05995178222656,
1062
+ "learning_rate": 6.526856409158998e-07,
1063
+ "loss": 0.2516,
1064
+ "step": 3675
1065
+ },
1066
+ {
1067
+ "epoch": 2.1374927787406124,
1068
+ "grad_norm": 17.389116287231445,
1069
+ "learning_rate": 6.419858763107211e-07,
1070
+ "loss": 0.2387,
1071
+ "step": 3700
1072
+ },
1073
+ {
1074
+ "epoch": 2.151935297515887,
1075
+ "grad_norm": 18.017724990844727,
1076
+ "learning_rate": 6.312861117055425e-07,
1077
+ "loss": 0.2764,
1078
+ "step": 3725
1079
+ },
1080
+ {
1081
+ "epoch": 2.166377816291161,
1082
+ "grad_norm": 13.583812713623047,
1083
+ "learning_rate": 6.205863471003637e-07,
1084
+ "loss": 0.2717,
1085
+ "step": 3750
1086
+ },
1087
+ {
1088
+ "epoch": 2.1808203350664357,
1089
+ "grad_norm": 18.499242782592773,
1090
+ "learning_rate": 6.098865824951851e-07,
1091
+ "loss": 0.2439,
1092
+ "step": 3775
1093
+ },
1094
+ {
1095
+ "epoch": 2.19526285384171,
1096
+ "grad_norm": 17.863845825195312,
1097
+ "learning_rate": 5.991868178900064e-07,
1098
+ "loss": 0.2498,
1099
+ "step": 3800
1100
+ },
1101
+ {
1102
+ "epoch": 2.2097053726169844,
1103
+ "grad_norm": 42.79360580444336,
1104
+ "learning_rate": 5.884870532848277e-07,
1105
+ "loss": 0.2986,
1106
+ "step": 3825
1107
+ },
1108
+ {
1109
+ "epoch": 2.224147891392259,
1110
+ "grad_norm": 18.10019302368164,
1111
+ "learning_rate": 5.77787288679649e-07,
1112
+ "loss": 0.2692,
1113
+ "step": 3850
1114
+ },
1115
+ {
1116
+ "epoch": 2.238590410167533,
1117
+ "grad_norm": 52.769935607910156,
1118
+ "learning_rate": 5.670875240744704e-07,
1119
+ "loss": 0.3265,
1120
+ "step": 3875
1121
+ },
1122
+ {
1123
+ "epoch": 2.2530329289428077,
1124
+ "grad_norm": 42.038516998291016,
1125
+ "learning_rate": 5.563877594692917e-07,
1126
+ "loss": 0.3196,
1127
+ "step": 3900
1128
+ },
1129
+ {
1130
+ "epoch": 2.267475447718082,
1131
+ "grad_norm": 14.1666898727417,
1132
+ "learning_rate": 5.45687994864113e-07,
1133
+ "loss": 0.2888,
1134
+ "step": 3925
1135
+ },
1136
+ {
1137
+ "epoch": 2.2819179664933564,
1138
+ "grad_norm": 16.471778869628906,
1139
+ "learning_rate": 5.349882302589342e-07,
1140
+ "loss": 0.2782,
1141
+ "step": 3950
1142
+ },
1143
+ {
1144
+ "epoch": 2.296360485268631,
1145
+ "grad_norm": 9.197157859802246,
1146
+ "learning_rate": 5.242884656537556e-07,
1147
+ "loss": 0.3127,
1148
+ "step": 3975
1149
+ },
1150
+ {
1151
+ "epoch": 2.310803004043905,
1152
+ "grad_norm": 19.208568572998047,
1153
+ "learning_rate": 5.135887010485768e-07,
1154
+ "loss": 0.2572,
1155
+ "step": 4000
1156
+ },
1157
+ {
1158
+ "epoch": 2.3252455228191797,
1159
+ "grad_norm": 5.966078758239746,
1160
+ "learning_rate": 5.028889364433982e-07,
1161
+ "loss": 0.2631,
1162
+ "step": 4025
1163
+ },
1164
+ {
1165
+ "epoch": 2.339688041594454,
1166
+ "grad_norm": 27.037731170654297,
1167
+ "learning_rate": 4.921891718382196e-07,
1168
+ "loss": 0.2794,
1169
+ "step": 4050
1170
+ },
1171
+ {
1172
+ "epoch": 2.3541305603697285,
1173
+ "grad_norm": 39.20252990722656,
1174
+ "learning_rate": 4.814894072330409e-07,
1175
+ "loss": 0.2656,
1176
+ "step": 4075
1177
+ },
1178
+ {
1179
+ "epoch": 2.368573079145003,
1180
+ "grad_norm": 32.399147033691406,
1181
+ "learning_rate": 4.7078964262786213e-07,
1182
+ "loss": 0.2654,
1183
+ "step": 4100
1184
+ },
1185
+ {
1186
+ "epoch": 2.383015597920277,
1187
+ "grad_norm": 23.706451416015625,
1188
+ "learning_rate": 4.6008987802268346e-07,
1189
+ "loss": 0.2887,
1190
+ "step": 4125
1191
+ },
1192
+ {
1193
+ "epoch": 2.3974581166955518,
1194
+ "grad_norm": 15.86970043182373,
1195
+ "learning_rate": 4.493901134175048e-07,
1196
+ "loss": 0.2568,
1197
+ "step": 4150
1198
+ },
1199
+ {
1200
+ "epoch": 2.4119006354708263,
1201
+ "grad_norm": 27.933916091918945,
1202
+ "learning_rate": 4.386903488123261e-07,
1203
+ "loss": 0.2536,
1204
+ "step": 4175
1205
+ },
1206
+ {
1207
+ "epoch": 2.4263431542461005,
1208
+ "grad_norm": 16.812334060668945,
1209
+ "learning_rate": 4.279905842071474e-07,
1210
+ "loss": 0.2542,
1211
+ "step": 4200
1212
+ },
1213
+ {
1214
+ "epoch": 2.440785673021375,
1215
+ "grad_norm": 38.82505416870117,
1216
+ "learning_rate": 4.172908196019687e-07,
1217
+ "loss": 0.2872,
1218
+ "step": 4225
1219
+ },
1220
+ {
1221
+ "epoch": 2.455228191796649,
1222
+ "grad_norm": 23.149492263793945,
1223
+ "learning_rate": 4.0659105499679e-07,
1224
+ "loss": 0.2818,
1225
+ "step": 4250
1226
+ },
1227
+ {
1228
+ "epoch": 2.4696707105719238,
1229
+ "grad_norm": 43.19930648803711,
1230
+ "learning_rate": 3.9589129039161134e-07,
1231
+ "loss": 0.213,
1232
+ "step": 4275
1233
+ },
1234
+ {
1235
+ "epoch": 2.484113229347198,
1236
+ "grad_norm": 23.671152114868164,
1237
+ "learning_rate": 3.8519152578643266e-07,
1238
+ "loss": 0.3698,
1239
+ "step": 4300
1240
+ },
1241
+ {
1242
+ "epoch": 2.4985557481224725,
1243
+ "grad_norm": 88.69607543945312,
1244
+ "learning_rate": 3.74491761181254e-07,
1245
+ "loss": 0.2968,
1246
+ "step": 4325
1247
+ },
1248
+ {
1249
+ "epoch": 2.512998266897747,
1250
+ "grad_norm": 9.653864860534668,
1251
+ "learning_rate": 3.637919965760753e-07,
1252
+ "loss": 0.2777,
1253
+ "step": 4350
1254
+ },
1255
+ {
1256
+ "epoch": 2.527440785673021,
1257
+ "grad_norm": 11.768026351928711,
1258
+ "learning_rate": 3.5309223197089663e-07,
1259
+ "loss": 0.3236,
1260
+ "step": 4375
1261
+ },
1262
+ {
1263
+ "epoch": 2.541883304448296,
1264
+ "grad_norm": 15.171217918395996,
1265
+ "learning_rate": 3.423924673657179e-07,
1266
+ "loss": 0.3077,
1267
+ "step": 4400
1268
+ },
1269
+ {
1270
+ "epoch": 2.5563258232235704,
1271
+ "grad_norm": 9.879386901855469,
1272
+ "learning_rate": 3.316927027605392e-07,
1273
+ "loss": 0.2684,
1274
+ "step": 4425
1275
+ },
1276
+ {
1277
+ "epoch": 2.5707683419988445,
1278
+ "grad_norm": 9.355985641479492,
1279
+ "learning_rate": 3.2099293815536054e-07,
1280
+ "loss": 0.2526,
1281
+ "step": 4450
1282
+ },
1283
+ {
1284
+ "epoch": 2.585210860774119,
1285
+ "grad_norm": 4.87063455581665,
1286
+ "learning_rate": 3.1029317355018186e-07,
1287
+ "loss": 0.2689,
1288
+ "step": 4475
1289
+ },
1290
+ {
1291
+ "epoch": 2.5996533795493937,
1292
+ "grad_norm": 20.083267211914062,
1293
+ "learning_rate": 2.995934089450032e-07,
1294
+ "loss": 0.259,
1295
+ "step": 4500
1296
+ },
1297
+ {
1298
+ "epoch": 2.614095898324668,
1299
+ "grad_norm": 12.317808151245117,
1300
+ "learning_rate": 2.888936443398245e-07,
1301
+ "loss": 0.2232,
1302
+ "step": 4525
1303
+ },
1304
+ {
1305
+ "epoch": 2.628538417099942,
1306
+ "grad_norm": 28.255945205688477,
1307
+ "learning_rate": 2.7819387973464583e-07,
1308
+ "loss": 0.2466,
1309
+ "step": 4550
1310
+ },
1311
+ {
1312
+ "epoch": 2.6429809358752165,
1313
+ "grad_norm": 72.9136734008789,
1314
+ "learning_rate": 2.674941151294671e-07,
1315
+ "loss": 0.2693,
1316
+ "step": 4575
1317
+ },
1318
+ {
1319
+ "epoch": 2.657423454650491,
1320
+ "grad_norm": 44.20970153808594,
1321
+ "learning_rate": 2.567943505242884e-07,
1322
+ "loss": 0.2527,
1323
+ "step": 4600
1324
+ },
1325
+ {
1326
+ "epoch": 2.6718659734257653,
1327
+ "grad_norm": 17.912519454956055,
1328
+ "learning_rate": 2.460945859191098e-07,
1329
+ "loss": 0.2706,
1330
+ "step": 4625
1331
+ },
1332
+ {
1333
+ "epoch": 2.68630849220104,
1334
+ "grad_norm": 48.64137649536133,
1335
+ "learning_rate": 2.3539482131393107e-07,
1336
+ "loss": 0.2795,
1337
+ "step": 4650
1338
+ },
1339
+ {
1340
+ "epoch": 2.7007510109763144,
1341
+ "grad_norm": 39.6313362121582,
1342
+ "learning_rate": 2.246950567087524e-07,
1343
+ "loss": 0.2669,
1344
+ "step": 4675
1345
+ },
1346
+ {
1347
+ "epoch": 2.7151935297515886,
1348
+ "grad_norm": 12.336877822875977,
1349
+ "learning_rate": 2.139952921035737e-07,
1350
+ "loss": 0.2791,
1351
+ "step": 4700
1352
+ },
1353
+ {
1354
+ "epoch": 2.729636048526863,
1355
+ "grad_norm": 11.376914024353027,
1356
+ "learning_rate": 2.03295527498395e-07,
1357
+ "loss": 0.2599,
1358
+ "step": 4725
1359
+ },
1360
+ {
1361
+ "epoch": 2.7440785673021377,
1362
+ "grad_norm": 26.109207153320312,
1363
+ "learning_rate": 1.9259576289321633e-07,
1364
+ "loss": 0.3112,
1365
+ "step": 4750
1366
+ },
1367
+ {
1368
+ "epoch": 2.758521086077412,
1369
+ "grad_norm": 13.475809097290039,
1370
+ "learning_rate": 1.8189599828803765e-07,
1371
+ "loss": 0.2797,
1372
+ "step": 4775
1373
+ },
1374
+ {
1375
+ "epoch": 2.7729636048526864,
1376
+ "grad_norm": 23.833911895751953,
1377
+ "learning_rate": 1.7119623368285895e-07,
1378
+ "loss": 0.264,
1379
+ "step": 4800
1380
+ },
1381
+ {
1382
+ "epoch": 2.7874061236279606,
1383
+ "grad_norm": 7.037588119506836,
1384
+ "learning_rate": 1.6049646907768027e-07,
1385
+ "loss": 0.3202,
1386
+ "step": 4825
1387
+ },
1388
+ {
1389
+ "epoch": 2.801848642403235,
1390
+ "grad_norm": 36.29332733154297,
1391
+ "learning_rate": 1.497967044725016e-07,
1392
+ "loss": 0.286,
1393
+ "step": 4850
1394
+ },
1395
+ {
1396
+ "epoch": 2.8162911611785093,
1397
+ "grad_norm": 10.196252822875977,
1398
+ "learning_rate": 1.3909693986732292e-07,
1399
+ "loss": 0.3008,
1400
+ "step": 4875
1401
+ },
1402
+ {
1403
+ "epoch": 2.830733679953784,
1404
+ "grad_norm": 27.923114776611328,
1405
+ "learning_rate": 1.283971752621442e-07,
1406
+ "loss": 0.2778,
1407
+ "step": 4900
1408
+ },
1409
+ {
1410
+ "epoch": 2.8451761987290585,
1411
+ "grad_norm": 5.924576282501221,
1412
+ "learning_rate": 1.1769741065696553e-07,
1413
+ "loss": 0.2237,
1414
+ "step": 4925
1415
+ },
1416
+ {
1417
+ "epoch": 2.8596187175043326,
1418
+ "grad_norm": 16.432357788085938,
1419
+ "learning_rate": 1.0699764605178686e-07,
1420
+ "loss": 0.2532,
1421
+ "step": 4950
1422
+ },
1423
+ {
1424
+ "epoch": 2.874061236279607,
1425
+ "grad_norm": 10.846713066101074,
1426
+ "learning_rate": 9.629788144660816e-08,
1427
+ "loss": 0.2277,
1428
+ "step": 4975
1429
+ },
1430
+ {
1431
+ "epoch": 2.8885037550548818,
1432
+ "grad_norm": 21.979785919189453,
1433
+ "learning_rate": 8.559811684142947e-08,
1434
+ "loss": 0.3065,
1435
+ "step": 5000
1436
+ },
1437
+ {
1438
+ "epoch": 2.902946273830156,
1439
+ "grad_norm": 17.25764274597168,
1440
+ "learning_rate": 7.48983522362508e-08,
1441
+ "loss": 0.2449,
1442
+ "step": 5025
1443
+ },
1444
+ {
1445
+ "epoch": 2.9173887926054305,
1446
+ "grad_norm": 20.356718063354492,
1447
+ "learning_rate": 6.41985876310721e-08,
1448
+ "loss": 0.2357,
1449
+ "step": 5050
1450
+ },
1451
+ {
1452
+ "epoch": 2.9318313113807046,
1453
+ "grad_norm": 18.828088760375977,
1454
+ "learning_rate": 5.349882302589343e-08,
1455
+ "loss": 0.2329,
1456
+ "step": 5075
1457
+ },
1458
+ {
1459
+ "epoch": 2.946273830155979,
1460
+ "grad_norm": 10.398417472839355,
1461
+ "learning_rate": 4.279905842071474e-08,
1462
+ "loss": 0.2279,
1463
+ "step": 5100
1464
+ },
1465
+ {
1466
+ "epoch": 2.9607163489312533,
1467
+ "grad_norm": 40.09988784790039,
1468
+ "learning_rate": 3.209929381553605e-08,
1469
+ "loss": 0.2337,
1470
+ "step": 5125
1471
+ },
1472
+ {
1473
+ "epoch": 2.975158867706528,
1474
+ "grad_norm": 22.620283126831055,
1475
+ "learning_rate": 2.139952921035737e-08,
1476
+ "loss": 0.2783,
1477
+ "step": 5150
1478
+ },
1479
+ {
1480
+ "epoch": 2.9896013864818025,
1481
+ "grad_norm": 33.585792541503906,
1482
+ "learning_rate": 1.0699764605178684e-08,
1483
+ "loss": 0.2726,
1484
+ "step": 5175
1485
+ },
1486
+ {
1487
+ "epoch": 3.0,
1488
+ "eval_explained_variance": 0.7570163011550903,
1489
+ "eval_loss": 0.282262921333313,
1490
+ "eval_mae": 0.4189736545085907,
1491
+ "eval_mse": 0.2820460796356201,
1492
+ "eval_r2": 0.74436353679844,
1493
+ "eval_rmse": 0.5310801267623901,
1494
+ "eval_runtime": 80.5385,
1495
+ "eval_samples_per_second": 42.986,
1496
+ "eval_steps_per_second": 2.694,
1497
+ "step": 5193
1498
+ }
1499
+ ],
1500
+ "logging_steps": 25,
1501
+ "max_steps": 5193,
1502
+ "num_input_tokens_seen": 0,
1503
+ "num_train_epochs": 3,
1504
+ "save_steps": 500,
1505
+ "total_flos": 7.743257397795226e+16,
1506
+ "train_batch_size": 2,
1507
+ "trial_name": null,
1508
+ "trial_params": null
1509
+ }
checkpoint-5193/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:270da9b099b2d02a736480bd012939e9b5f02ea692f21c4e08c4925dc1f458dc
3
+ size 5048
config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-large",
3
+ "_num_labels": 1,
4
+ "architectures": [
5
+ "DebertaV2ForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 1024,
11
+ "id2label": {
12
+ "0": "target"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 4096,
16
+ "label2id": {
17
+ "target": 0
18
+ },
19
+ "layer_norm_eps": 1e-07,
20
+ "max_position_embeddings": 512,
21
+ "max_relative_positions": -1,
22
+ "model_type": "deberta-v2",
23
+ "norm_rel_ebd": "layer_norm",
24
+ "num_attention_heads": 16,
25
+ "num_hidden_layers": 24,
26
+ "pad_token_id": 0,
27
+ "pooler_dropout": 0,
28
+ "pooler_hidden_act": "gelu",
29
+ "pooler_hidden_size": 1024,
30
+ "pos_att_type": [
31
+ "p2c",
32
+ "c2p"
33
+ ],
34
+ "position_biased_input": false,
35
+ "position_buckets": 256,
36
+ "relative_attention": true,
37
+ "share_att_key": true,
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.40.1",
40
+ "type_vocab_size": 0,
41
+ "vocab_size": 128100
42
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d68d3238d7e01448edfe7b1983dcf19cc58c6bfbb7628f33a97a5690b6e914d
3
+ size 1740300340
runs/Apr29_14-53-13_r-abhishek-autotrain-1bfjaa9n-00d8c-hkw4p/events.out.tfevents.1714402394.r-abhishek-autotrain-1bfjaa9n-00d8c-hkw4p.464.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:129c6f6a91995200e4332cddbe1b7a5c6e4e8894bfdbe4f4a000e8a5d32483fd
3
- size 46485
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1d3054238590a97382b063b811087cd93da94519af24987471b0223a836fdde
3
+ size 50525
runs/Apr29_14-53-13_r-abhishek-autotrain-1bfjaa9n-00d8c-hkw4p/events.out.tfevents.1714407850.r-abhishek-autotrain-1bfjaa9n-00d8c-hkw4p.464.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81b9b3dff0d2329581b485dd6e9867790b2a9b813bd88a0ec2b931a09c093664
3
+ size 609
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "sp_model_kwargs": {},
54
+ "split_by_punct": false,
55
+ "tokenizer_class": "DebertaV2Tokenizer",
56
+ "unk_token": "[UNK]",
57
+ "vocab_type": "spm"
58
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:270da9b099b2d02a736480bd012939e9b5f02ea692f21c4e08c4925dc1f458dc
3
+ size 5048
training_params.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "autotrain-m96nh-snymb/autotrain-data",
3
+ "model": "microsoft/deberta-v3-large",
4
+ "lr": 2e-06,
5
+ "epochs": 3,
6
+ "max_seq_length": 1024,
7
+ "batch_size": 2,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "text_column": "autotrain_text",
18
+ "target_column": "autotrain_label",
19
+ "logging_steps": -1,
20
+ "project_name": "autotrain-m96nh-snymb",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": "fp16",
23
+ "save_total_limit": 1,
24
+ "push_to_hub": true,
25
+ "evaluation_strategy": "epoch",
26
+ "username": "abhishek",
27
+ "log": "tensorboard"
28
+ }