Ihor committed
Commit 8c403f8 · verified · parent: ada458d

Upload folder using huggingface_hub

Files changed (4)
  1. gliner_config.json +4 -4
  2. pytorch_model.bin +2 -2
  3. rng_state.pth +1 -1
  4. trainer_state.json +84 -84
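The commit message indicates the folder was uploaded with huggingface_hub. As a minimal sketch, this exact revision can be pulled back with snapshot_download; the repo id below is a placeholder, since the page excerpt does not show the actual repository name:

```python
from huggingface_hub import snapshot_download

# Placeholder repo id -- substitute the repository this commit belongs to.
local_dir = snapshot_download(
    repo_id="<namespace>/<model-repo>",
    revision="8c403f8",  # the commit shown above
)
print(local_dir)
```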
gliner_config.json CHANGED
@@ -4,7 +4,7 @@
  "embed_ent_token": true,
  "encoder_config": {
  "_attn_implementation_autoset": false,
- "_name_or_path": "microsoft/deberta-v3-small",
+ "_name_or_path": "microsoft/deberta-v3-base",
  "add_cross_attention": false,
  "architectures": null,
  "attention_probs_dropout_prob": 0.1,
@@ -51,7 +51,7 @@
  "num_attention_heads": 12,
  "num_beam_groups": 1,
  "num_beams": 1,
- "num_hidden_layers": 6,
+ "num_hidden_layers": 12,
  "num_return_sequences": 1,
  "output_attentions": false,
  "output_hidden_states": false,
@@ -114,13 +114,13 @@
  "max_neg_type_ratio": 1,
  "max_types": 30,
  "max_width": 12,
- "model_name": "microsoft/deberta-v3-small",
+ "model_name": "microsoft/deberta-v3-base",
  "model_type": "gliner",
  "name": "span level gliner",
  "num_post_fusion_layers": 1,
  "num_steps": 20000,
  "post_fusion_schema": "",
- "pre_fusion": false,
+ "pre_fusion": true,
  "prev_path": null,
  "random_drop": true,
  "root_dir": "gliner_logs",
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:472fdbedacfc09c65506d0169de1699692016839757a46d9eeebb43d61aacf51
- size 664140326
+ oid sha256:295ba4ee8de1649cae79728427daa5d652dd782dbbe93502a238464edf7621d3
+ size 834285094
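Only the Git LFS pointer changes here: a new sha256 oid and a larger size (about 664 MB to about 834 MB, consistent with the switch to the bigger base encoder). As a sketch, a downloaded copy of the weights can be checked against the new pointer with the standard library:

```python
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Hash the file in 1 MiB chunks to avoid loading ~834 MB into memory at once.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected digest taken from the new LFS pointer above.
expected = "295ba4ee8de1649cae79728427daa5d652dd782dbbe93502a238464edf7621d3"
print(sha256_of("pytorch_model.bin") == expected)
```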
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:038861b1d03c9404c666d3b4543d50d21a511f4e7bb00cdfb74679716d563627
+ oid sha256:bef9f48147cb905d24ae3e090ba16f38ea0def956db2043ed6cb06be4b235eda
  size 14244
trainer_state.json CHANGED
@@ -10,290 +10,290 @@
  "log_history": [
  {
  "epoch": 0.04505316273202379,
- "grad_norm": 1520.9156494140625,
+ "grad_norm": NaN,
  "learning_rate": 1.25e-05,
- "loss": 26674.356,
+ "loss": 254949.008,
  "step": 500
  },
  {
  "epoch": 0.09010632546404758,
- "grad_norm": 3414.407958984375,
+ "grad_norm": NaN,
  "learning_rate": 2.5e-05,
- "loss": 923.0112,
+ "loss": 0.0,
  "step": 1000
  },
  {
  "epoch": 0.13515948819607138,
- "grad_norm": 3830.174560546875,
+ "grad_norm": NaN,
  "learning_rate": 3.7500000000000003e-05,
- "loss": 747.1836,
+ "loss": 0.0,
  "step": 1500
  },
  {
  "epoch": 0.18021265092809516,
- "grad_norm": 5375.5400390625,
+ "grad_norm": NaN,
  "learning_rate": 5e-05,
- "loss": 614.7856,
+ "loss": 0.0,
  "step": 2000
  },
  {
  "epoch": 0.22526581366011894,
- "grad_norm": 2154.57470703125,
+ "grad_norm": NaN,
  "learning_rate": 4.990486745229364e-05,
- "loss": 540.1352,
+ "loss": 0.0,
  "step": 2500
  },
  {
  "epoch": 0.27031897639214275,
- "grad_norm": 3764.433349609375,
+ "grad_norm": NaN,
  "learning_rate": 4.962019382530521e-05,
- "loss": 494.5525,
+ "loss": 0.0,
  "step": 3000
  },
  {
  "epoch": 0.3153721391241665,
- "grad_norm": 2386.823974609375,
+ "grad_norm": NaN,
  "learning_rate": 4.914814565722671e-05,
- "loss": 463.7285,
+ "loss": 0.0,
  "step": 3500
  },
  {
  "epoch": 0.3604253018561903,
- "grad_norm": 2336.798583984375,
+ "grad_norm": NaN,
  "learning_rate": 4.849231551964771e-05,
- "loss": 437.9698,
+ "loss": 0.0,
  "step": 4000
  },
  {
  "epoch": 0.40547846458821407,
- "grad_norm": 1636.76123046875,
+ "grad_norm": NaN,
  "learning_rate": 4.765769467591625e-05,
- "loss": 417.4882,
+ "loss": 0.0,
  "step": 4500
  },
  {
  "epoch": 0.4505316273202379,
- "grad_norm": 2203.75830078125,
+ "grad_norm": NaN,
  "learning_rate": 4.665063509461097e-05,
- "loss": 404.2696,
+ "loss": 0.0,
  "step": 5000
  },
  {
  "epoch": 0.4955847900522617,
- "grad_norm": 5339.2021484375,
+ "grad_norm": NaN,
  "learning_rate": 4.54788011072248e-05,
- "loss": 392.1308,
+ "loss": 0.0,
  "step": 5500
  },
  {
  "epoch": 0.5406379527842855,
- "grad_norm": 2440.087890625,
+ "grad_norm": NaN,
  "learning_rate": 4.415111107797445e-05,
- "loss": 388.4905,
+ "loss": 0.0,
  "step": 6000
  },
  {
  "epoch": 0.5856911155163093,
- "grad_norm": 2639.127197265625,
+ "grad_norm": NaN,
  "learning_rate": 4.267766952966369e-05,
- "loss": 378.0803,
+ "loss": 0.0,
  "step": 6500
  },
  {
  "epoch": 0.630744278248333,
- "grad_norm": 2479.664794921875,
+ "grad_norm": NaN,
  "learning_rate": 4.1069690242163484e-05,
- "loss": 366.2958,
+ "loss": 0.0,
  "step": 7000
  },
  {
  "epoch": 0.6757974409803568,
- "grad_norm": 2261.44091796875,
+ "grad_norm": NaN,
  "learning_rate": 3.933941090877615e-05,
- "loss": 369.7639,
+ "loss": 0.0,
  "step": 7500
  },
  {
  "epoch": 0.7208506037123806,
- "grad_norm": 4735.1630859375,
+ "grad_norm": NaN,
  "learning_rate": 3.7500000000000003e-05,
- "loss": 359.469,
+ "loss": 0.0,
  "step": 8000
  },
  {
  "epoch": 0.7659037664444044,
- "grad_norm": 2466.807373046875,
+ "grad_norm": NaN,
  "learning_rate": 3.556545654351749e-05,
- "loss": 357.7134,
+ "loss": 0.0,
  "step": 8500
  },
  {
  "epoch": 0.8109569291764281,
- "grad_norm": 4404.3310546875,
+ "grad_norm": NaN,
  "learning_rate": 3.355050358314172e-05,
- "loss": 345.4273,
+ "loss": 0.0,
  "step": 9000
  },
  {
  "epoch": 0.856010091908452,
- "grad_norm": 3157.4599609375,
+ "grad_norm": NaN,
  "learning_rate": 3.147047612756302e-05,
- "loss": 338.9636,
+ "loss": 0.0,
  "step": 9500
  },
  {
  "epoch": 0.9010632546404758,
- "grad_norm": 2165.22705078125,
+ "grad_norm": NaN,
  "learning_rate": 2.9341204441673266e-05,
- "loss": 336.4003,
+ "loss": 0.0,
  "step": 10000
  },
  {
  "epoch": 0.9461164173724995,
- "grad_norm": 2615.2861328125,
+ "grad_norm": NaN,
  "learning_rate": 2.717889356869146e-05,
- "loss": 333.9852,
+ "loss": 0.0,
  "step": 10500
  },
  {
  "epoch": 0.9911695801045234,
- "grad_norm": 1756.0631103515625,
+ "grad_norm": NaN,
  "learning_rate": 2.5e-05,
- "loss": 331.2794,
+ "loss": 0.0,
  "step": 11000
  },
  {
  "epoch": 1.0,
- "eval_loss": 307.733154296875,
- "eval_runtime": 64.2711,
- "eval_samples_per_second": 153.49,
- "eval_steps_per_second": 19.2,
+ "eval_loss": NaN,
+ "eval_runtime": 98.9814,
+ "eval_samples_per_second": 99.665,
+ "eval_steps_per_second": 12.467,
  "step": 11098
  },
  {
  "epoch": 1.0362227428365471,
- "grad_norm": 3073.70263671875,
+ "grad_norm": NaN,
  "learning_rate": 2.2821106431308544e-05,
- "loss": 318.7953,
+ "loss": 0.0,
  "step": 11500
  },
  {
  "epoch": 1.081275905568571,
- "grad_norm": 2345.203857421875,
+ "grad_norm": NaN,
  "learning_rate": 2.0658795558326743e-05,
- "loss": 316.6452,
+ "loss": 0.0,
  "step": 12000
  },
  {
  "epoch": 1.1263290683005946,
- "grad_norm": 3136.798095703125,
+ "grad_norm": NaN,
  "learning_rate": 1.852952387243698e-05,
- "loss": 315.9273,
+ "loss": 0.0,
  "step": 12500
  },
  {
  "epoch": 1.1713822310326185,
- "grad_norm": 3191.531494140625,
+ "grad_norm": NaN,
  "learning_rate": 1.6449496416858284e-05,
- "loss": 310.5562,
+ "loss": 0.0,
  "step": 13000
  },
  {
  "epoch": 1.2164353937646424,
- "grad_norm": 1755.1630859375,
+ "grad_norm": NaN,
  "learning_rate": 1.443454345648252e-05,
- "loss": 311.5838,
+ "loss": 0.0,
  "step": 13500
  },
  {
  "epoch": 1.261488556496666,
- "grad_norm": 2526.63720703125,
+ "grad_norm": NaN,
  "learning_rate": 1.2500000000000006e-05,
- "loss": 308.6302,
+ "loss": 0.0,
  "step": 14000
  },
  {
  "epoch": 1.30654171922869,
- "grad_norm": 2088.94970703125,
+ "grad_norm": NaN,
  "learning_rate": 1.0660589091223855e-05,
- "loss": 307.5582,
+ "loss": 0.0,
  "step": 14500
  },
  {
  "epoch": 1.3515948819607138,
- "grad_norm": 2669.016845703125,
+ "grad_norm": NaN,
  "learning_rate": 8.930309757836517e-06,
- "loss": 301.4395,
+ "loss": 0.0,
  "step": 15000
  },
  {
  "epoch": 1.3966480446927374,
- "grad_norm": 1843.081298828125,
+ "grad_norm": NaN,
  "learning_rate": 7.3223304703363135e-06,
- "loss": 303.276,
+ "loss": 0.0,
  "step": 15500
  },
  {
  "epoch": 1.4417012074247613,
- "grad_norm": 1839.1763916015625,
+ "grad_norm": NaN,
  "learning_rate": 5.848888922025553e-06,
- "loss": 301.4364,
+ "loss": 0.0,
  "step": 16000
  },
  {
  "epoch": 1.4867543701567851,
- "grad_norm": 1982.204345703125,
+ "grad_norm": NaN,
  "learning_rate": 4.521198892775203e-06,
- "loss": 298.6584,
+ "loss": 0.0,
  "step": 16500
  },
  {
  "epoch": 1.5318075328888088,
- "grad_norm": 3084.31201171875,
+ "grad_norm": NaN,
  "learning_rate": 3.3493649053890326e-06,
- "loss": 297.8903,
+ "loss": 0.0,
  "step": 17000
  },
  {
  "epoch": 1.5768606956208324,
- "grad_norm": 5865.5947265625,
+ "grad_norm": NaN,
  "learning_rate": 2.3423053240837515e-06,
- "loss": 298.3912,
+ "loss": 0.0,
  "step": 17500
  },
  {
  "epoch": 1.6219138583528565,
- "grad_norm": 2195.841796875,
+ "grad_norm": NaN,
  "learning_rate": 1.5076844803522922e-06,
- "loss": 291.8482,
+ "loss": 0.0,
  "step": 18000
  },
  {
  "epoch": 1.6669670210848802,
- "grad_norm": 3236.9716796875,
+ "grad_norm": NaN,
  "learning_rate": 8.51854342773295e-07,
- "loss": 295.1522,
+ "loss": 0.0,
  "step": 18500
  },
  {
  "epoch": 1.7120201838169038,
- "grad_norm": 1972.854736328125,
+ "grad_norm": NaN,
  "learning_rate": 3.7980617469479953e-07,
- "loss": 302.8398,
+ "loss": 0.0,
  "step": 19000
  },
  {
  "epoch": 1.7570733465489279,
- "grad_norm": 2315.84521484375,
+ "grad_norm": NaN,
  "learning_rate": 9.513254770636137e-08,
- "loss": 298.4053,
+ "loss": 0.0,
  "step": 19500
  },
  {
  "epoch": 1.8021265092809515,
- "grad_norm": 1987.9676513671875,
+ "grad_norm": NaN,
  "learning_rate": 0.0,
- "loss": 299.2154,
+ "loss": 0.0,
  "step": 20000
  }
  ],
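In the new trainer_state.json every logged grad_norm is NaN, the training loss collapses to 0.0 after step 500, and eval_loss is NaN, which is usually a sign the run hit numerical overflow or otherwise diverged rather than trained normally. A minimal sketch for spotting such entries in a local copy of the file (Python's json module parses the bare NaN tokens that the Trainer writes into float("nan")):

```python
import json
import math

with open("trainer_state.json") as f:
    state = json.load(f)  # bare NaN literals are accepted and become float("nan")

# Collect the steps whose grad_norm or eval_loss is NaN.
bad_steps = [
    entry["step"]
    for entry in state["log_history"]
    if math.isnan(entry.get("grad_norm", 0.0)) or math.isnan(entry.get("eval_loss", 0.0))
]
print(f"{len(bad_steps)} logged steps contain NaN metrics, e.g. {bad_steps[:5]}")
```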