Anmol-Sharma committed on
Commit
9a9ec2c
·
verified ·
1 Parent(s): f34c8fb

Training in progress, epoch 1

Browse files
Files changed (3) hide show
  1. config.json +392 -0
  2. model.safetensors +3 -0
  3. training_args.bin +3 -0
config.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ModernBertForSequenceClassification"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 50281,
8
+ "classifier_activation": "gelu",
9
+ "classifier_bias": false,
10
+ "classifier_dropout": 0.0,
11
+ "classifier_pooling": "mean",
12
+ "cls_token_id": 50281,
13
+ "decoder_bias": true,
14
+ "deterministic_flash_attn": false,
15
+ "dtype": "float32",
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 50282,
18
+ "global_attn_every_n_layers": 3,
19
+ "gradient_checkpointing": false,
20
+ "hidden_activation": "gelu",
21
+ "hidden_size": 768,
22
+ "id2label": {
23
+ "0": "Nim",
24
+ "1": "Vue",
25
+ "10": "Stylus",
26
+ "100": "Bluespec",
27
+ "101": "Apex",
28
+ "102": "Hoon",
29
+ "103": "TSV",
30
+ "104": "VCL",
31
+ "105": "Gradle",
32
+ "106": "Ioke",
33
+ "107": "Lex",
34
+ "108": "Visual Basic .NET",
35
+ "109": "Gnuplot",
36
+ "11": "ColdFusion",
37
+ "110": "Common Lisp",
38
+ "111": "JQ",
39
+ "112": "Dockerfile",
40
+ "113": "Fish",
41
+ "114": "SQL",
42
+ "115": "Lean",
43
+ "116": "Kvlang",
44
+ "117": "Ada",
45
+ "118": "YANG",
46
+ "119": "Java",
47
+ "12": "AGS Script",
48
+ "120": "Haskell",
49
+ "121": "Less",
50
+ "122": "Vim Script",
51
+ "123": "PureScript",
52
+ "124": "Scilab",
53
+ "125": "RDoc",
54
+ "126": "HTML+Razor",
55
+ "127": "Ragel in Ruby Host",
56
+ "128": "Go",
57
+ "129": "EJS",
58
+ "13": "Raw token data",
59
+ "130": "PostScript",
60
+ "131": "GLSL",
61
+ "132": "Makefile",
62
+ "133": "FreeMarker",
63
+ "134": "Io",
64
+ "135": "GAS",
65
+ "136": "PLpgSQL",
66
+ "137": "Unity3D Asset",
67
+ "138": "CMake",
68
+ "139": "CoffeeScript",
69
+ "14": "Csound",
70
+ "140": "J",
71
+ "141": "HCL",
72
+ "142": "Edoid",
73
+ "143": "TSX",
74
+ "144": "GDScript",
75
+ "145": "Thrift",
76
+ "146": "DIGITAL Command Language",
77
+ "147": "Kotlin",
78
+ "148": "Visual Basic",
79
+ "149": "Objective-C",
80
+ "15": "Sass",
81
+ "150": "Smali",
82
+ "151": "Fluent",
83
+ "152": "Git Config",
84
+ "153": "Assembly",
85
+ "16": "C",
86
+ "17": "Jupyter Notebook",
87
+ "18": "CODEOWNERS",
88
+ "19": "Rascal",
89
+ "2": "Squirrel",
90
+ "20": "Blade",
91
+ "21": "Eagle",
92
+ "22": "Objective-C++",
93
+ "23": "PHP",
94
+ "24": "Crystal",
95
+ "25": "OCaml",
96
+ "26": "Scheme",
97
+ "27": "Modelica",
98
+ "28": "Rust",
99
+ "29": "Unix Assembly",
100
+ "3": "Gherkin",
101
+ "30": "Shell",
102
+ "31": "JAR Manifest",
103
+ "32": "XML Property List",
104
+ "33": "Erlang",
105
+ "34": "Prolog",
106
+ "35": "ECL",
107
+ "36": "Csound Document",
108
+ "37": "Perl",
109
+ "38": "LookML",
110
+ "39": "Swift",
111
+ "4": "Logtalk",
112
+ "40": "C#",
113
+ "41": "Go Module",
114
+ "42": "Scala",
115
+ "43": "Gettext Catalog",
116
+ "44": "FreeBasic",
117
+ "45": "Adobe Font Metrics",
118
+ "46": "PowerShell",
119
+ "47": "Pascal",
120
+ "48": "Wavefront Object",
121
+ "49": "AsciiDoc",
122
+ "5": "Redcode",
123
+ "50": "Python",
124
+ "51": "Julia",
125
+ "52": "POV-Ray SDL",
126
+ "53": "Starlark",
127
+ "54": "Metal",
128
+ "55": "GAP",
129
+ "56": "Kit",
130
+ "57": "Graphviz (DOT)",
131
+ "58": "TypeScript",
132
+ "59": "Gerber Image",
133
+ "6": "ImageJ Macro",
134
+ "60": "Mathematica",
135
+ "61": "REALbasic",
136
+ "62": "SQF",
137
+ "63": "Solidity",
138
+ "64": "OpenType Feature File",
139
+ "65": "Unknown",
140
+ "66": "Protocol Buffer Text Format",
141
+ "67": "VHDL",
142
+ "68": "Haxe",
143
+ "69": "Twig",
144
+ "7": "Hack",
145
+ "70": "Isabelle",
146
+ "71": "Lua",
147
+ "72": "Groovy",
148
+ "73": "Ignore List",
149
+ "74": "LLVM",
150
+ "75": "JavaScript",
151
+ "76": "MATLAB",
152
+ "77": "Dart",
153
+ "78": "Inform 7",
154
+ "79": "VBScript",
155
+ "8": "PlantUML",
156
+ "80": "KiCad Layout",
157
+ "81": "OpenEdge ABL",
158
+ "82": "Turtle",
159
+ "83": "Open Policy Agent",
160
+ "84": "D",
161
+ "85": "Inno Setup",
162
+ "86": "ApacheConf",
163
+ "87": "C++",
164
+ "88": "SCSS",
165
+ "89": "Ruby",
166
+ "9": "Verilog",
167
+ "90": "Chapel",
168
+ "91": "OpenStep Property List",
169
+ "92": "Fortran Free Form",
170
+ "93": "ObjDump",
171
+ "94": "G-code",
172
+ "95": "PicoLisp",
173
+ "96": "XS",
174
+ "97": "Vim Snippet",
175
+ "98": "R",
176
+ "99": "GDB"
177
+ },
178
+ "initializer_cutoff_factor": 2.0,
179
+ "initializer_range": 0.02,
180
+ "intermediate_size": 1152,
181
+ "label2id": {
182
+ "AGS Script": "12",
183
+ "Ada": "117",
184
+ "Adobe Font Metrics": "45",
185
+ "ApacheConf": "86",
186
+ "Apex": "101",
187
+ "AsciiDoc": "49",
188
+ "Assembly": "153",
189
+ "Blade": "20",
190
+ "Bluespec": "100",
191
+ "C": "16",
192
+ "C#": "40",
193
+ "C++": "87",
194
+ "CMake": "138",
195
+ "CODEOWNERS": "18",
196
+ "Chapel": "90",
197
+ "CoffeeScript": "139",
198
+ "ColdFusion": "11",
199
+ "Common Lisp": "110",
200
+ "Crystal": "24",
201
+ "Csound": "14",
202
+ "Csound Document": "36",
203
+ "D": "84",
204
+ "DIGITAL Command Language": "146",
205
+ "Dart": "77",
206
+ "Dockerfile": "112",
207
+ "ECL": "35",
208
+ "EJS": "129",
209
+ "Eagle": "21",
210
+ "Edoid": "142",
211
+ "Erlang": "33",
212
+ "Fish": "113",
213
+ "Fluent": "151",
214
+ "Fortran Free Form": "92",
215
+ "FreeBasic": "44",
216
+ "FreeMarker": "133",
217
+ "G-code": "94",
218
+ "GAP": "55",
219
+ "GAS": "135",
220
+ "GDB": "99",
221
+ "GDScript": "144",
222
+ "GLSL": "131",
223
+ "Gerber Image": "59",
224
+ "Gettext Catalog": "43",
225
+ "Gherkin": "3",
226
+ "Git Config": "152",
227
+ "Gnuplot": "109",
228
+ "Go": "128",
229
+ "Go Module": "41",
230
+ "Gradle": "105",
231
+ "Graphviz (DOT)": "57",
232
+ "Groovy": "72",
233
+ "HCL": "141",
234
+ "HTML+Razor": "126",
235
+ "Hack": "7",
236
+ "Haskell": "120",
237
+ "Haxe": "68",
238
+ "Hoon": "102",
239
+ "Ignore List": "73",
240
+ "ImageJ Macro": "6",
241
+ "Inform 7": "78",
242
+ "Inno Setup": "85",
243
+ "Io": "134",
244
+ "Ioke": "106",
245
+ "Isabelle": "70",
246
+ "J": "140",
247
+ "JAR Manifest": "31",
248
+ "JQ": "111",
249
+ "Java": "119",
250
+ "JavaScript": "75",
251
+ "Julia": "51",
252
+ "Jupyter Notebook": "17",
253
+ "KiCad Layout": "80",
254
+ "Kit": "56",
255
+ "Kotlin": "147",
256
+ "Kvlang": "116",
257
+ "LLVM": "74",
258
+ "Lean": "115",
259
+ "Less": "121",
260
+ "Lex": "107",
261
+ "Logtalk": "4",
262
+ "LookML": "38",
263
+ "Lua": "71",
264
+ "MATLAB": "76",
265
+ "Makefile": "132",
266
+ "Mathematica": "60",
267
+ "Metal": "54",
268
+ "Modelica": "27",
269
+ "Nim": "0",
270
+ "OCaml": "25",
271
+ "ObjDump": "93",
272
+ "Objective-C": "149",
273
+ "Objective-C++": "22",
274
+ "Open Policy Agent": "83",
275
+ "OpenEdge ABL": "81",
276
+ "OpenStep Property List": "91",
277
+ "OpenType Feature File": "64",
278
+ "PHP": "23",
279
+ "PLpgSQL": "136",
280
+ "POV-Ray SDL": "52",
281
+ "Pascal": "47",
282
+ "Perl": "37",
283
+ "PicoLisp": "95",
284
+ "PlantUML": "8",
285
+ "PostScript": "130",
286
+ "PowerShell": "46",
287
+ "Prolog": "34",
288
+ "Protocol Buffer Text Format": "66",
289
+ "PureScript": "123",
290
+ "Python": "50",
291
+ "R": "98",
292
+ "RDoc": "125",
293
+ "REALbasic": "61",
294
+ "Ragel in Ruby Host": "127",
295
+ "Rascal": "19",
296
+ "Raw token data": "13",
297
+ "Redcode": "5",
298
+ "Ruby": "89",
299
+ "Rust": "28",
300
+ "SCSS": "88",
301
+ "SQF": "62",
302
+ "SQL": "114",
303
+ "Sass": "15",
304
+ "Scala": "42",
305
+ "Scheme": "26",
306
+ "Scilab": "124",
307
+ "Shell": "30",
308
+ "Smali": "150",
309
+ "Solidity": "63",
310
+ "Squirrel": "2",
311
+ "Starlark": "53",
312
+ "Stylus": "10",
313
+ "Swift": "39",
314
+ "TSV": "103",
315
+ "TSX": "143",
316
+ "Thrift": "145",
317
+ "Turtle": "82",
318
+ "Twig": "69",
319
+ "TypeScript": "58",
320
+ "Unity3D Asset": "137",
321
+ "Unix Assembly": "29",
322
+ "Unknown": "65",
323
+ "VBScript": "79",
324
+ "VCL": "104",
325
+ "VHDL": "67",
326
+ "Verilog": "9",
327
+ "Vim Script": "122",
328
+ "Vim Snippet": "97",
329
+ "Visual Basic": "148",
330
+ "Visual Basic .NET": "108",
331
+ "Vue": "1",
332
+ "Wavefront Object": "48",
333
+ "XML Property List": "32",
334
+ "XS": "96",
335
+ "YANG": "118"
336
+ },
337
+ "layer_norm_eps": 1e-05,
338
+ "layer_types": [
339
+ "full_attention",
340
+ "sliding_attention",
341
+ "sliding_attention",
342
+ "full_attention",
343
+ "sliding_attention",
344
+ "sliding_attention",
345
+ "full_attention",
346
+ "sliding_attention",
347
+ "sliding_attention",
348
+ "full_attention",
349
+ "sliding_attention",
350
+ "sliding_attention",
351
+ "full_attention",
352
+ "sliding_attention",
353
+ "sliding_attention",
354
+ "full_attention",
355
+ "sliding_attention",
356
+ "sliding_attention",
357
+ "full_attention",
358
+ "sliding_attention",
359
+ "sliding_attention",
360
+ "full_attention"
361
+ ],
362
+ "local_attention": 128,
363
+ "max_position_embeddings": 8192,
364
+ "mlp_bias": false,
365
+ "mlp_dropout": 0.0,
366
+ "model_type": "modernbert",
367
+ "norm_bias": false,
368
+ "norm_eps": 1e-05,
369
+ "num_attention_heads": 12,
370
+ "num_hidden_layers": 22,
371
+ "pad_token_id": 50283,
372
+ "position_embedding_type": "absolute",
373
+ "problem_type": "single_label_classification",
374
+ "repad_logits_with_grad": false,
375
+ "rope_parameters": {
376
+ "full_attention": {
377
+ "rope_theta": 160000.0,
378
+ "rope_type": "default"
379
+ },
380
+ "sliding_attention": {
381
+ "rope_theta": 10000.0,
382
+ "rope_type": "default"
383
+ }
384
+ },
385
+ "sep_token_id": 50282,
386
+ "sparse_pred_ignore_index": -100,
387
+ "sparse_prediction": false,
388
+ "tie_word_embeddings": true,
389
+ "transformers_version": "5.0.0",
390
+ "use_cache": false,
391
+ "vocab_size": 50368
392
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa9ed8cb62eea64c544112cb7f00a2fed2f57b3a1393d0bee7a430002c87e096
3
+ size 598907344
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f562983391125d3ac0ace4d191d7d312a0bed2ab3d25272af12aac6734cf231
3
+ size 5265