Rolv-Arild's picture
NbAiLab/bifrost-translation-source-classifier
8135448 verified
{
"architectures": [
"ModernBertForSequenceClassification"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 2,
"classifier_activation": "gelu",
"classifier_bias": false,
"classifier_dropout": 0.0,
"classifier_pooling": "mean",
"cls_token_id": 1,
"decoder_bias": true,
"deterministic_flash_attn": false,
"dtype": "float32",
"embedding_dropout": 0.0,
"eos_token_id": 1,
"global_attn_every_n_layers": 3,
"global_rope_theta": 160000,
"gradient_checkpointing": false,
"hidden_activation": "gelu",
"hidden_size": 768,
"id2label": {
"0": "aeb",
"1": "afr",
"2": "als",
"3": "amh",
"4": "anp",
"5": "apc",
"6": "arb",
"7": "arg",
"8": "ars",
"9": "ary",
"10": "arz",
"11": "asm",
"12": "ast",
"13": "azb",
"14": "azj",
"15": "bak",
"16": "bar",
"17": "bel",
"18": "ben",
"19": "bew",
"20": "bho",
"21": "bod",
"22": "bos",
"23": "bul",
"24": "cat",
"25": "ceb",
"26": "ces",
"27": "che",
"28": "chv",
"29": "ckb",
"30": "cmn",
"31": "cnh",
"32": "cos",
"33": "crh",
"34": "cym",
"35": "dan",
"36": "deu",
"37": "div",
"38": "dzo",
"39": "ekk",
"40": "ell",
"41": "eng",
"42": "epo",
"43": "eus",
"44": "fao",
"45": "fas",
"46": "fij",
"47": "fil",
"48": "fin",
"49": "fra",
"50": "fry",
"51": "fur",
"52": "gaz",
"53": "gla",
"54": "gle",
"55": "glg",
"56": "glk",
"57": "grc",
"58": "gsw",
"59": "guj",
"60": "hac",
"61": "hat",
"62": "hau",
"63": "haw",
"64": "hbo",
"65": "heb",
"66": "hif",
"67": "hil",
"68": "hin",
"69": "hne",
"70": "hrv",
"71": "hsb",
"72": "hun",
"73": "hye",
"74": "hyw",
"75": "iba",
"76": "ibo",
"77": "ilo",
"78": "ind",
"79": "isl",
"80": "ita",
"81": "jav",
"82": "jpn",
"83": "kal",
"84": "kan",
"85": "kat",
"86": "kaz",
"87": "kha",
"88": "khk",
"89": "khm",
"90": "kin",
"91": "kir",
"92": "kiu",
"93": "kmr",
"94": "kor",
"95": "lao",
"96": "lat",
"97": "lim",
"98": "lin",
"99": "lit",
"100": "ltz",
"101": "lug",
"102": "lus",
"103": "lvs",
"104": "mai",
"105": "mal",
"106": "mar",
"107": "mhr",
"108": "mkd",
"109": "mlt",
"110": "mri",
"111": "mww",
"112": "mya",
"113": "nap",
"114": "nde",
"115": "nds",
"116": "new",
"117": "nld",
"118": "nno",
"119": "nob",
"120": "npi",
"121": "nrm",
"122": "nya",
"123": "oci",
"124": "ory",
"125": "oss",
"126": "pan",
"127": "pap",
"128": "pbt",
"129": "plt",
"130": "pnb",
"131": "pol",
"132": "por",
"133": "roh",
"134": "ron",
"135": "rue",
"136": "run",
"137": "rus",
"138": "sah",
"139": "san",
"140": "scn",
"141": "sdh",
"142": "sin",
"143": "slk",
"144": "slv",
"145": "sme",
"146": "smo",
"147": "sna",
"148": "snd",
"149": "som",
"150": "sot",
"151": "spa",
"152": "srd",
"153": "srp",
"154": "sun",
"155": "swe",
"156": "swh",
"157": "tam",
"158": "tat",
"159": "tel",
"160": "tgk",
"161": "tha",
"162": "tir",
"163": "tuk",
"164": "tur",
"165": "tyv",
"166": "udm",
"167": "uig",
"168": "ukr",
"169": "urd",
"170": "uzn",
"171": "uzs",
"172": "vie",
"173": "xho",
"174": "ydd",
"175": "yor",
"176": "yue",
"177": "zea",
"178": "zsm",
"179": "zul"
},
"initializer_cutoff_factor": 2.0,
"initializer_range": 0.02,
"intermediate_size": 1152,
"label2id": {
"aeb": 0,
"afr": 1,
"als": 2,
"amh": 3,
"anp": 4,
"apc": 5,
"arb": 6,
"arg": 7,
"ars": 8,
"ary": 9,
"arz": 10,
"asm": 11,
"ast": 12,
"azb": 13,
"azj": 14,
"bak": 15,
"bar": 16,
"bel": 17,
"ben": 18,
"bew": 19,
"bho": 20,
"bod": 21,
"bos": 22,
"bul": 23,
"cat": 24,
"ceb": 25,
"ces": 26,
"che": 27,
"chv": 28,
"ckb": 29,
"cmn": 30,
"cnh": 31,
"cos": 32,
"crh": 33,
"cym": 34,
"dan": 35,
"deu": 36,
"div": 37,
"dzo": 38,
"ekk": 39,
"ell": 40,
"eng": 41,
"epo": 42,
"eus": 43,
"fao": 44,
"fas": 45,
"fij": 46,
"fil": 47,
"fin": 48,
"fra": 49,
"fry": 50,
"fur": 51,
"gaz": 52,
"gla": 53,
"gle": 54,
"glg": 55,
"glk": 56,
"grc": 57,
"gsw": 58,
"guj": 59,
"hac": 60,
"hat": 61,
"hau": 62,
"haw": 63,
"hbo": 64,
"heb": 65,
"hif": 66,
"hil": 67,
"hin": 68,
"hne": 69,
"hrv": 70,
"hsb": 71,
"hun": 72,
"hye": 73,
"hyw": 74,
"iba": 75,
"ibo": 76,
"ilo": 77,
"ind": 78,
"isl": 79,
"ita": 80,
"jav": 81,
"jpn": 82,
"kal": 83,
"kan": 84,
"kat": 85,
"kaz": 86,
"kha": 87,
"khk": 88,
"khm": 89,
"kin": 90,
"kir": 91,
"kiu": 92,
"kmr": 93,
"kor": 94,
"lao": 95,
"lat": 96,
"lim": 97,
"lin": 98,
"lit": 99,
"ltz": 100,
"lug": 101,
"lus": 102,
"lvs": 103,
"mai": 104,
"mal": 105,
"mar": 106,
"mhr": 107,
"mkd": 108,
"mlt": 109,
"mri": 110,
"mww": 111,
"mya": 112,
"nap": 113,
"nde": 114,
"nds": 115,
"new": 116,
"nld": 117,
"nno": 118,
"nob": 119,
"npi": 120,
"nrm": 121,
"nya": 122,
"oci": 123,
"ory": 124,
"oss": 125,
"pan": 126,
"pap": 127,
"pbt": 128,
"plt": 129,
"pnb": 130,
"pol": 131,
"por": 132,
"roh": 133,
"ron": 134,
"rue": 135,
"run": 136,
"rus": 137,
"sah": 138,
"san": 139,
"scn": 140,
"sdh": 141,
"sin": 142,
"slk": 143,
"slv": 144,
"sme": 145,
"smo": 146,
"sna": 147,
"snd": 148,
"som": 149,
"sot": 150,
"spa": 151,
"srd": 152,
"srp": 153,
"sun": 154,
"swe": 155,
"swh": 156,
"tam": 157,
"tat": 158,
"tel": 159,
"tgk": 160,
"tha": 161,
"tir": 162,
"tuk": 163,
"tur": 164,
"tyv": 165,
"udm": 166,
"uig": 167,
"ukr": 168,
"urd": 169,
"uzn": 170,
"uzs": 171,
"vie": 172,
"xho": 173,
"ydd": 174,
"yor": 175,
"yue": 176,
"zea": 177,
"zsm": 178,
"zul": 179
},
"layer_norm_eps": 1e-05,
"local_attention": 128,
"local_rope_theta": 160000,
"mask_token_id": 4,
"max_position_embeddings": 8192,
"mlp_bias": false,
"mlp_dropout": 0.0,
"model_type": "modernbert",
"norm_bias": false,
"norm_eps": 1e-05,
"num_attention_heads": 12,
"num_hidden_layers": 22,
"pad_token_id": 0,
"position_embedding_type": "sans_pos",
"problem_type": "single_label_classification",
"repad_logits_with_grad": false,
"sep_token_id": 1,
"sparse_pred_ignore_index": -100,
"sparse_prediction": false,
"transformers_version": "4.57.6",
"vocab_size": 256000
}