spanbert-base-cased / config.json
{
"_name_or_path": "SpanBERT/spanbert-base-cased",
"architectures": [
"BertForSequenceClassification"
],
"attention_probs_dropout_prob": 0.1,
"classifier_dropout": null,
"directionality": "bidi",
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"id2label": {
"0": "sd",
"1": "b",
"2": "sv",
"3": "%",
"4": "aa",
"5": "ba",
"6": "fc",
"7": "qw",
"8": "nn",
"9": "bk",
"10": "h",
"11": "qy^d",
"12": "bh",
"13": "^q",
"14": "bf",
"15": "fo_o_fw_\"_by_bc",
"16": "fo_o_fw_by_bc_\"",
"17": "na",
"18": "ad",
"19": "^2",
"20": "b^m",
"21": "qo",
"22": "qh",
"23": "^h",
"24": "ar",
"25": "ng",
"26": "br",
"27": "no",
"28": "fp",
"29": "qrr",
"30": "arp_nd",
"31": "t3",
"32": "oo_co_cc",
"33": "aap_am",
"34": "t1",
"35": "bd",
"36": "^g",
"37": "qw^d",
"38": "fa",
"39": "ft",
"40": "+",
"41": "x",
"42": "ny",
"43": "sv_fx",
"44": "qy_qr",
"45": "ba_fe"
},
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"%": 3,
"+": 40,
"^2": 19,
"^g": 36,
"^h": 23,
"^q": 13,
"aa": 4,
"aap_am": 33,
"ad": 18,
"ar": 24,
"arp_nd": 30,
"b": 1,
"b^m": 20,
"ba": 5,
"ba_fe": 45,
"bd": 35,
"bf": 14,
"bh": 12,
"bk": 9,
"br": 26,
"fa": 38,
"fc": 6,
"fo_o_fw_\"_by_bc": 15,
"fo_o_fw_by_bc_\"": 16,
"fp": 28,
"ft": 39,
"h": 10,
"na": 17,
"ng": 25,
"nn": 8,
"no": 27,
"ny": 42,
"oo_co_cc": 32,
"qh": 22,
"qo": 21,
"qrr": 29,
"qw": 7,
"qw^d": 37,
"qy^d": 11,
"qy_qr": 44,
"sd": 0,
"sv": 2,
"sv_fx": 43,
"t1": 34,
"t3": 31,
"x": 41
},
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"model_type": "bert",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"pad_token_id": 0,
"position_embedding_type": "absolute",
"problem_type": "single_label_classification",
"torch_dtype": "float32",
"transformers_version": "4.26.0",
"type_vocab_size": 2,
"use_cache": true,
"vocab_size": 28996
}
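
This config pairs a `BertForSequenceClassification` head with the `SpanBERT/spanbert-base-cased` backbone for 46-way `single_label_classification`; `id2label`/`label2id` map class indices to dialogue-act style tags. Below is a minimal sketch of loading the checkpoint and mapping a prediction back through `id2label`. The repo id `ahmetayrnc/spanbert-base-cased` is an assumption inferred from this page's path; substitute the actual model id or a local directory containing this config and its weights.

```python
import torch
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

# Assumed repo id; replace with the real model id or a local checkpoint path.
model_id = "ahmetayrnc/spanbert-base-cased"

config = AutoConfig.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)
model.eval()

# Classify one utterance; id2label from this config maps the argmax index
# back to a dialogue-act tag such as "sd", "aa", or "qy^d".
inputs = tokenizer("Okay, that sounds good to me.", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
predicted_tag = config.id2label[logits.argmax(dim=-1).item()]
print(predicted_tag)
```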