bert-base-russian-upos / config.json
KoichiYasuoka's picture
initial release
6eb64fd
{
"architectures": [
"BertForTokenClassification"
],
"attention_probs_dropout_prob": 0.1,
"classifier_dropout": null,
"directionality": "bidi",
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"id2label": {
"0": "ADJ",
"1": "ADJ+NOUN",
"2": "ADP",
"3": "ADP+PRON",
"4": "ADV",
"5": "AUX",
"6": "B-ADJ",
"7": "B-ADJ+ADJ",
"8": "B-ADJ+NOUN",
"9": "B-ADJ+SCONJ",
"10": "B-ADJ+VERB",
"11": "B-ADP",
"12": "B-ADP+DET",
"13": "B-ADV",
"14": "B-ADV+VERB",
"15": "B-AUX",
"16": "B-CCONJ",
"17": "B-DET",
"18": "B-INTJ",
"19": "B-NOUN",
"20": "B-NUM",
"21": "B-NUM+ADJ",
"22": "B-NUM+NOUN",
"23": "B-PART",
"24": "B-PART+PRON",
"25": "B-PRON",
"26": "B-PRON+PART",
"27": "B-PROPN",
"28": "B-PUNCT",
"29": "B-SCONJ",
"30": "B-SYM",
"31": "B-SYM+NUM",
"32": "B-VERB",
"33": "B-VERB+ADV",
"34": "B-VERB+PRON",
"35": "B-X",
"36": "B-X+X",
"37": "CCONJ",
"38": "DET",
"39": "I-ADJ",
"40": "I-ADJ+ADJ",
"41": "I-ADJ+NOUN",
"42": "I-ADJ+SCONJ",
"43": "I-ADJ+VERB",
"44": "I-ADP",
"45": "I-ADP+DET",
"46": "I-ADV",
"47": "I-ADV+VERB",
"48": "I-AUX",
"49": "I-CCONJ",
"50": "I-DET",
"51": "I-INTJ",
"52": "I-NOUN",
"53": "I-NUM",
"54": "I-NUM+ADJ",
"55": "I-NUM+NOUN",
"56": "I-PART",
"57": "I-PART+PRON",
"58": "I-PRON",
"59": "I-PRON+PART",
"60": "I-PROPN",
"61": "I-PUNCT",
"62": "I-SCONJ",
"63": "I-SYM",
"64": "I-SYM+NUM",
"65": "I-VERB",
"66": "I-VERB+ADV",
"67": "I-VERB+PRON",
"68": "I-X",
"69": "I-X+X",
"70": "INTJ",
"71": "NOUN",
"72": "NUM",
"73": "NUM+NOUN",
"74": "NUM+PROPN",
"75": "NUM+SYM",
"76": "PART",
"77": "PRON",
"78": "PROPN",
"79": "PUNCT",
"80": "SCONJ",
"81": "SYM",
"82": "SYM+NOUN",
"83": "SYM+NUM",
"84": "SYM+PROPN",
"85": "VERB",
"86": "VERB+ADV",
"87": "VERB+PRON",
"88": "X"
},
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"ADJ": 0,
"ADJ+NOUN": 1,
"ADP": 2,
"ADP+PRON": 3,
"ADV": 4,
"AUX": 5,
"B-ADJ": 6,
"B-ADJ+ADJ": 7,
"B-ADJ+NOUN": 8,
"B-ADJ+SCONJ": 9,
"B-ADJ+VERB": 10,
"B-ADP": 11,
"B-ADP+DET": 12,
"B-ADV": 13,
"B-ADV+VERB": 14,
"B-AUX": 15,
"B-CCONJ": 16,
"B-DET": 17,
"B-INTJ": 18,
"B-NOUN": 19,
"B-NUM": 20,
"B-NUM+ADJ": 21,
"B-NUM+NOUN": 22,
"B-PART": 23,
"B-PART+PRON": 24,
"B-PRON": 25,
"B-PRON+PART": 26,
"B-PROPN": 27,
"B-PUNCT": 28,
"B-SCONJ": 29,
"B-SYM": 30,
"B-SYM+NUM": 31,
"B-VERB": 32,
"B-VERB+ADV": 33,
"B-VERB+PRON": 34,
"B-X": 35,
"B-X+X": 36,
"CCONJ": 37,
"DET": 38,
"I-ADJ": 39,
"I-ADJ+ADJ": 40,
"I-ADJ+NOUN": 41,
"I-ADJ+SCONJ": 42,
"I-ADJ+VERB": 43,
"I-ADP": 44,
"I-ADP+DET": 45,
"I-ADV": 46,
"I-ADV+VERB": 47,
"I-AUX": 48,
"I-CCONJ": 49,
"I-DET": 50,
"I-INTJ": 51,
"I-NOUN": 52,
"I-NUM": 53,
"I-NUM+ADJ": 54,
"I-NUM+NOUN": 55,
"I-PART": 56,
"I-PART+PRON": 57,
"I-PRON": 58,
"I-PRON+PART": 59,
"I-PROPN": 60,
"I-PUNCT": 61,
"I-SCONJ": 62,
"I-SYM": 63,
"I-SYM+NUM": 64,
"I-VERB": 65,
"I-VERB+ADV": 66,
"I-VERB+PRON": 67,
"I-X": 68,
"I-X+X": 69,
"INTJ": 70,
"NOUN": 71,
"NUM": 72,
"NUM+NOUN": 73,
"NUM+PROPN": 74,
"NUM+SYM": 75,
"PART": 76,
"PRON": 77,
"PROPN": 78,
"PUNCT": 79,
"SCONJ": 80,
"SYM": 81,
"SYM+NOUN": 82,
"SYM+NUM": 83,
"SYM+PROPN": 84,
"VERB": 85,
"VERB+ADV": 86,
"VERB+PRON": 87,
"X": 88
},
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"model_type": "bert",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"output_past": true,
"pad_token_id": 0,
"pooler_fc_size": 768,
"pooler_num_attention_heads": 12,
"pooler_num_fc_layers": 3,
"pooler_size_per_head": 128,
"pooler_type": "first_token_transform",
"position_embedding_type": "absolute",
"task_specific_params": {
"upos_multiword": {
"ADJ+ADJ": {
"\u0432\u043e\u0435\u043d\u043d\u043e\u043c\u043e\u0440\u0441\u043a\u0430\u044f": [
"\u0432\u043e\u0435\u043d\u043d\u043e",
"\u043c\u043e\u0440\u0441\u043a\u0430\u044f"
],
"\u0432\u043e\u0435\u043d\u043d\u043e\u043c\u043e\u0440\u0441\u043a\u0443\u044e": [
"\u0432\u043e\u0435\u043d\u043d\u043e",
"\u043c\u043e\u0440\u0441\u043a\u0443\u044e"
],
"\u0433\u043e\u0440\u043d\u043e\u043c\u0435\u0442\u0430\u043b\u043b\u0443\u0440\u0433\u0438\u0447\u0435\u0441\u043a\u0438\u0439": [
"\u0433\u043e\u0440\u043d\u043e",
"\u043c\u0435\u0442\u0430\u043b\u043b\u0443\u0440\u0433\u0438\u0447\u0435\u0441\u043a\u0438\u0439"
],
"\u0434\u043e\u043b\u0433\u043e\u0441\u0440\u043e\u0447\u043d\u044b\u043c": [
"\u0434\u043e\u043b\u0433\u043e",
"\u0441\u0440\u043e\u0447\u043d\u044b\u043c"
],
"\u043a\u043e\u0441\u043c\u043e\u0437\u0435\u043c\u043d\u044b\u0445": [
"\u043a\u043e\u0441\u043c\u043e",
"\u0437\u0435\u043c\u043d\u044b\u0445"
],
"\u0441\u0443\u0434\u0435\u0431\u043d\u043e\u043c\u0435\u0434\u0438\u0446\u0438\u043d\u0441\u043a\u043e\u0439": [
"\u0441\u0443\u0434\u0435\u0431\u043d\u043e",
"\u043c\u0435\u0434\u0438\u0446\u0438\u043d\u0441\u043a\u043e\u0439"
]
},
"ADJ+NOUN": {
"\u0420\u043e\u0441\u0433\u043e\u0441\u0441\u0442\u0440\u0430\u0445\u0430": [
"\u0420\u043e\u0441",
"\u0433\u043e\u0441\u0441\u0442\u0440\u0430\u0445\u0430"
],
"\u0420\u043e\u0441\u043a\u0443\u043b\u044c\u0442\u0443\u0440\u044b": [
"\u0420\u043e\u0441",
"\u043a\u0443\u043b\u044c\u0442\u0443\u0440\u044b"
],
"\u0433\u043e\u0441\u0437\u0430\u043a\u0443\u043f\u043e\u043a": [
"\u0433\u043e\u0441",
"\u0437\u0430\u043a\u0443\u043f\u043e\u043a"
],
"\u0433\u043e\u0441\u043a\u043e\u0440\u043f\u043e\u0440\u0430\u0446\u0438\u044f\u043c": [
"\u0433\u043e\u0441",
"\u043a\u043e\u0440\u043f\u043e\u0440\u0430\u0446\u0438\u044f\u043c"
],
"\u0433\u043e\u0441\u043f\u043e\u0434\u0434\u0435\u0440\u0436\u043a\u0430": [
"\u0433\u043e\u0441",
"\u043f\u043e\u0434\u0434\u0435\u0440\u0436\u043a\u0430"
],
"\u0433\u043e\u0441\u0441\u043b\u0443\u0436\u0431\u044b": [
"\u0433\u043e\u0441",
"\u0441\u043b\u0443\u0436\u0431\u044b"
],
"\u043c\u0435\u0434\u0443\u0447\u0440\u0435\u0436\u0434\u0435\u043d\u0438\u0438": [
"\u043c\u0435\u0434",
"\u0443\u0447\u0440\u0435\u0436\u0434\u0435\u043d\u0438\u0438"
],
"\u0441\u0435\u043b\u044c\u0445\u043e\u0437\u043f\u0440\u043e\u0434\u0443\u043a\u0446\u0438\u0438": [
"\u0441\u0435\u043b\u044c\u0445\u043e\u0437",
"\u043f\u0440\u043e\u0434\u0443\u043a\u0446\u0438\u0438"
]
},
"ADJ+VERB": {
"\u041b\u0435\u0432\u043e\u043d\u0430\u0441\u0442\u0440\u043e\u0435\u043d\u043d\u044b\u0435": [
"\u041b\u0435\u0432\u043e",
"\u043d\u0430\u0441\u0442\u0440\u043e\u0435\u043d\u043d\u044b\u0435"
]
},
"ADP+DET": {
"\u0441\u0434\u0440\u0443\u0433\u0438\u043c\u0438": [
"\u0441",
"\u0434\u0440\u0443\u0433\u0438\u043c\u0438"
],
"\u0441\u0442\u0435\u0445": [
"\u0441",
"\u0442\u0435\u0445"
]
},
"ADP+PRON": {
"\u0441\u043d\u0438\u043c": [
"\u0441",
"\u043d\u0438\u043c"
]
},
"ADV+VERB": {
"\u043d\u0435\u0433\u0434\u0435\u0442\u0440\u0435\u043d\u0438\u0440\u043e\u0432\u0430\u0442\u044c": [
"\u043d\u0435\u0433\u0434\u0435",
"\u0442\u0440\u0435\u043d\u0438\u0440\u043e\u0432\u0430\u0442\u044c"
]
},
"NUM+NOUN": {
"\u041f\u043e\u043b\u043b\u0438\u0442\u0440\u0430": [
"\u041f\u043e\u043b",
"\u043b\u0438\u0442\u0440\u0430"
],
"\u043f\u043e\u043b\u0432\u0435\u043a\u0430": [
"\u043f\u043e\u043b",
"\u0432\u0435\u043a\u0430"
],
"\u043f\u043e\u043b\u0433\u043e\u0434\u0430": [
"\u043f\u043e\u043b",
"\u0433\u043e\u0434\u0430"
],
"\u043f\u043e\u043b\u0434\u043d\u044f": [
"\u043f\u043e\u043b",
"\u0434\u043d\u044f"
],
"\u043f\u043e\u043b\u0434\u044e\u0436\u0438\u043d\u044b": [
"\u043f\u043e\u043b",
"\u0434\u044e\u0436\u0438\u043d\u044b"
],
"\u043f\u043e\u043b\u043a\u0438\u043b\u043e": [
"\u043f\u043e\u043b",
"\u043a\u0438\u043b\u043e"
],
"\u043f\u043e\u043b\u043a\u0438\u043b\u043e\u0433\u0440\u0430\u043c\u043c\u0430": [
"\u043f\u043e\u043b",
"\u043a\u0438\u043b\u043e\u0433\u0440\u0430\u043c\u043c\u0430"
],
"\u043f\u043e\u043b\u043a\u043e\u0442\u043b\u0435\u0442\u044b": [
"\u043f\u043e\u043b",
"\u043a\u043e\u0442\u043b\u0435\u0442\u044b"
],
"\u043f\u043e\u043b\u043a\u0443\u0441\u043e\u0447\u043a\u0430": [
"\u043f\u043e\u043b",
"\u043a\u0443\u0441\u043e\u0447\u043a\u0430"
],
"\u043f\u043e\u043b\u043c\u0435\u0441\u044f\u0446\u0430": [
"\u043f\u043e\u043b",
"\u043c\u0435\u0441\u044f\u0446\u0430"
],
"\u043f\u043e\u043b\u043c\u0435\u0442\u0440\u0430": [
"\u043f\u043e\u043b",
"\u043c\u0435\u0442\u0440\u0430"
],
"\u043f\u043e\u043b\u043c\u0435\u0448\u043a\u0430": [
"\u043f\u043e\u043b",
"\u043c\u0435\u0448\u043a\u0430"
],
"\u043f\u043e\u043b\u043c\u0438\u043b\u043b\u0438\u0430\u0440\u0434\u0430": [
"\u043f\u043e\u043b",
"\u043c\u0438\u043b\u043b\u0438\u0430\u0440\u0434\u0430"
],
"\u043f\u043e\u043b\u043c\u0438\u043b\u043b\u0438\u043e\u043d\u0430": [
"\u043f\u043e\u043b",
"\u043c\u0438\u043b\u043b\u0438\u043e\u043d\u0430"
],
"\u043f\u043e\u043b\u043c\u0438\u043d\u0443\u0442\u044b": [
"\u043f\u043e\u043b",
"\u043c\u0438\u043d\u0443\u0442\u044b"
],
"\u043f\u043e\u043b\u043c\u044b\u0441\u043b\u0438": [
"\u043f\u043e\u043b",
"\u043c\u044b\u0441\u043b\u0438"
],
"\u043f\u043e\u043b\u043f\u0443\u0442\u0438": [
"\u043f\u043e\u043b",
"\u043f\u0443\u0442\u0438"
],
"\u043f\u043e\u043b\u0447\u0430\u0441\u0430": [
"\u043f\u043e\u043b",
"\u0447\u0430\u0441\u0430"
]
},
"NUM+SYM": {
"20\u00b0": [
"20",
"\u00b0"
],
"2\u00b0": [
"2",
"\u00b0"
],
"90\u00b0": [
"90",
"\u00b0"
]
},
"PART+PRON": {
"\u0434\u0430\u0442\u043e": [
"\u0434\u0430",
"\u0442\u043e"
]
},
"SYM+NUM": {
"\u21161": [
"\u2116",
"1"
],
"\u2116159": [
"\u2116",
"159"
],
"\u211619": [
"\u2116",
"19"
],
"\u21162": [
"\u2116",
"2"
],
"\u211636": [
"\u2116",
"36"
],
"\u21165": [
"\u2116",
"5"
]
},
"VERB+ADV": {
"\u041d\u0435\u043a\u043e\u0433\u0434\u0430": [
"\u041d\u0435",
"\u043a\u043e\u0433\u0434\u0430"
],
"\u043d\u0435\u0437\u0430\u0447\u0435\u043c": [
"\u043d\u0435",
"\u0437\u0430\u0447\u0435\u043c"
],
"\u043d\u0435\u043a\u043e\u0433\u0434\u0430": [
"\u043d\u0435",
"\u043a\u043e\u0433\u0434\u0430"
],
"\u043d\u0435\u043a\u0443\u0434\u0430": [
"\u043d\u0435",
"\u043a\u0443\u0434\u0430"
]
},
"VERB+PRON": {
"\u041d\u0435\u043a\u043e\u043c\u0443": [
"\u041d\u0435",
"\u043a\u043e\u043c\u0443"
],
"\u043d\u0435\u043a\u043e\u0433\u043e": [
"\u043d\u0435",
"\u043a\u043e\u0433\u043e"
],
"\u043d\u0435\u043a\u043e\u043c\u0443": [
"\u043d\u0435",
"\u043a\u043e\u043c\u0443"
],
"\u043d\u0435\u0447\u0435\u0433\u043e": [
"\u043d\u0435",
"\u0447\u0435\u0433\u043e"
],
"\u043d\u0435\u0447\u0435\u043c": [
"\u043d\u0435",
"\u0447\u0435\u043c"
],
"\u043d\u0435\u0447\u0435\u043c\u0443": [
"\u043d\u0435",
"\u0447\u0435\u043c\u0443"
]
},
"X+X": {
"xxx": [
"x",
"xx"
],
"xxxx": [
"x",
"xxx"
]
}
}
},
"tokenizer_class": "BertTokenizerFast",
"torch_dtype": "float32",
"transformers_version": "4.14.1",
"type_vocab_size": 2,
"use_cache": true,
"vocab_size": 119547
}