Swallow-MS-7b-upos / config.json
KoichiYasuoka's picture
bug fix
e829ec5
{
"architectures": [
"MistralForTokenClassification"
],
"attention_bias": false,
"attention_dropout": 0.0,
"auto_map": {
"AutoModelForTokenClassification": "upos.MistralForTokenClassification"
},
"bos_token_id": 1,
"custom_pipelines": {
"upos": {
"impl": "upos.BellmanFordTokenClassificationPipeline",
"pt": "AutoModelForTokenClassification"
}
},
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"id2label": {
"0": "ADJ",
"1": "B-ADJ",
"2": "I-ADJ",
"3": "ADJ|Polarity=Neg",
"4": "B-ADJ|Polarity=Neg",
"5": "I-ADJ|Polarity=Neg",
"6": "ADP",
"7": "B-ADP",
"8": "I-ADP",
"9": "ADV",
"10": "B-ADV",
"11": "I-ADV",
"12": "AUX",
"13": "B-AUX",
"14": "I-AUX",
"15": "AUX|Polarity=Neg",
"16": "B-AUX|Polarity=Neg",
"17": "I-AUX|Polarity=Neg",
"18": "CCONJ",
"19": "B-CCONJ",
"20": "I-CCONJ",
"21": "DET",
"22": "B-DET",
"23": "I-DET",
"24": "INTJ",
"25": "B-INTJ",
"26": "I-INTJ",
"27": "NOUN",
"28": "B-NOUN",
"29": "I-NOUN",
"30": "NOUN|Polarity=Neg",
"31": "B-NOUN|Polarity=Neg",
"32": "I-NOUN|Polarity=Neg",
"33": "NUM",
"34": "B-NUM",
"35": "I-NUM",
"36": "PART",
"37": "B-PART",
"38": "I-PART",
"39": "PRON",
"40": "B-PRON",
"41": "I-PRON",
"42": "PROPN",
"43": "B-PROPN",
"44": "I-PROPN",
"45": "PUNCT",
"46": "B-PUNCT",
"47": "I-PUNCT",
"48": "SCONJ",
"49": "B-SCONJ",
"50": "I-SCONJ",
"51": "SYM",
"52": "B-SYM",
"53": "I-SYM",
"54": "VERB",
"55": "B-VERB",
"56": "I-VERB",
"57": "X",
"58": "B-X",
"59": "I-X"
},
"initializer_range": 0.02,
"intermediate_size": 14336,
"label2id": {
"ADJ": 0,
"ADJ|Polarity=Neg": 3,
"ADP": 6,
"ADV": 9,
"AUX": 12,
"AUX|Polarity=Neg": 15,
"B-ADJ": 1,
"B-ADJ|Polarity=Neg": 4,
"B-ADP": 7,
"B-ADV": 10,
"B-AUX": 13,
"B-AUX|Polarity=Neg": 16,
"B-CCONJ": 19,
"B-DET": 22,
"B-INTJ": 25,
"B-NOUN": 28,
"B-NOUN|Polarity=Neg": 31,
"B-NUM": 34,
"B-PART": 37,
"B-PRON": 40,
"B-PROPN": 43,
"B-PUNCT": 46,
"B-SCONJ": 49,
"B-SYM": 52,
"B-VERB": 55,
"B-X": 58,
"CCONJ": 18,
"DET": 21,
"I-ADJ": 2,
"I-ADJ|Polarity=Neg": 5,
"I-ADP": 8,
"I-ADV": 11,
"I-AUX": 14,
"I-AUX|Polarity=Neg": 17,
"I-CCONJ": 20,
"I-DET": 23,
"I-INTJ": 26,
"I-NOUN": 29,
"I-NOUN|Polarity=Neg": 32,
"I-NUM": 35,
"I-PART": 38,
"I-PRON": 41,
"I-PROPN": 44,
"I-PUNCT": 47,
"I-SCONJ": 50,
"I-SYM": 53,
"I-VERB": 56,
"I-X": 59,
"INTJ": 24,
"NOUN": 27,
"NOUN|Polarity=Neg": 30,
"NUM": 33,
"PART": 36,
"PRON": 39,
"PROPN": 42,
"PUNCT": 45,
"SCONJ": 48,
"SYM": 51,
"VERB": 54,
"X": 57
},
"max_position_embeddings": 4096,
"model_type": "mistral",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 8,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 10000.0,
"sliding_window": 4096,
"tie_word_embeddings": false,
"tokenizer_class": "LlamaTokenizerFast",
"torch_dtype": "float32",
"transformers_version": "4.38.1",
"use_cache": true,
"vocab_size": 43317
}