{
"architectures": [
"LlamaForTokenClassification"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 128000,
"custom_pipelines": {
"upos": {
"impl": "upos.BellmanFordTokenClassificationPipeline",
"pt": "AutoModelForTokenClassification"
}
},
"eos_token_id": 128001,
"hidden_act": "silu",
"hidden_size": 4096,
"id2label": {
"0": "ADJ",
"1": "B-ADJ",
"2": "I-ADJ",
"3": "ADP",
"4": "B-ADP",
"5": "I-ADP",
"6": "ADV",
"7": "B-ADV",
"8": "I-ADV",
"9": "AUX",
"10": "B-AUX",
"11": "I-AUX",
"12": "CCONJ",
"13": "B-CCONJ",
"14": "I-CCONJ",
"15": "DET",
"16": "B-DET",
"17": "I-DET",
"18": "INTJ",
"19": "B-INTJ",
"20": "I-INTJ",
"21": "NOUN",
"22": "B-NOUN",
"23": "I-NOUN",
"24": "NUM",
"25": "B-NUM",
"26": "I-NUM",
"27": "PART",
"28": "B-PART",
"29": "I-PART",
"30": "PRON",
"31": "B-PRON",
"32": "I-PRON",
"33": "PROPN",
"34": "B-PROPN",
"35": "I-PROPN",
"36": "PUNCT",
"37": "B-PUNCT",
"38": "I-PUNCT",
"39": "SCONJ",
"40": "B-SCONJ",
"41": "I-SCONJ",
"42": "SYM",
"43": "B-SYM",
"44": "I-SYM",
"45": "VERB",
"46": "B-VERB",
"47": "I-VERB",
"48": "X",
"49": "B-X",
"50": "I-X"
},
"initializer_range": 0.02,
"intermediate_size": 14336,
"label2id": {
"ADJ": 0,
"ADP": 3,
"ADV": 6,
"AUX": 9,
"B-ADJ": 1,
"B-ADP": 4,
"B-ADV": 7,
"B-AUX": 10,
"B-CCONJ": 13,
"B-DET": 16,
"B-INTJ": 19,
"B-NOUN": 22,
"B-NUM": 25,
"B-PART": 28,
"B-PRON": 31,
"B-PROPN": 34,
"B-PUNCT": 37,
"B-SCONJ": 40,
"B-SYM": 43,
"B-VERB": 46,
"B-X": 49,
"CCONJ": 12,
"DET": 15,
"I-ADJ": 2,
"I-ADP": 5,
"I-ADV": 8,
"I-AUX": 11,
"I-CCONJ": 14,
"I-DET": 17,
"I-INTJ": 20,
"I-NOUN": 23,
"I-NUM": 26,
"I-PART": 29,
"I-PRON": 32,
"I-PROPN": 35,
"I-PUNCT": 38,
"I-SCONJ": 41,
"I-SYM": 44,
"I-VERB": 47,
"I-X": 50,
"INTJ": 18,
"NOUN": 21,
"NUM": 24,
"PART": 27,
"PRON": 30,
"PROPN": 33,
"PUNCT": 36,
"SCONJ": 39,
"SYM": 42,
"VERB": 45,
"X": 48
},
"max_position_embeddings": 8192,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 8,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 500000.0,
"tie_word_embeddings": false,
"tokenizer_class": "LlamaTokenizerFast",
"torch_dtype": "float32",
"transformers_version": "4.42.4",
"use_cache": true,
"vocab_size": 128259
}
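
For reference, a minimal usage sketch of the custom "upos" pipeline declared in "custom_pipelines" above. The repository id below is a placeholder and not taken from this file; substitute the actual Hub id that hosts this config.

from transformers import pipeline

# MODEL_ID is a placeholder -- replace it with the real repository id.
# trust_remote_code=True is required because the "upos" task resolves to
# upos.BellmanFordTokenClassificationPipeline, a custom pipeline class
# shipped inside the model repository (see "custom_pipelines" above).
nlp = pipeline("upos", model="MODEL_ID", trust_remote_code=True)

# Each word is tagged with a Universal POS label from id2label; plain labels
# cover single-token words, while the B-/I- variants span words that the
# tokenizer splits across multiple tokens.
print(nlp("It don't mean a thing if it ain't got that swing"))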