bert-token-classifier / config.json
permutans's picture
Upload folder using huggingface_hub
47ff542 verified
{
"architectures": [
"ModernBertForMaskedLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"auto_map": {
"AutoModel": "modeling_havelock.HavelockTokenClassifier"
},
"bos_token_id": 50281,
"classifier_activation": "gelu",
"classifier_bias": false,
"classifier_dropout": 0.0,
"classifier_pooling": "mean",
"cls_token_id": 50281,
"decoder_bias": true,
"deterministic_flash_attn": false,
"dtype": "float32",
"embedding_dropout": 0.0,
"eos_token_id": 50282,
"global_attn_every_n_layers": 3,
"gradient_checkpointing": false,
"hidden_activation": "gelu",
"hidden_size": 768,
"id2label": {
"0": "O-literate_abstract_noun",
"1": "B-literate_abstract_noun",
"10": "B-literate_agentless_passive",
"100": "B-oral_discourse_formula",
"101": "I-oral_discourse_formula",
"102": "O-oral_embodied_action",
"103": "B-oral_embodied_action",
"104": "I-oral_embodied_action",
"105": "O-oral_everyday_example",
"106": "B-oral_everyday_example",
"107": "I-oral_everyday_example",
"108": "O-oral_imperative",
"109": "B-oral_imperative",
"11": "I-literate_agentless_passive",
"110": "I-oral_imperative",
"111": "O-oral_inclusive_we",
"112": "B-oral_inclusive_we",
"113": "I-oral_inclusive_we",
"114": "O-oral_intensifier_doubling",
"115": "B-oral_intensifier_doubling",
"116": "I-oral_intensifier_doubling",
"117": "O-oral_lexical_repetition",
"118": "B-oral_lexical_repetition",
"119": "I-oral_lexical_repetition",
"12": "O-literate_aside",
"120": "O-oral_named_individual",
"121": "B-oral_named_individual",
"122": "I-oral_named_individual",
"123": "O-oral_parallelism",
"124": "B-oral_parallelism",
"125": "I-oral_parallelism",
"126": "O-oral_phatic_check",
"127": "B-oral_phatic_check",
"128": "I-oral_phatic_check",
"129": "O-oral_phatic_filler",
"13": "B-literate_aside",
"130": "B-oral_phatic_filler",
"131": "I-oral_phatic_filler",
"132": "O-oral_rhetorical_question",
"133": "B-oral_rhetorical_question",
"134": "I-oral_rhetorical_question",
"135": "O-oral_second_person",
"136": "B-oral_second_person",
"137": "I-oral_second_person",
"138": "O-oral_self_correction",
"139": "B-oral_self_correction",
"14": "I-literate_aside",
"140": "I-oral_self_correction",
"141": "O-oral_sensory_detail",
"142": "B-oral_sensory_detail",
"143": "I-oral_sensory_detail",
"144": "O-oral_simple_conjunction",
"145": "B-oral_simple_conjunction",
"146": "I-oral_simple_conjunction",
"147": "O-oral_specific_place",
"148": "B-oral_specific_place",
"149": "I-oral_specific_place",
"15": "O-literate_categorical_statement",
"150": "O-oral_temporal_anchor",
"151": "B-oral_temporal_anchor",
"152": "I-oral_temporal_anchor",
"153": "O-oral_tricolon",
"154": "B-oral_tricolon",
"155": "I-oral_tricolon",
"156": "O-oral_vocative",
"157": "B-oral_vocative",
"158": "I-oral_vocative",
"16": "B-literate_categorical_statement",
"17": "I-literate_categorical_statement",
"18": "O-literate_causal_explicit",
"19": "B-literate_causal_explicit",
"2": "I-literate_abstract_noun",
"20": "I-literate_causal_explicit",
"21": "O-literate_citation",
"22": "B-literate_citation",
"23": "I-literate_citation",
"24": "O-literate_conceptual_metaphor",
"25": "B-literate_conceptual_metaphor",
"26": "I-literate_conceptual_metaphor",
"27": "O-literate_concessive",
"28": "B-literate_concessive",
"29": "I-literate_concessive",
"3": "O-literate_additive_formal",
"30": "O-literate_concessive_connector",
"31": "B-literate_concessive_connector",
"32": "I-literate_concessive_connector",
"33": "O-literate_concrete_setting",
"34": "B-literate_concrete_setting",
"35": "I-literate_concrete_setting",
"36": "O-literate_conditional",
"37": "B-literate_conditional",
"38": "I-literate_conditional",
"39": "O-literate_contrastive",
"4": "B-literate_additive_formal",
"40": "B-literate_contrastive",
"41": "I-literate_contrastive",
"42": "O-literate_cross_reference",
"43": "B-literate_cross_reference",
"44": "I-literate_cross_reference",
"45": "O-literate_definitional_move",
"46": "B-literate_definitional_move",
"47": "I-literate_definitional_move",
"48": "O-literate_enumeration",
"49": "B-literate_enumeration",
"5": "I-literate_additive_formal",
"50": "I-literate_enumeration",
"51": "O-literate_epistemic_hedge",
"52": "B-literate_epistemic_hedge",
"53": "I-literate_epistemic_hedge",
"54": "O-literate_evidential",
"55": "B-literate_evidential",
"56": "I-literate_evidential",
"57": "O-literate_institutional_subject",
"58": "B-literate_institutional_subject",
"59": "I-literate_institutional_subject",
"6": "O-literate_agent_demoted",
"60": "O-literate_list_structure",
"61": "B-literate_list_structure",
"62": "I-literate_list_structure",
"63": "O-literate_metadiscourse",
"64": "B-literate_metadiscourse",
"65": "I-literate_metadiscourse",
"66": "O-literate_nested_clauses",
"67": "B-literate_nested_clauses",
"68": "I-literate_nested_clauses",
"69": "O-literate_nominalization",
"7": "B-literate_agent_demoted",
"70": "B-literate_nominalization",
"71": "I-literate_nominalization",
"72": "O-literate_objectifying_stance",
"73": "B-literate_objectifying_stance",
"74": "I-literate_objectifying_stance",
"75": "O-literate_probability",
"76": "B-literate_probability",
"77": "I-literate_probability",
"78": "O-literate_qualified_assertion",
"79": "B-literate_qualified_assertion",
"8": "I-literate_agent_demoted",
"80": "I-literate_qualified_assertion",
"81": "O-literate_relative_chain",
"82": "B-literate_relative_chain",
"83": "I-literate_relative_chain",
"84": "O-literate_technical_abbreviation",
"85": "B-literate_technical_abbreviation",
"86": "I-literate_technical_abbreviation",
"87": "O-literate_technical_term",
"88": "B-literate_technical_term",
"89": "I-literate_technical_term",
"9": "O-literate_agentless_passive",
"90": "O-literate_temporal_embedding",
"91": "B-literate_temporal_embedding",
"92": "I-literate_temporal_embedding",
"93": "O-oral_anaphora",
"94": "B-oral_anaphora",
"95": "I-oral_anaphora",
"96": "O-oral_antithesis",
"97": "B-oral_antithesis",
"98": "I-oral_antithesis",
"99": "O-oral_discourse_formula"
},
"initializer_cutoff_factor": 2.0,
"initializer_range": 0.02,
"intermediate_size": 1152,
"label2id": {
"B-literate_abstract_noun": 1,
"B-literate_additive_formal": 4,
"B-literate_agent_demoted": 7,
"B-literate_agentless_passive": 10,
"B-literate_aside": 13,
"B-literate_categorical_statement": 16,
"B-literate_causal_explicit": 19,
"B-literate_citation": 22,
"B-literate_conceptual_metaphor": 25,
"B-literate_concessive": 28,
"B-literate_concessive_connector": 31,
"B-literate_concrete_setting": 34,
"B-literate_conditional": 37,
"B-literate_contrastive": 40,
"B-literate_cross_reference": 43,
"B-literate_definitional_move": 46,
"B-literate_enumeration": 49,
"B-literate_epistemic_hedge": 52,
"B-literate_evidential": 55,
"B-literate_institutional_subject": 58,
"B-literate_list_structure": 61,
"B-literate_metadiscourse": 64,
"B-literate_nested_clauses": 67,
"B-literate_nominalization": 70,
"B-literate_objectifying_stance": 73,
"B-literate_probability": 76,
"B-literate_qualified_assertion": 79,
"B-literate_relative_chain": 82,
"B-literate_technical_abbreviation": 85,
"B-literate_technical_term": 88,
"B-literate_temporal_embedding": 91,
"B-oral_anaphora": 94,
"B-oral_antithesis": 97,
"B-oral_discourse_formula": 100,
"B-oral_embodied_action": 103,
"B-oral_everyday_example": 106,
"B-oral_imperative": 109,
"B-oral_inclusive_we": 112,
"B-oral_intensifier_doubling": 115,
"B-oral_lexical_repetition": 118,
"B-oral_named_individual": 121,
"B-oral_parallelism": 124,
"B-oral_phatic_check": 127,
"B-oral_phatic_filler": 130,
"B-oral_rhetorical_question": 133,
"B-oral_second_person": 136,
"B-oral_self_correction": 139,
"B-oral_sensory_detail": 142,
"B-oral_simple_conjunction": 145,
"B-oral_specific_place": 148,
"B-oral_temporal_anchor": 151,
"B-oral_tricolon": 154,
"B-oral_vocative": 157,
"I-literate_abstract_noun": 2,
"I-literate_additive_formal": 5,
"I-literate_agent_demoted": 8,
"I-literate_agentless_passive": 11,
"I-literate_aside": 14,
"I-literate_categorical_statement": 17,
"I-literate_causal_explicit": 20,
"I-literate_citation": 23,
"I-literate_conceptual_metaphor": 26,
"I-literate_concessive": 29,
"I-literate_concessive_connector": 32,
"I-literate_concrete_setting": 35,
"I-literate_conditional": 38,
"I-literate_contrastive": 41,
"I-literate_cross_reference": 44,
"I-literate_definitional_move": 47,
"I-literate_enumeration": 50,
"I-literate_epistemic_hedge": 53,
"I-literate_evidential": 56,
"I-literate_institutional_subject": 59,
"I-literate_list_structure": 62,
"I-literate_metadiscourse": 65,
"I-literate_nested_clauses": 68,
"I-literate_nominalization": 71,
"I-literate_objectifying_stance": 74,
"I-literate_probability": 77,
"I-literate_qualified_assertion": 80,
"I-literate_relative_chain": 83,
"I-literate_technical_abbreviation": 86,
"I-literate_technical_term": 89,
"I-literate_temporal_embedding": 92,
"I-oral_anaphora": 95,
"I-oral_antithesis": 98,
"I-oral_discourse_formula": 101,
"I-oral_embodied_action": 104,
"I-oral_everyday_example": 107,
"I-oral_imperative": 110,
"I-oral_inclusive_we": 113,
"I-oral_intensifier_doubling": 116,
"I-oral_lexical_repetition": 119,
"I-oral_named_individual": 122,
"I-oral_parallelism": 125,
"I-oral_phatic_check": 128,
"I-oral_phatic_filler": 131,
"I-oral_rhetorical_question": 134,
"I-oral_second_person": 137,
"I-oral_self_correction": 140,
"I-oral_sensory_detail": 143,
"I-oral_simple_conjunction": 146,
"I-oral_specific_place": 149,
"I-oral_temporal_anchor": 152,
"I-oral_tricolon": 155,
"I-oral_vocative": 158,
"O-literate_abstract_noun": 0,
"O-literate_additive_formal": 3,
"O-literate_agent_demoted": 6,
"O-literate_agentless_passive": 9,
"O-literate_aside": 12,
"O-literate_categorical_statement": 15,
"O-literate_causal_explicit": 18,
"O-literate_citation": 21,
"O-literate_conceptual_metaphor": 24,
"O-literate_concessive": 27,
"O-literate_concessive_connector": 30,
"O-literate_concrete_setting": 33,
"O-literate_conditional": 36,
"O-literate_contrastive": 39,
"O-literate_cross_reference": 42,
"O-literate_definitional_move": 45,
"O-literate_enumeration": 48,
"O-literate_epistemic_hedge": 51,
"O-literate_evidential": 54,
"O-literate_institutional_subject": 57,
"O-literate_list_structure": 60,
"O-literate_metadiscourse": 63,
"O-literate_nested_clauses": 66,
"O-literate_nominalization": 69,
"O-literate_objectifying_stance": 72,
"O-literate_probability": 75,
"O-literate_qualified_assertion": 78,
"O-literate_relative_chain": 81,
"O-literate_technical_abbreviation": 84,
"O-literate_technical_term": 87,
"O-literate_temporal_embedding": 90,
"O-oral_anaphora": 93,
"O-oral_antithesis": 96,
"O-oral_discourse_formula": 99,
"O-oral_embodied_action": 102,
"O-oral_everyday_example": 105,
"O-oral_imperative": 108,
"O-oral_inclusive_we": 111,
"O-oral_intensifier_doubling": 114,
"O-oral_lexical_repetition": 117,
"O-oral_named_individual": 120,
"O-oral_parallelism": 123,
"O-oral_phatic_check": 126,
"O-oral_phatic_filler": 129,
"O-oral_rhetorical_question": 132,
"O-oral_second_person": 135,
"O-oral_self_correction": 138,
"O-oral_sensory_detail": 141,
"O-oral_simple_conjunction": 144,
"O-oral_specific_place": 147,
"O-oral_temporal_anchor": 150,
"O-oral_tricolon": 153,
"O-oral_vocative": 156
},
"layer_norm_eps": 1e-05,
"layer_types": [
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention"
],
"local_attention": 128,
"max_position_embeddings": 8192,
"mlp_bias": false,
"mlp_dropout": 0.0,
"model_type": "modernbert",
"norm_bias": false,
"norm_eps": 1e-05,
"num_attention_heads": 12,
"num_hidden_layers": 22,
"num_types": 53,
"pad_token_id": 50283,
"position_embedding_type": "absolute",
"repad_logits_with_grad": false,
"rope_parameters": {
"full_attention": {
"rope_theta": 160000.0,
"rope_type": "default"
},
"sliding_attention": {
"rope_theta": 10000.0,
"rope_type": "default"
}
},
"sep_token_id": 50282,
"sparse_pred_ignore_index": -100,
"sparse_prediction": false,
"tie_word_embeddings": true,
"transformers_version": "5.0.0",
"use_crf": true,
"vocab_size": 50368
}