{ "_name_or_path": "albert-base-v1", "architectures": [ "AlbertForMaskedLM" ], "attention_probs_dropout_prob": 0.1, "bos_token_id": 2, "classifier_dropout_prob": 0.1, "down_scale_factor": 1, "embedding_size": 128, "eos_token_id": 3, "gap_size": 0, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "B-LOC", "1": "B-LOCderiv", "2": "B-LOCpart", "3": "B-ORG", "4": "B-ORGderiv", "5": "B-ORGpart", "6": "B-OTH", "7": "B-OTHderiv", "8": "B-OTHpart", "9": "B-PER", "10": "B-PERderiv", "11": "B-PERpart", "12": "I-LOC", "13": "I-LOCderiv", "14": "I-LOCpart", "15": "I-ORG", "16": "I-ORGpart", "17": "I-OTH", "18": "I-OTHderiv", "19": "I-OTHpart", "20": "I-PER", "21": "I-PERderiv", "22": "I-PERpart", "23": "O" }, "initializer_range": 0.02, "inner_group_num": 1, "intermediate_size": 3072, "label2id": { "B-LOC": 0, "B-LOCderiv": 1, "B-LOCpart": 2, "B-ORG": 3, "B-ORGderiv": 4, "B-ORGpart": 5, "B-OTH": 6, "B-OTHderiv": 7, "B-OTHpart": 8, "B-PER": 9, "B-PERderiv": 10, "B-PERpart": 11, "I-LOC": 12, "I-LOCderiv": 13, "I-LOCpart": 14, "I-ORG": 15, "I-ORGpart": 16, "I-OTH": 17, "I-OTHderiv": 18, "I-OTHpart": 19, "I-PER": 20, "I-PERderiv": 21, "I-PERpart": 22, "O": 23 }, "layer_norm_eps": 1e-12, "max_position_embeddings": 512, "model_type": "albert", "net_structure_type": 0, "num_attention_heads": 12, "num_hidden_groups": 1, "num_hidden_layers": 12, "num_memory_blocks": 0, "pad_token_id": 0, "position_embedding_type": "absolute", "transformers_version": "4.4.0.dev0", "type_vocab_size": 2, "vocab_size": 30000 }