{ "_name_or_path": "distilbert-base-cased", "activation": "gelu", "architectures": [ "DistilBertForSequenceClassification" ], "attention_dropout": 0.1, "dim": 768, "dropout": 0.1, "hidden_dim": 3072, "id2label": { "0": "Bloom-7B", "1": "Claude-Instant-v1", "2": "Claude-v1", "3": "Cohere-Command", "4": "Dolphin-2.5-Mixtral-8x7B", "5": "Dolphin-Mixtral-8x7B", "6": "Falcon-180B", "7": "Flan-T5-Base", "8": "Flan-T5-Large", "9": "Flan-T5-Small", "10": "Flan-T5-XL", "11": "Flan-T5-XXL", "12": "GLM-130B", "13": "GPT-3.5", "14": "GPT-4", "15": "GPT-J", "16": "GPT-NeoX", "17": "Gemini-Pro", "18": "Goliath-120B", "19": "Human", "20": "LLaMA-13B", "21": "LLaMA-2-70B", "22": "LLaMA-2-7B", "23": "LLaMA-30B", "24": "LLaMA-65B", "25": "LLaMA-7B", "26": "LZLV-70B", "27": "Mistral-7B", "28": "Mistral-7B-OpenOrca", "29": "Mixtral-8x7B", "30": "MythoMax-L2-13B", "31": "Neural-Chat-7B", "32": "Noromaid-20B", "33": "Nous-Capybara-34B", "34": "Nous-Capybara-7B", "35": "Nous-Hermes-LLaMA-2-13B", "36": "Nous-Hermes-LLaMA-2-70B", "37": "OPT-1.3B", "38": "OPT-125M", "39": "OPT-13B", "40": "OPT-2.7B", "41": "OPT-30B", "42": "OPT-350M", "43": "OPT-6.7B", "44": "OpenChat-3.5", "45": "OpenHermes-2-Mistral-7B", "46": "OpenHermes-2.5-Mistral-7B", "47": "PaLM-2", "48": "Psyfighter-13B", "49": "Psyfighter-2-13B", "50": "RWKV-5-World-3B", "51": "StripedHyena-Nous-7B", "52": "T0-11B", "53": "T0-3B", "54": "Text-Ada-001", "55": "Text-Babbage-001", "56": "Text-Curie-001", "57": "Text-Davinci-001", "58": "Text-Davinci-002", "59": "Text-Davinci-003", "60": "Toppy-M-7B", "61": "Unknown", "62": "YI-34B" }, "initializer_range": 0.02, "label2id": { "Bloom-7B": 0, "Claude-Instant-v1": 1, "Claude-v1": 2, "Cohere-Command": 3, "Dolphin-2.5-Mixtral-8x7B": 4, "Dolphin-Mixtral-8x7B": 5, "Falcon-180B": 6, "Flan-T5-Base": 7, "Flan-T5-Large": 8, "Flan-T5-Small": 9, "Flan-T5-XL": 10, "Flan-T5-XXL": 11, "GLM-130B": 12, "GPT-3.5": 13, "GPT-4": 14, "GPT-J": 15, "GPT-NeoX": 16, "Gemini-Pro": 17, "Goliath-120B": 18, "Human": 19, "LLaMA-13B": 20, "LLaMA-2-70B": 21, "LLaMA-2-7B": 22, "LLaMA-30B": 23, "LLaMA-65B": 24, "LLaMA-7B": 25, "LZLV-70B": 26, "Mistral-7B": 27, "Mistral-7B-OpenOrca": 28, "Mixtral-8x7B": 29, "MythoMax-L2-13B": 30, "Neural-Chat-7B": 31, "Noromaid-20B": 32, "Nous-Capybara-34B": 33, "Nous-Capybara-7B": 34, "Nous-Hermes-LLaMA-2-13B": 35, "Nous-Hermes-LLaMA-2-70B": 36, "OPT-1.3B": 37, "OPT-125M": 38, "OPT-13B": 39, "OPT-2.7B": 40, "OPT-30B": 41, "OPT-350M": 42, "OPT-6.7B": 43, "OpenChat-3.5": 44, "OpenHermes-2-Mistral-7B": 45, "OpenHermes-2.5-Mistral-7B": 46, "PaLM-2": 47, "Psyfighter-13B": 48, "Psyfighter-2-13B": 49, "RWKV-5-World-3B": 50, "StripedHyena-Nous-7B": 51, "T0-11B": 52, "T0-3B": 53, "Text-Ada-001": 54, "Text-Babbage-001": 55, "Text-Curie-001": 56, "Text-Davinci-001": 57, "Text-Davinci-002": 58, "Text-Davinci-003": 59, "Toppy-M-7B": 60, "Unknown": 61, "YI-34B": 62 }, "max_position_embeddings": 512, "model_type": "distilbert", "n_heads": 12, "n_layers": 6, "output_past": true, "pad_token_id": 0, "qa_dropout": 0.1, "seq_classif_dropout": 0.2, "sinusoidal_pos_embds": false, "tie_weights_": true, "torch_dtype": "float32", "transformers_version": "4.36.2", "vocab_size": 28996 }