{
  "_name_or_path": "distilbert/distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "Bloom-7B",
    "1": "Claude-Instant-v1",
    "2": "Claude-v1",
    "3": "Cohere-Command",
    "4": "Dolphin-2.5-Mixtral-8x7B",
    "5": "Dolphin-Mixtral-8x7B",
    "6": "Falcon-180B",
    "7": "Flan-T5-Base",
    "8": "Flan-T5-Large",
    "9": "Flan-T5-Small",
    "10": "Flan-T5-XL",
    "11": "Flan-T5-XXL",
    "12": "GLM-130B",
    "13": "GPT-3.5",
    "14": "GPT-4",
    "15": "GPT-J",
    "16": "GPT-NeoX",
    "17": "Gemini-Pro",
    "18": "Goliath-120B",
    "19": "Human",
    "20": "LLaMA-13B",
    "21": "LLaMA-2-70B",
    "22": "LLaMA-2-7B",
    "23": "LLaMA-30B",
    "24": "LLaMA-65B",
    "25": "LLaMA-7B",
    "26": "LZLV-70B",
    "27": "Mistral-7B",
    "28": "Mistral-7B-OpenOrca",
    "29": "Mixtral-8x7B",
    "30": "MythoMax-L2-13B",
    "31": "Neural-Chat-7B",
    "32": "Noromaid-20B",
    "33": "Nous-Capybara-34B",
    "34": "Nous-Capybara-7B",
    "35": "Nous-Hermes-LLaMA-2-13B",
    "36": "Nous-Hermes-LLaMA-2-70B",
    "37": "OPT-1.3B",
    "38": "OPT-125M",
    "39": "OPT-13B",
    "40": "OPT-2.7B",
    "41": "OPT-30B",
    "42": "OPT-350M",
    "43": "OPT-6.7B",
    "44": "OpenChat-3.5",
    "45": "OpenHermes-2-Mistral-7B",
    "46": "OpenHermes-2.5-Mistral-7B",
    "47": "PaLM-2",
    "48": "Psyfighter-13B",
    "49": "Psyfighter-2-13B",
    "50": "RWKV-5-World-3B",
    "51": "StripedHyena-Nous-7B",
    "52": "T0-11B",
    "53": "T0-3B",
    "54": "Text-Ada-001",
    "55": "Text-Babbage-001",
    "56": "Text-Curie-001",
    "57": "Text-Davinci-001",
    "58": "Text-Davinci-002",
    "59": "Text-Davinci-003",
    "60": "Toppy-M-7B",
    "61": "Unknown",
    "62": "YI-34B"
  },
  "initializer_range": 0.02,
  "label2id": {
    "Bloom-7B": 0,
    "Claude-Instant-v1": 1,
    "Claude-v1": 2,
    "Cohere-Command": 3,
    "Dolphin-2.5-Mixtral-8x7B": 4,
    "Dolphin-Mixtral-8x7B": 5,
    "Falcon-180B": 6,
    "Flan-T5-Base": 7,
    "Flan-T5-Large": 8,
    "Flan-T5-Small": 9,
    "Flan-T5-XL": 10,
    "Flan-T5-XXL": 11,
    "GLM-130B": 12,
    "GPT-3.5": 13,
    "GPT-4": 14,
    "GPT-J": 15,
    "GPT-NeoX": 16,
    "Gemini-Pro": 17,
    "Goliath-120B": 18,
    "Human": 19,
    "LLaMA-13B": 20,
    "LLaMA-2-70B": 21,
    "LLaMA-2-7B": 22,
    "LLaMA-30B": 23,
    "LLaMA-65B": 24,
    "LLaMA-7B": 25,
    "LZLV-70B": 26,
    "Mistral-7B": 27,
    "Mistral-7B-OpenOrca": 28,
    "Mixtral-8x7B": 29,
    "MythoMax-L2-13B": 30,
    "Neural-Chat-7B": 31,
    "Noromaid-20B": 32,
    "Nous-Capybara-34B": 33,
    "Nous-Capybara-7B": 34,
    "Nous-Hermes-LLaMA-2-13B": 35,
    "Nous-Hermes-LLaMA-2-70B": 36,
    "OPT-1.3B": 37,
    "OPT-125M": 38,
    "OPT-13B": 39,
    "OPT-2.7B": 40,
    "OPT-30B": 41,
    "OPT-350M": 42,
    "OPT-6.7B": 43,
    "OpenChat-3.5": 44,
    "OpenHermes-2-Mistral-7B": 45,
    "OpenHermes-2.5-Mistral-7B": 46,
    "PaLM-2": 47,
    "Psyfighter-13B": 48,
    "Psyfighter-2-13B": 49,
    "RWKV-5-World-3B": 50,
    "StripedHyena-Nous-7B": 51,
    "T0-11B": 52,
    "T0-3B": 53,
    "Text-Ada-001": 54,
    "Text-Babbage-001": 55,
    "Text-Curie-001": 56,
    "Text-Davinci-001": 57,
    "Text-Davinci-002": 58,
    "Text-Davinci-003": 59,
    "Toppy-M-7B": 60,
    "Unknown": 61,
    "YI-34B": 62
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.40.1",
  "vocab_size": 30522
}
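
The config above describes a DistilBERT sequence classifier with 63 output classes: 61 named source models plus the Human and Unknown labels. Below is a minimal inference sketch, assuming the trained weights and tokenizer files sit alongside this config.json in a checkpoint directory or Hub repo; MODEL_ID is a placeholder, not the actual repository name.

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Placeholder: point this at the directory or Hub repo that holds this
# config.json together with the trained weights and tokenizer files.
MODEL_ID = "path/to/checkpoint"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.eval()

text = "Sample passage whose source we want to attribute."
# max_position_embeddings is 512, so truncate inputs to fit.
inputs = tokenizer(text, truncation=True, max_length=512, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits  # shape: (1, 63)

# id2label from the config maps the argmax index back to a label string.
pred = logits.argmax(dim=-1).item()
print(model.config.id2label[pred])  # e.g. "Human", "GPT-4", or "Unknown"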