sanchit-gandhi's picture
Training in progress, step 10000
8b713fe
raw
history blame
4.81 kB
{
"_name_or_path": "MIT/ast-finetuned-audioset-10-10-0.4593",
"architectures": [
"ASTForAudioClassification"
],
"attention_probs_dropout_prob": 0.0,
"frequency_stride": 10,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_size": 768,
"id2label": {
"0": "Afrikaans",
"1": "Amharic",
"2": "Arabic",
"3": "Assamese",
"4": "Asturian",
"5": "Azerbaijani",
"6": "Belarusian",
"7": "Bulgarian",
"8": "Bengali",
"9": "Bosnian",
"10": "Catalan",
"11": "Cebuano",
"12": "Sorani-Kurdish",
"13": "Mandarin Chinese",
"14": "Czech",
"15": "Welsh",
"16": "Danish",
"17": "German",
"18": "Greek",
"19": "English",
"20": "Spanish",
"21": "Estonian",
"22": "Persian",
"23": "Fula",
"24": "Finnish",
"25": "Filipino",
"26": "French",
"27": "Irish",
"28": "Galician",
"29": "Gujarati",
"30": "Hausa",
"31": "Hebrew",
"32": "Hindi",
"33": "Croatian",
"34": "Hungarian",
"35": "Armenian",
"36": "Indonesian",
"37": "Igbo",
"38": "Icelandic",
"39": "Italian",
"40": "Japanese",
"41": "Javanese",
"42": "Georgian",
"43": "Kamba",
"44": "Kabuverdianu",
"45": "Kazakh",
"46": "Khmer",
"47": "Kannada",
"48": "Korean",
"49": "Kyrgyz",
"50": "Luxembourgish",
"51": "Ganda",
"52": "Lingala",
"53": "Lao",
"54": "Lithuanian",
"55": "Luo",
"56": "Latvian",
"57": "Maori",
"58": "Macedonian",
"59": "Malayalam",
"60": "Mongolian",
"61": "Marathi",
"62": "Malay",
"63": "Maltese",
"64": "Burmese",
"65": "Norwegian",
"66": "Nepali",
"67": "Dutch",
"68": "Northern-Sotho",
"69": "Nyanja",
"70": "Occitan",
"71": "Oromo",
"72": "Oriya",
"73": "Punjabi",
"74": "Polish",
"75": "Pashto",
"76": "Portuguese",
"77": "Romanian",
"78": "Russian",
"79": "Sindhi",
"80": "Slovak",
"81": "Slovenian",
"82": "Shona",
"83": "Somali",
"84": "Serbian",
"85": "Swedish",
"86": "Swahili",
"87": "Tamil",
"88": "Telugu",
"89": "Tajik",
"90": "Thai",
"91": "Turkish",
"92": "Ukrainian",
"93": "Umbundu",
"94": "Urdu",
"95": "Uzbek",
"96": "Vietnamese",
"97": "Wolof",
"98": "Xhosa",
"99": "Yoruba",
"100": "Cantonese Chinese",
"101": "Zulu"
},
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"Afrikaans": 0,
"Amharic": 1,
"Arabic": 2,
"Armenian": 35,
"Assamese": 3,
"Asturian": 4,
"Azerbaijani": 5,
"Belarusian": 6,
"Bengali": 8,
"Bosnian": 9,
"Bulgarian": 7,
"Burmese": 64,
"Cantonese Chinese": 100,
"Catalan": 10,
"Cebuano": 11,
"Croatian": 33,
"Czech": 14,
"Danish": 16,
"Dutch": 67,
"English": 19,
"Estonian": 21,
"Filipino": 25,
"Finnish": 24,
"French": 26,
"Fula": 23,
"Galician": 28,
"Ganda": 51,
"Georgian": 42,
"German": 17,
"Greek": 18,
"Gujarati": 29,
"Hausa": 30,
"Hebrew": 31,
"Hindi": 32,
"Hungarian": 34,
"Icelandic": 38,
"Igbo": 37,
"Indonesian": 36,
"Irish": 27,
"Italian": 39,
"Japanese": 40,
"Javanese": 41,
"Kabuverdianu": 44,
"Kamba": 43,
"Kannada": 47,
"Kazakh": 45,
"Khmer": 46,
"Korean": 48,
"Kyrgyz": 49,
"Lao": 53,
"Latvian": 56,
"Lingala": 52,
"Lithuanian": 54,
"Luo": 55,
"Luxembourgish": 50,
"Macedonian": 58,
"Malay": 62,
"Malayalam": 59,
"Maltese": 63,
"Mandarin Chinese": 13,
"Maori": 57,
"Marathi": 61,
"Mongolian": 60,
"Nepali": 66,
"Northern-Sotho": 68,
"Norwegian": 65,
"Nyanja": 69,
"Occitan": 70,
"Oriya": 72,
"Oromo": 71,
"Pashto": 75,
"Persian": 22,
"Polish": 74,
"Portuguese": 76,
"Punjabi": 73,
"Romanian": 77,
"Russian": 78,
"Serbian": 84,
"Shona": 82,
"Sindhi": 79,
"Slovak": 80,
"Slovenian": 81,
"Somali": 83,
"Sorani-Kurdish": 12,
"Spanish": 20,
"Swahili": 86,
"Swedish": 85,
"Tajik": 89,
"Tamil": 87,
"Telugu": 88,
"Thai": 90,
"Turkish": 91,
"Ukrainian": 92,
"Umbundu": 93,
"Urdu": 94,
"Uzbek": 95,
"Vietnamese": 96,
"Welsh": 15,
"Wolof": 97,
"Xhosa": 98,
"Yoruba": 99,
"Zulu": 101
},
"layer_norm_eps": 1e-12,
"max_length": 2048,
"model_type": "audio-spectrogram-transformer",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"num_mel_bins": 128,
"patch_size": 16,
"problem_type": "single_label_classification",
"qkv_bias": true,
"time_stride": 10,
"torch_dtype": "float16",
"transformers_version": "4.27.0.dev0"
}