{
"_name_or_path": "mms-lid-126",
"activation_dropout": 0.05,
"adapter_attn_dim": 16,
"adapter_kernel_size": 3,
"adapter_stride": 2,
"add_adapter": false,
"apply_spec_augment": true,
"architectures": [
"Wav2Vec2ForSequenceClassification"
],
"attention_dropout": 0.05,
"bos_token_id": 1,
"classifier_proj_size": 1024,
"codevector_dim": 1024,
"contrastive_logits_temperature": 0.1,
"conv_bias": true,
"conv_dim": [
512,
512,
512,
512,
512,
512,
512
],
"conv_kernel": [
10,
3,
3,
3,
3,
2,
2
],
"conv_stride": [
5,
2,
2,
2,
2,
2,
2
],
"ctc_loss_reduction": "mean",
"ctc_zero_infinity": false,
"diversity_loss_weight": 0.1,
"do_stable_layer_norm": true,
"eos_token_id": 2,
"feat_extract_activation": "gelu",
"feat_extract_dropout": 0.0,
"feat_extract_norm": "layer",
"feat_proj_dropout": 0.05,
"feat_quantizer_dropout": 0.0,
"final_dropout": 0.05,
"hidden_act": "gelu",
"hidden_dropout": 0.05,
"hidden_size": 1280,
"id2label": {
"0": "ara",
"1": "cmn",
"2": "eng",
"3": "spa",
"4": "fra",
"5": "mlg",
"6": "swe",
"7": "por",
"8": "vie",
"9": "ful",
"10": "sun",
"11": "asm",
"12": "ben",
"13": "zlm",
"14": "kor",
"15": "ind",
"16": "hin",
"17": "tuk",
"18": "urd",
"19": "aze",
"20": "slv",
"21": "mon",
"22": "hau",
"23": "tel",
"24": "swh",
"25": "bod",
"26": "rus",
"27": "tur",
"28": "heb",
"29": "mar",
"30": "som",
"31": "tgl",
"32": "tat",
"33": "tha",
"34": "cat",
"35": "ron",
"36": "mal",
"37": "bel",
"38": "pol",
"39": "yor",
"40": "nld",
"41": "bul",
"42": "hat",
"43": "afr",
"44": "isl",
"45": "amh",
"46": "tam",
"47": "hun",
"48": "hrv",
"49": "lit",
"50": "cym",
"51": "fas",
"52": "mkd",
"53": "ell",
"54": "bos",
"55": "deu",
"56": "sqi",
"57": "jav",
"58": "nob",
"59": "uzb",
"60": "snd",
"61": "lat",
"62": "nya",
"63": "grn",
"64": "mya",
"65": "orm",
"66": "lin",
"67": "hye",
"68": "yue",
"69": "pan",
"70": "jpn",
"71": "kaz",
"72": "npi",
"73": "kat",
"74": "guj",
"75": "kan",
"76": "tgk",
"77": "ukr",
"78": "ces",
"79": "lav",
"80": "bak",
"81": "khm",
"82": "fao",
"83": "glg",
"84": "ltz",
"85": "lao",
"86": "mlt",
"87": "sin",
"88": "sna",
"89": "ita",
"90": "srp",
"91": "mri",
"92": "nno",
"93": "pus",
"94": "eus",
"95": "ory",
"96": "lug",
"97": "bre",
"98": "luo",
"99": "slk",
"100": "fin",
"101": "dan",
"102": "yid",
"103": "est",
"104": "ceb",
"105": "war",
"106": "san",
"107": "kir",
"108": "oci",
"109": "wol",
"110": "haw",
"111": "kam",
"112": "umb",
"113": "xho",
"114": "epo",
"115": "zul",
"116": "ibo",
"117": "abk",
"118": "ckb",
"119": "nso",
"120": "gle",
"121": "kea",
"122": "ast",
"123": "sco",
"124": "glv",
"125": "ina"
},
"initializer_range": 0.02,
"intermediate_size": 5120,
"label2id": null,
"layer_norm_eps": 1e-05,
"layerdrop": 0.05,
"mask_feature_length": 10,
"mask_feature_min_masks": 0,
"mask_feature_prob": 0.0,
"mask_time_length": 10,
"mask_time_min_masks": 2,
"mask_time_prob": 0.05,
"model_type": "wav2vec2",
"num_adapter_layers": 3,
"num_attention_heads": 16,
"num_codevector_groups": 2,
"num_codevectors_per_group": 320,
"num_conv_pos_embedding_groups": 16,
"num_conv_pos_embeddings": 128,
"num_feat_extract_layers": 7,
"num_hidden_layers": 48,
"num_negatives": 100,
"output_hidden_size": 1280,
"pad_token_id": 0,
"proj_codevector_dim": 1024,
"tdnn_dilation": [
1,
2,
3,
1,
1
],
"tdnn_dim": [
512,
512,
512,
512,
1500
],
"tdnn_kernel": [
5,
3,
3,
1,
1
],
"torch_dtype": "float32",
"transformers_version": "4.31.0.dev0",
"use_weighted_layer_sum": false,
"vocab_size": 154,
"xvector_output_dim": 512
}