Transformers
PyTorch
English
mctct
speech
Inference Endpoints
File size: 2,799 Bytes
697f669
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
{
  "architectures": [
    "MCTCTForAudioFrameClassification",
    "MCTCTForSequenceClassification"
  ],
  "attention_head_dim": 384,
  "attention_probs_dropout_prob": 0.3,
  "bos_token_id": 0,
  "conv_channels": null,
  "conv_dropout": 0.3,
  "conv_glu_dim": 1,
  "conv_kernel": [
    7
  ],
  "conv_stride": [
    3
  ],
  "ctc_loss_reduction": "sum",
  "ctc_zero_infinity": false,
  "eos_token_id": 2,
  "hidden_act": "relu",
  "hidden_dropout_prob": 0.3,
  "hidden_size": 1536,
  "id2label": {
    "0": "ab",
    "1": "ar",
    "10": "dv",
    "11": "el",
    "12": "en",
    "13": "eo",
    "14": "es",
    "15": "et",
    "16": "eu",
    "17": "fa",
    "18": "fi",
    "19": "fr",
    "2": "as",
    "20": "fy-NL",
    "21": "ga-IE",
    "22": "hi",
    "23": "hsb",
    "24": "hu",
    "25": "ia",
    "26": "id",
    "27": "it",
    "28": "ja",
    "29": "ka",
    "3": "br",
    "30": "kab",
    "31": "ky",
    "32": "lg",
    "33": "lt",
    "34": "lv",
    "35": "mn",
    "36": "mt",
    "37": "nl",
    "38": "or",
    "39": "pa-IN",
    "4": "ca",
    "40": "pl",
    "41": "pt",
    "42": "rm-sursilv",
    "43": "rm-vallader",
    "44": "ro",
    "45": "ru",
    "46": "rw",
    "47": "sah",
    "48": "sl",
    "49": "sv-SE",
    "5": "cnh",
    "50": "ta",
    "51": "th",
    "52": "tr",
    "53": "tt",
    "54": "uk",
    "55": "vi",
    "56": "vot",
    "57": "zh-CN",
    "58": "zh-HK",
    "59": "zh-TW",
    "6": "cs",
    "7": "cv",
    "8": "cy",
    "9": "de"
  },
  "initializer_range": 0.02,
  "input_channels": 1,
  "input_feat_per_channel": 80,
  "intermediate_size": 6144,
  "label2id": {
    "ab": 0,
    "ar": 1,
    "as": 2,
    "br": 3,
    "ca": 4,
    "cnh": 5,
    "cs": 6,
    "cv": 7,
    "cy": 8,
    "de": 9,
    "dv": 10,
    "el": 11,
    "en": 12,
    "eo": 13,
    "es": 14,
    "et": 15,
    "eu": 16,
    "fa": 17,
    "fi": 18,
    "fr": 19,
    "fy-NL": 20,
    "ga-IE": 21,
    "hi": 22,
    "hsb": 23,
    "hu": 24,
    "ia": 25,
    "id": 26,
    "it": 27,
    "ja": 28,
    "ka": 29,
    "kab": 30,
    "ky": 31,
    "lg": 32,
    "lt": 33,
    "lv": 34,
    "mn": 35,
    "mt": 36,
    "nl": 37,
    "or": 38,
    "pa-IN": 39,
    "pl": 40,
    "pt": 41,
    "rm-sursilv": 42,
    "rm-vallader": 43,
    "ro": 44,
    "ru": 45,
    "rw": 46,
    "sah": 47,
    "sl": 48,
    "sv-SE": 49,
    "ta": 50,
    "th": 51,
    "tr": 52,
    "tt": 53,
    "uk": 54,
    "vi": 55,
    "vot": 56,
    "zh-CN": 57,
    "zh-HK": 58,
    "zh-TW": 59
  },
  "layer_norm_eps": 1e-05,
  "layerdrop": 0.3,
  "max_position_embeddings": 920,
  "model_type": "mctct",
  "num_attention_heads": 4,
  "num_conv_layers": 1,
  "num_hidden_layers": 36,
  "pad_token_id": 1,
  "torch_dtype": "float32",
  "transformers_version": "4.20.0.dev0",
  "vocab_size": 8065
}