patrickvonplaten committed on
Commit
0806a18
1 Parent(s): e875dec
Files changed (4) hide show
  1. app.py +67 -221
  2. languages.json +0 -982
  3. logo.png +0 -0
  4. requirements.txt +2 -0
app.py CHANGED
@@ -3,230 +3,76 @@ import json
3
  import pandas as pd
4
  from tqdm.auto import tqdm
5
  import streamlit as st
 
 
6
  from huggingface_hub import HfApi, hf_hub_download
7
  from huggingface_hub.repocard import metadata_load
 
8
  import datetime
 
9
 
 
 
10
 
 
 
11
 
12
# Capture the current date once at import time and split out its ISO
# calendar year and week number (the ISO weekday is discarded).
today = datetime.date.today()
year, week, _ = tuple(today.isocalendar())
14
-
15
-
16
def make_clickable(model_name):
    """Return an HTML anchor that links ``model_name`` to its Hub page.

    The result is meant to be rendered as raw HTML, so the id is URL-quoted
    for the ``href`` and HTML-escaped for the visible text. Ordinary model
    ids (letters, digits, ``-``, ``_``, ``.``, ``/``) come out unchanged.

    Args:
        model_name: Model identifier, e.g. ``"org/model"``.

    Returns:
        An ``<a target="_blank" ...>`` element as a string.
    """
    # Imported locally so the block does not depend on file-level imports.
    import html
    from urllib.parse import quote

    link = "https://huggingface.co/" + quote(model_name, safe="/")
    label = html.escape(model_name, quote=True)
    return f'<a target="_blank" href="{link}">{label}</a>'
19
-
20
-
21
def get_model_ids():
    """Return the ``modelId`` of every Hub model matching the ``esc-bench`` filter."""
    hub = HfApi()
    return [entry.modelId for entry in hub.list_models(filter="esc-bench")]
26
-
27
-
28
def get_metadata(model_id):
    """Load the model-card metadata of ``model_id`` from its README.md.

    Args:
        model_id: Hub repository id of the model.

    Returns:
        Whatever ``metadata_load`` returns for the downloaded README, or
        ``None`` when downloading or parsing fails (for example when the
        repository has no README.md).
    """
    try:
        readme_path = hf_hub_download(model_id, filename="README.md")
        return metadata_load(readme_path)
    except Exception:
        # Best effort: a broken or missing model card must not stop the
        # leaderboard. `Exception` (not a bare `except`) keeps
        # KeyboardInterrupt and SystemExit propagating.
        print(f"Model id: {model_id} is not great!")
        return None
36
-
37
-
38
-
39
def parse_metric_value(value):
    """Normalise a metric value from a model card to a float rounded to 2 digits.

    Handling by input type:
      * ``str``: ``%`` signs and surrounding whitespace are removed before
        conversion; unparsable text yields ``None``. Parsed strings are not
        rescaled.
      * ``float`` below 1.1: assumed to be a fraction (0.xx) and multiplied
        by 100.
      * ``list``: the first element is used (``None`` for an empty list);
        a string element is parsed like a string, a numeric element is
        taken as-is without rescaling.
      * any other non-numeric value: ``None``.

    Args:
        value: Raw metric value.

    Returns:
        The rounded numeric value, or ``None`` when no number can be obtained.
    """
    rescale_fraction = True
    if isinstance(value, list):
        value = value[0] if value else None
        # List entries were never rescaled; keep that behaviour.
        rescale_fraction = False

    if isinstance(value, str):
        try:
            # The cleaned text must actually be used for the conversion,
            # otherwise values such as "12.5%" can never be parsed.
            value = float(value.replace("%", "").strip())
        except ValueError:
            return None
    elif isinstance(value, float):
        if rescale_fraction and value < 1.1:
            # assuming that WER is given in 0.xx format
            value = 100 * value
    elif value is not None and not isinstance(value, int):
        # dicts, nested lists, ... cannot be rounded
        return None

    return round(value, 2) if value is not None else None
56
-
57
-
58
def parse_metrics_rows(meta):
    """Yield one leaderboard row per evaluated dataset in a model card.

    Only the first entry of ``meta["model-index"]`` is inspected. Results
    lacking a ``dataset`` or ``metrics`` section are skipped, and a row is
    only yielded when it carries at least one ``wer`` or ``cer`` value.

    Args:
        meta: Parsed model-card metadata (a mapping).

    Yields:
        Dicts with the keys ``dataset``, ``lang``, ``config``, ``split`` and
        ``wer`` and/or ``cer``. Nothing is yielded when ``meta`` has no
        ``model-index`` or ``language`` entry, or the model index is empty.
    """
    if "model-index" not in meta or "language" not in meta:
        return
    model_index = meta["model-index"]
    if not model_index:
        # An empty model index has no results; avoid IndexError on [0].
        return

    for result in model_index[0].get("results") or []:
        if "dataset" not in result or "metrics" not in result:
            continue
        dataset_info = result["dataset"]
        dataset_args = dataset_info.get("args") or {}

        # Prefer the language declared for this dataset, else the card-level one.
        if "language" in dataset_args:
            lang = dataset_args["language"]
        else:
            lang = meta["language"]
        lang = lang[0] if isinstance(lang, list) else lang
        lang = aliases_lang[lang] if lang in aliases_lang else lang

        row = {
            "dataset": dataset_info["type"],
            "lang": lang,
            "config": dataset_info.get("config", lang),
            "split": dataset_info.get("split"),
        }
        for metric in result["metrics"]:
            metric_type = metric["type"].lower().strip()
            if metric_type not in ("wer", "cer"):
                continue
            value = parse_metric_value(metric["value"])
            if value is None:
                continue
            if metric_type not in row or value < row[metric_type]:
                # overwrite the metric if the new value is lower (e.g. with LM)
                row[metric_type] = value
        if "wer" in row or "cer" in row:
            yield row
91
-
92
-
93
@st.cache(ttl=600)
def get_data():
    """Collect the metric rows of every listed model into one DataFrame.

    Models whose metadata cannot be loaded are skipped. Each row produced by
    ``parse_metrics_rows`` is tagged with its ``model_id`` before being
    collected.
    """
    records = []
    for model_id in tqdm(get_model_ids()):
        meta = get_metadata(model_id)
        if meta is None:
            continue
        for row in parse_metrics_rows(meta):
            if row is not None:
                row["model_id"] = model_id
                records.append(row)
    return pd.DataFrame.from_records(records)
107
-
108
-
109
def sort_datasets(datasets):
    """Return ``datasets`` with suggested ones first, the rest alphabetically.

    Suggested datasets keep the order they have in ``suggested_datasets``;
    every other dataset follows, sorted by name.
    """
    unsuggested_rank = len(suggested_datasets)

    def rank(dataset_id):
        if dataset_id in suggested_datasets:
            return suggested_datasets.index(dataset_id)
        return unsuggested_rank

    # Alphabetical pass first; the stable second pass lifts the suggested ones.
    by_name = sorted(datasets)
    return sorted(by_name, key=rank)
120
-
121
-
122
@st.cache(ttl=600)
def generate_dataset_info(datasets):
    """Build the markdown bullet list describing ``datasets``.

    Each dataset becomes a link to its Hub page; datasets listed in
    ``suggested_datasets`` are additionally marked as recommended.
    """
    pieces = ["\nThe models have been trained and/or evaluated on the following datasets:\n"]
    for dataset_id in datasets:
        marker = " *(recommended)*" if dataset_id in suggested_datasets else ""
        pieces.append(f"* [{dataset_id}](https://hf.co/datasets/{dataset_id}){marker}\n")

    # Strip every line so no stray indentation reaches the markdown renderer.
    raw = "".join(pieces)
    return "\n".join(line.strip() for line in raw.split("\n"))
135
-
136
-
137
# ---------------------------------------------------------------------------
# Streamlit page: language / dataset selection and the ranked model table.
# ---------------------------------------------------------------------------

# Missing metrics become empty strings so they render as blank cells.
dataframe = get_data()
dataframe = dataframe.fillna("")

st.sidebar.image("logo.png", width=200)

st.markdown("# The 🤗 Speech Bench")

st.markdown(
    f"This is a leaderboard of **{dataframe['model_id'].nunique()}** speech recognition models "
    f"and **{dataframe['dataset'].nunique()}** datasets.\n\n"
    "⬅ Please select the language you want to find a model for from the dropdown on the left."
)

# Languages are listed (and sorted) by display name, falling back to the code.
lang = st.sidebar.selectbox(
    "Language",
    sorted(dataframe["lang"].unique(), key=lambda key: lang2name.get(key, key)),
    format_func=lambda key: lang2name.get(key, key),
    index=0,
)
lang_df = dataframe[dataframe.lang == lang]

sorted_datasets = sort_datasets(lang_df["dataset"].unique())

lang_name = lang2name.get(lang, "")
num_models = len(lang_df["model_id"].unique())
num_datasets = len(lang_df["dataset"].unique())
text = f"""
For the `{lang}` ({lang_name}) language, there are currently `{num_models}` model(s)
trained on `{num_datasets}` dataset(s) available for `automatic-speech-recognition`.
"""
st.markdown(text)

st.sidebar.markdown("""
Choose the dataset that is most relevant to your task and select it from the dropdown below:
""")

dataset = st.sidebar.selectbox(
    "Dataset",
    sorted_datasets,
    index=0,
)
dataset_df = lang_df[lang_df.dataset == dataset]

text = generate_dataset_info(sorted_datasets)
st.sidebar.markdown(text)

# sort by WER or CER depending on the language
metric_col = "cer" if lang in cer_langs else "wer"
if dataset_df["config"].nunique() > 1:
    # if there are more than one dataset config
    dataset_df = dataset_df[["model_id", "config", metric_col]]
    dataset_df = dataset_df.pivot_table(index=["model_id"], columns=["config"], values=[metric_col])
    dataset_df = dataset_df.reset_index(level=0)
else:
    dataset_df = dataset_df[["model_id", metric_col]]
# NOTE(review): the original indentation is lost; sorting is assumed to apply
# to both branches. Non in-place calls avoid mutating a slice of `lang_df`.
dataset_df = dataset_df.sort_values(dataset_df.columns[-1])
dataset_df = dataset_df.fillna("")

dataset_df = dataset_df.rename(
    columns={
        "model_id": "Model",
        "wer": "WER (lower is better)",
        "cer": "CER (lower is better)",
    },
)

st.markdown(
    "Please click on the model's name to be redirected to its model card which includes documentation and examples on how to use it."
)

# display the model ranks
dataset_df = dataset_df.reset_index(drop=True)
dataset_df.index += 1

# turn the model ids into clickable links
dataset_df["Model"] = dataset_df["Model"].apply(make_clickable)

table_html = dataset_df.to_html(escape=False)
table_html = table_html.replace("<th>", '<th align="left">')  # left-align the headers
st.write(table_html, unsafe_allow_html=True)

# "\\*" keeps the literal backslash markdown needs to show an asterisk;
# a bare "\*" is an invalid escape sequence in a normal string literal.
if lang in cer_langs:
    st.markdown(
        "---\n\\* **CER** is [Char Error Rate](https://huggingface.co/metrics/cer)"
    )
else:
    st.markdown(
        "---\n\\* **WER** is [Word Error Rate](https://huggingface.co/metrics/wer)"
    )

st.markdown(
    "Want to beat the Leaderboard? Don't see your speech recognition model show up here? "
    "Simply add the `hf-asr-leaderboard` tag to your model card alongside your evaluation metrics. "
    "Try our [Metrics Editor](https://huggingface.co/spaces/huggingface/speech-bench-metrics-editor) to get started!"
)
 
3
  import pandas as pd
4
  from tqdm.auto import tqdm
5
  import streamlit as st
6
+ from pandas import read_csv
7
+ import os
8
  from huggingface_hub import HfApi, hf_hub_download
9
  from huggingface_hub.repocard import metadata_load
10
+ import jiwer
11
  import datetime
12
+ from huggingface_hub import Repository
13
 
14
# Names of the two dataset repositories; also used as local clone directories.
REFERENCE_NAME = "references"
SUBMISSION_NAME = "submissions"

# URLs always use "/" — os.path.join would insert "\\" on Windows.
REFERENCE_URL = f"https://huggingface.co/datasets/esc-bench/{REFERENCE_NAME}"
SUBMISSION_URL = f"https://huggingface.co/datasets/esc-bench/{SUBMISSION_NAME}"

# Test sets every submission must provide, one "<name>.txt" file each.
TEST_SETS = [
    "librispeech-clean",
    "librispeech-other",
    "common-voice-9",
    "vox-populi",
    "ted-lium",
    "giga-speech",
    "spgi-speech",
    "earnings-22",
    "ami",
]
EXPECTED_TEST_FILES = [f + ".txt" for f in TEST_SETS]
OPTIONAL_TEST_SETS = ["switch-board", "call-home", "chime-4"]

# Local filesystem path of the results table inside the submissions clone.
CSV_RESULTS_FILE = os.path.join(SUBMISSION_NAME, "results.csv")


# Access token read from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
28
+
29
+
30
def compute_wer(pred_file, ref_file):
    """Return the ``jiwer.wer`` score of a prediction file against a reference file.

    Both files are read as UTF-8 text, one entry per line, with surrounding
    whitespace stripped from every line.

    Args:
        pred_file: Path of the file holding the predicted lines.
        ref_file: Path of the file holding the reference lines.
    """
    with open(pred_file, "r", encoding="utf-8") as pred_handle, open(ref_file, "r", encoding="utf-8") as ref_handle:
        hypotheses = [row.strip() for row in pred_handle]
        references = [row.strip() for row in ref_handle]

    return jiwer.wer(references, hypotheses)
37
+
38
+
39
# ---------------------------------------------------------------------------
# Evaluate submissions that are not yet in results.csv, then show the table.
# ---------------------------------------------------------------------------

# Local clones of the reference transcripts and of the submissions.
reference_repo = Repository(local_dir="references", clone_from=REFERENCE_URL, use_auth_token=HF_TOKEN)
submission_repo = Repository(local_dir="submissions", clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN)

# Every sub-folder of the submissions clone (except git's own) is a submission.
all_submissions = [
    folder
    for folder in os.listdir(SUBMISSION_NAME)
    if os.path.isdir(os.path.join(SUBMISSION_NAME, folder)) and folder != ".git"
]

all_results = read_csv(CSV_RESULTS_FILE)
evaluated_submissions = all_results["name"].values.tolist()

non_evaluated_submissions = set(all_submissions) - set(evaluated_submissions)
if non_evaluated_submissions:
    new_rows = []
    # Sorted so evaluation order (and row order) is reproducible.
    for submission in sorted(non_evaluated_submissions):
        print(f"Evaluate {submission}")
        results = {"name": submission}
        submitted_files = os.listdir(os.path.join(SUBMISSION_NAME, submission))

        # Files that are not expected test files are ignored.
        submitted_files = [f for f in submitted_files if f in EXPECTED_TEST_FILES]

        if sorted(EXPECTED_TEST_FILES) != sorted(submitted_files):
            raise ValueError(f"{', '.join(submitted_files)} were submitted, but expected {', '.join(EXPECTED_TEST_FILES)}")

        for file in submitted_files:
            ref_file = os.path.join(REFERENCE_NAME, file)
            pred_file = os.path.join(SUBMISSION_NAME, submission, file)

            # Stored as a float (not a string) so new rows have the same
            # type as rows re-read from the CSV.
            results[os.path.splitext(file)[0]] = compute_wer(pred_file, ref_file)

        # The ESC score is the unweighted mean WER over the mandatory test sets.
        wer_values = [float(results[t]) for t in TEST_SETS]
        results["esc-score"] = sum(wer_values) / len(wer_values)
        new_rows.append(results)

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    all_results = pd.concat([all_results, pd.DataFrame(new_rows)], ignore_index=True)

    # save and upload new evaluated results
    # index=False: otherwise the row index is re-read as a data column next run.
    all_results.to_csv(CSV_RESULTS_FILE, index=False)

    # results.csv lives in the submissions clone, so that repo must be pushed.
    commit_url = submission_repo.push_to_hub()
    print(commit_url)

st.table(all_results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
languages.json DELETED
@@ -1,982 +0,0 @@
1
- {
2
- "aa": "Afar",
3
- "ab": "Abkhazian",
4
- "ace": "Achinese",
5
- "ady": "Adyghe",
6
- "af": "Afrikaans",
7
- "af-NA": "Afrikaans (Namibia)",
8
- "af-ZA": "Afrikaans (South Africa)",
9
- "agq": "Aghem",
10
- "agq-CM": "Aghem (Cameroon)",
11
- "ak": "Akan",
12
- "ak-GH": "Akan (Ghana)",
13
- "als": "Tosk Albanian",
14
- "am": "Amharic",
15
- "am-ET": "Amharic (Ethiopia)",
16
- "an": "Aragonese",
17
- "ang": "English, Old (ca.450–1100)",
18
- "ar": "Arabic",
19
- "ar-001": "Arabic (World)",
20
- "ar-AE": "Arabic (United Arab Emirates)",
21
- "ar-BH": "Arabic (Bahrain)",
22
- "ar-DJ": "Arabic (Djibouti)",
23
- "ar-DZ": "Arabic (Algeria)",
24
- "ar-EG": "Arabic (Egypt)",
25
- "ar-EH": "Arabic (Western Sahara)",
26
- "ar-ER": "Arabic (Eritrea)",
27
- "ar-IL": "Arabic (Israel)",
28
- "ar-IQ": "Arabic (Iraq)",
29
- "ar-JO": "Arabic (Jordan)",
30
- "ar-KM": "Arabic (Comoros)",
31
- "ar-KW": "Arabic (Kuwait)",
32
- "ar-LB": "Arabic (Lebanon)",
33
- "ar-LY": "Arabic (Libya)",
34
- "ar-MA": "Arabic (Morocco)",
35
- "ar-MR": "Arabic (Mauritania)",
36
- "ar-OM": "Arabic (Oman)",
37
- "ar-PS": "Arabic (Palestinian Territories)",
38
- "ar-QA": "Arabic (Qatar)",
39
- "ar-SA": "Arabic (Saudi Arabia)",
40
- "ar-SD": "Arabic (Sudan)",
41
- "ar-SO": "Arabic (Somalia)",
42
- "ar-SS": "Arabic (South Sudan)",
43
- "ar-SY": "Arabic (Syria)",
44
- "ar-TD": "Arabic (Chad)",
45
- "ar-TN": "Arabic (Tunisia)",
46
- "ar-YE": "Arabic (Yemen)",
47
- "arc": "Official Aramaic (700–300 BCE); Imperial Aramaic (700–300 BCE)",
48
- "arz": "Egyptian Arabic",
49
- "as": "Assamese",
50
- "as-IN": "Assamese (India)",
51
- "asa": "Asu",
52
- "asa-TZ": "Asu (Tanzania)",
53
- "ast": "Asturian",
54
- "ast-ES": "Asturian (Spain)",
55
- "atj": "Atikamekw",
56
- "av": "Avaric",
57
- "ay": "Aymara",
58
- "az": "Azerbaijani",
59
- "az-Cyrl": "Azerbaijani (Cyrillic)",
60
- "az-Cyrl-AZ": "Azerbaijani (Cyrillic, Azerbaijan)",
61
- "az-Latn": "Azerbaijani (Latin)",
62
- "az-Latn-AZ": "Azerbaijani (Latin, Azerbaijan)",
63
- "azb": "South Azerbaijani",
64
- "ba": "Bashkir",
65
- "bar": "Bavarian",
66
- "bas": "Basaa",
67
- "bas-CM": "Basaa (Cameroon)",
68
- "bcl": "Central Bikol",
69
- "be": "Belarusian",
70
- "be-BY": "Belarusian (Belarus)",
71
- "be-tarask": "Belarusan (Taraškievica)",
72
- "bem": "Bemba",
73
- "bem-ZM": "Bemba (Zambia)",
74
- "bez": "Bena",
75
- "bez-TZ": "Bena (Tanzania)",
76
- "bg": "Bulgarian",
77
- "bg-BG": "Bulgarian (Bulgaria)",
78
- "bg-Latn": "Bulgarian (Latin)",
79
- "bh": "Bihari languages",
80
- "bi": "Bislama",
81
- "bjn": "Banjar",
82
- "bm": "Bambara",
83
- "bm-ML": "Bambara (Mali)",
84
- "bn": "Bangla",
85
- "bn-BD": "Bangla (Bangladesh)",
86
- "bn-IN": "Bangla (India)",
87
- "bn-Latn": "Bangla (Latin script)",
88
- "bo": "Tibetan",
89
- "bo-CN": "Tibetan (China)",
90
- "bo-IN": "Tibetan (India)",
91
- "bpy": "Bishnupriya",
92
- "br": "Breton",
93
- "br-FR": "Breton (France)",
94
- "brx": "Bodo",
95
- "brx-IN": "Bodo (India)",
96
- "bs": "Bosnian",
97
- "bs-Cyrl": "Bosnian (Cyrillic)",
98
- "bs-Cyrl-BA": "Bosnian (Cyrillic, Bosnia & Herzegovina)",
99
- "bs-Latn": "Bosnian",
100
- "bs-Latn-BA": "Bosnian (Bosnia & Herzegovina)",
101
- "bug": "Buginese",
102
- "bxr": "Russia Buriat",
103
- "bzd": "Bribri",
104
- "ca": "Catalan",
105
- "ca-AD": "Catalan (Andorra)",
106
- "ca-ES": "Catalan (Spain)",
107
- "ca-ES-valencia": "Catalan (Spain)",
108
- "ca-FR": "Catalan (France)",
109
- "ca-IT": "Catalan (Italy)",
110
- "cbk": "Chavacano",
111
- "ccp": "Chakma",
112
- "ccp-BD": "Chakma (Bangladesh)",
113
- "ccp-IN": "Chakma (India)",
114
- "cdo": "Min Dong Chinese",
115
- "ce": "Chechen",
116
- "ce-RU": "Chechen (Russia)",
117
- "ceb": "Cebuano",
118
- "ceb-PH": "Cebuano (Philippines)",
119
- "cgg": "Chiga",
120
- "cgg-UG": "Chiga (Uganda)",
121
- "ch": "Chamorro",
122
- "cho": "Choctaw",
123
- "chr": "Cherokee",
124
- "chr-US": "Cherokee (United States)",
125
- "chy": "Cheyenne",
126
- "ckb": "Central Kurdish",
127
- "ckb-IQ": "Central Kurdish (Iraq)",
128
- "ckb-IR": "Central Kurdish (Iran)",
129
- "cni": "Ashaninka",
130
- "cnh": "Hakha Chin",
131
- "co": "Corsican",
132
- "code": "Programming language (C++, Java, Javascript, Python, etc.)",
133
- "cr": "Cree",
134
- "crh": "Crimean Tatar; Crimean Turkish",
135
- "cs": "Czech",
136
- "cs-CZ": "Czech (Czechia)",
137
- "csb": "Kashubian",
138
- "cu": "Church Slavic",
139
- "cu-RU": "Church Slavic (Russia)",
140
- "cv": "Chuvash",
141
- "cy": "Welsh",
142
- "cy-GB": "Welsh (United Kingdom)",
143
- "da": "Danish",
144
- "da-DK": "Danish (Denmark)",
145
- "da-GL": "Danish (Greenland)",
146
- "dav": "Taita",
147
- "dav-KE": "Taita (Kenya)",
148
- "de": "German",
149
- "de-AT": "German (Austria)",
150
- "de-BE": "German (Belgium)",
151
- "de-CH": "German (Switzerland)",
152
- "de-DE": "German (Germany)",
153
- "de-IT": "German (Italy)",
154
- "de-LI": "German (Liechtenstein)",
155
- "de-LU": "German (Luxembourg)",
156
- "din": "Dinka",
157
- "diq": "Dimli (individual language)",
158
- "dje": "Zarma",
159
- "dje-NE": "Zarma (Niger)",
160
- "dsb": "Lower Sorbian",
161
- "dsb-DE": "Lower Sorbian (Germany)",
162
- "dty": "Doteli",
163
- "dua": "Duala",
164
- "dua-CM": "Duala (Cameroon)",
165
- "dv": "Dhivehi",
166
- "dyo": "Jola-Fonyi",
167
- "dyo-SN": "Jola-Fonyi (Senegal)",
168
- "dz": "Dzongkha",
169
- "dz-BT": "Dzongkha (Bhutan)",
170
- "ebu": "Embu",
171
- "ebu-KE": "Embu (Kenya)",
172
- "ee": "Ewe",
173
- "ee-GH": "Ewe (Ghana)",
174
- "ee-TG": "Ewe (Togo)",
175
- "egy": "Egyptian (Ancient)",
176
- "el": "Greek",
177
- "el-CY": "Greek (Cyprus)",
178
- "el-GR": "Greek (Greece)",
179
- "el-Latn": "Greek (Latin)",
180
- "eml": "Emiliano-Romagnolo",
181
- "en": "English",
182
- "en-001": "English (World)",
183
- "en-150": "English (Europe)",
184
- "en-AE": "English (United Arab Emirates)",
185
- "en-AG": "English (Antigua & Barbuda)",
186
- "en-AI": "English (Anguilla)",
187
- "en-AS": "English (American Samoa)",
188
- "en-AT": "English (Austria)",
189
- "en-AU": "English (Australia)",
190
- "en-BB": "English (Barbados)",
191
- "en-BE": "English (Belgium)",
192
- "en-BI": "English (Burundi)",
193
- "en-BM": "English (Bermuda)",
194
- "en-BS": "English (Bahamas)",
195
- "en-BW": "English (Botswana)",
196
- "en-BZ": "English (Belize)",
197
- "en-CA": "English (Canada)",
198
- "en-CC": "English (Cocos (Keeling) Islands)",
199
- "en-CH": "English (Switzerland)",
200
- "en-CK": "English (Cook Islands)",
201
- "en-CM": "English (Cameroon)",
202
- "en-CX": "English (Christmas Island)",
203
- "en-CY": "English (Cyprus)",
204
- "en-DE": "English (Germany)",
205
- "en-DG": "English (Diego Garcia)",
206
- "en-DK": "English (Denmark)",
207
- "en-DM": "English (Dominica)",
208
- "en-ER": "English (Eritrea)",
209
- "en-FI": "English (Finland)",
210
- "en-FJ": "English (Fiji)",
211
- "en-FK": "English (Falkland Islands)",
212
- "en-FM": "English (Micronesia)",
213
- "en-GB": "English (United Kingdom)",
214
- "en-GD": "English (Grenada)",
215
- "en-GG": "English (Guernsey)",
216
- "en-GH": "English (Ghana)",
217
- "en-GI": "English (Gibraltar)",
218
- "en-GM": "English (Gambia)",
219
- "en-GU": "English (Guam)",
220
- "en-GY": "English (Guyana)",
221
- "en-HK": "English (Hong Kong SAR China)",
222
- "en-IE": "English (Ireland)",
223
- "en-IL": "English (Israel)",
224
- "en-IM": "English (Isle of Man)",
225
- "en-IN": "English (India)",
226
- "en-IO": "English (British Indian Ocean Territory)",
227
- "en-JE": "English (Jersey)",
228
- "en-JM": "English (Jamaica)",
229
- "en-KE": "English (Kenya)",
230
- "en-KI": "English (Kiribati)",
231
- "en-KN": "English (St. Kitts & Nevis)",
232
- "en-KY": "English (Cayman Islands)",
233
- "en-LC": "English (St. Lucia)",
234
- "en-LR": "English (Liberia)",
235
- "en-LS": "English (Lesotho)",
236
- "en-MG": "English (Madagascar)",
237
- "en-MH": "English (Marshall Islands)",
238
- "en-MO": "English (Macao SAR China)",
239
- "en-MP": "English (Northern Mariana Islands)",
240
- "en-MS": "English (Montserrat)",
241
- "en-MT": "English (Malta)",
242
- "en-MU": "English (Mauritius)",
243
- "en-MW": "English (Malawi)",
244
- "en-MY": "English (Malaysia)",
245
- "en-NA": "English (Namibia)",
246
- "en-NF": "English (Norfolk Island)",
247
- "en-NG": "English (Nigeria)",
248
- "en-NL": "English (Netherlands)",
249
- "en-NR": "English (Nauru)",
250
- "en-NU": "English (Niue)",
251
- "en-NZ": "English (New Zealand)",
252
- "en-PG": "English (Papua New Guinea)",
253
- "en-PH": "English (Philippines)",
254
- "en-PK": "English (Pakistan)",
255
- "en-PN": "English (Pitcairn Islands)",
256
- "en-PR": "English (Puerto Rico)",
257
- "en-PW": "English (Palau)",
258
- "en-RW": "English (Rwanda)",
259
- "en-SB": "English (Solomon Islands)",
260
- "en-SC": "English (Seychelles)",
261
- "en-SD": "English (Sudan)",
262
- "en-SE": "English (Sweden)",
263
- "en-SG": "English (Singapore)",
264
- "en-SH": "English (St. Helena)",
265
- "en-SI": "English (Slovenia)",
266
- "en-SL": "English (Sierra Leone)",
267
- "en-SS": "English (South Sudan)",
268
- "en-SX": "English (Sint Maarten)",
269
- "en-SZ": "English (Eswatini)",
270
- "en-TC": "English (Turks & Caicos Islands)",
271
- "en-TK": "English (Tokelau)",
272
- "en-TO": "English (Tonga)",
273
- "en-TT": "English (Trinidad & Tobago)",
274
- "en-TV": "English (Tuvalu)",
275
- "en-TZ": "English (Tanzania)",
276
- "en-UG": "English (Uganda)",
277
- "en-UM": "English (U.S. Outlying Islands)",
278
- "en-US": "English (United States)",
279
- "en-US-posix": "English (United States)",
280
- "en-VC": "English (St. Vincent & Grenadines)",
281
- "en-VG": "English (British Virgin Islands)",
282
- "en-VI": "English (U.S. Virgin Islands)",
283
- "en-VU": "English (Vanuatu)",
284
- "en-WS": "English (Samoa)",
285
- "en-ZA": "English (South Africa)",
286
- "en-ZM": "English (Zambia)",
287
- "en-ZW": "English (Zimbabwe)",
288
- "en-basiceng": "Basic English",
289
- "eo": "Esperanto",
290
- "eo-001": "Esperanto (World)",
291
- "es": "Spanish",
292
- "es-419": "Spanish (Latin America)",
293
- "es-AR": "Spanish (Argentina)",
294
- "es-BO": "Spanish (Bolivia)",
295
- "es-BR": "Spanish (Brazil)",
296
- "es-BZ": "Spanish (Belize)",
297
- "es-CL": "Spanish (Chile)",
298
- "es-CO": "Spanish (Colombia)",
299
- "es-CR": "Spanish (Costa Rica)",
300
- "es-CU": "Spanish (Cuba)",
301
- "es-DO": "Spanish (Dominican Republic)",
302
- "es-EA": "Spanish (Ceuta & Melilla)",
303
- "es-EC": "Spanish (Ecuador)",
304
- "es-ES": "Spanish (Spain)",
305
- "es-GQ": "Spanish (Equatorial Guinea)",
306
- "es-GT": "Spanish (Guatemala)",
307
- "es-HN": "Spanish (Honduras)",
308
- "es-IC": "Spanish (Canary Islands)",
309
- "es-MX": "Spanish (Mexico)",
310
- "es-NI": "Spanish (Nicaragua)",
311
- "es-PA": "Spanish (Panama)",
312
- "es-PE": "Spanish (Peru)",
313
- "es-PH": "Spanish (Philippines)",
314
- "es-PR": "Spanish (Puerto Rico)",
315
- "es-PY": "Spanish (Paraguay)",
316
- "es-SV": "Spanish (El Salvador)",
317
- "es-US": "Spanish (United States)",
318
- "es-UY": "Spanish (Uruguay)",
319
- "es-VE": "Spanish (Venezuela)",
320
- "et": "Estonian",
321
- "et-EE": "Estonian (Estonia)",
322
- "eu": "Basque",
323
- "eu-ES": "Basque (Spain)",
324
- "ewo": "Ewondo",
325
- "ewo-CM": "Ewondo (Cameroon)",
326
- "ext": "Extremaduran",
327
- "fa": "Persian",
328
- "fa-AF": "Persian (Afghanistan)",
329
- "fa-IR": "Persian (Iran)",
330
- "ff": "Fulah",
331
- "ff-Adlm": "Fulah (Adlam)",
332
- "ff-Adlm-BF": "Fulah (Adlam, Burkina Faso)",
333
- "ff-Adlm-CM": "Fulah (Adlam, Cameroon)",
334
- "ff-Adlm-GH": "Fulah (Adlam, Ghana)",
335
- "ff-Adlm-GM": "Fulah (Adlam, Gambia)",
336
- "ff-Adlm-GN": "Fulah (Adlam, Guinea)",
337
- "ff-Adlm-GW": "Fulah (Adlam, Guinea-Bissau)",
338
- "ff-Adlm-LR": "Fulah (Adlam, Liberia)",
339
- "ff-Adlm-MR": "Fulah (Adlam, Mauritania)",
340
- "ff-Adlm-NE": "Fulah (Adlam, Niger)",
341
- "ff-Adlm-NG": "Fulah (Adlam, Nigeria)",
342
- "ff-Adlm-SL": "Fulah (Adlam, Sierra Leone)",
343
- "ff-Adlm-SN": "Fulah (Adlam, Senegal)",
344
- "ff-Latn": "Fulah (Latin)",
345
- "ff-Latn-BF": "Fulah (Latin, Burkina Faso)",
346
- "ff-Latn-CM": "Fulah (Latin, Cameroon)",
347
- "ff-Latn-GH": "Fulah (Latin, Ghana)",
348
- "ff-Latn-GM": "Fulah (Latin, Gambia)",
349
- "ff-Latn-GN": "Fulah (Latin, Guinea)",
350
- "ff-Latn-GW": "Fulah (Latin, Guinea-Bissau)",
351
- "ff-Latn-LR": "Fulah (Latin, Liberia)",
352
- "ff-Latn-MR": "Fulah (Latin, Mauritania)",
353
- "ff-Latn-NE": "Fulah (Latin, Niger)",
354
- "ff-Latn-NG": "Fulah (Latin, Nigeria)",
355
- "ff-Latn-SL": "Fulah (Latin, Sierra Leone)",
356
- "ff-Latn-SN": "Fulah (Latin, Senegal)",
357
- "fi": "Finnish",
358
- "fi-FI": "Finnish (Finland)",
359
- "fil": "Filipino",
360
- "fil-PH": "Filipino (Philippines)",
361
- "fj": "Fijian",
362
- "fo": "Faroese",
363
- "fo-DK": "Faroese (Denmark)",
364
- "fo-FO": "Faroese (Faroe Islands)",
365
- "fon": "Fon",
366
- "fr": "French",
367
- "fr-BE": "French (Belgium)",
368
- "fr-BF": "French (Burkina Faso)",
369
- "fr-BI": "French (Burundi)",
370
- "fr-BJ": "French (Benin)",
371
- "fr-BL": "French (St. Barthélemy)",
372
- "fr-CA": "French (Canada)",
373
- "fr-CD": "French (Congo - Kinshasa)",
374
- "fr-CF": "French (Central African Republic)",
375
- "fr-CG": "French (Congo - Brazzaville)",
376
- "fr-CH": "French (Switzerland)",
377
- "fr-CI": "French (Côte d’Ivoire)",
378
- "fr-CM": "French (Cameroon)",
379
- "fr-DJ": "French (Djibouti)",
380
- "fr-DZ": "French (Algeria)",
381
- "fr-FR": "French (France)",
382
- "fr-GA": "French (Gabon)",
383
- "fr-GF": "French (French Guiana)",
384
- "fr-GN": "French (Guinea)",
385
- "fr-GP": "French (Guadeloupe)",
386
- "fr-GQ": "French (Equatorial Guinea)",
387
- "fr-HT": "French (Haiti)",
388
- "fr-KM": "French (Comoros)",
389
- "fr-LU": "French (Luxembourg)",
390
- "fr-MA": "French (Morocco)",
391
- "fr-MC": "French (Monaco)",
392
- "fr-MF": "French (St. Martin)",
393
- "fr-MG": "French (Madagascar)",
394
- "fr-ML": "French (Mali)",
395
- "fr-MQ": "French (Martinique)",
396
- "fr-MR": "French (Mauritania)",
397
- "fr-MU": "French (Mauritius)",
398
- "fr-NC": "French (New Caledonia)",
399
- "fr-NE": "French (Niger)",
400
- "fr-PF": "French (French Polynesia)",
401
- "fr-PM": "French (St. Pierre & Miquelon)",
402
- "fr-RE": "French (Réunion)",
403
- "fr-RW": "French (Rwanda)",
404
- "fr-SC": "French (Seychelles)",
405
- "fr-SN": "French (Senegal)",
406
- "fr-SY": "French (Syria)",
407
- "fr-TD": "French (Chad)",
408
- "fr-TG": "French (Togo)",
409
- "fr-TN": "French (Tunisia)",
410
- "fr-VU": "French (Vanuatu)",
411
- "fr-WF": "French (Wallis & Futuna)",
412
- "fr-YT": "French (Mayotte)",
413
- "frp": "Franco-Provençal",
414
- "frr": "Northern Frisian",
415
- "fur": "Friulian",
416
- "fur-IT": "Friulian (Italy)",
417
- "fy": "Western Frisian",
418
- "fy-NL": "Western Frisian (Netherlands)",
419
- "ga": "Irish",
420
- "ga-GB": "Irish (United Kingdom)",
421
- "ga-IE": "Irish (Ireland)",
422
- "gag": "Gagauz",
423
- "gan": "Gan Chinese",
424
- "gd": "Scottish Gaelic",
425
- "gd-GB": "Scottish Gaelic (United Kingdom)",
426
- "gl": "Galician",
427
- "gl-ES": "Galician (Spain)",
428
- "glk": "Gilaki",
429
- "gn": "Guarani",
430
- "gom": "Goan Konkani",
431
- "gor": "Gorontalo",
432
- "got": "Gothic",
433
- "gsw": "Swiss German",
434
- "gsw-CH": "Swiss German (Switzerland)",
435
- "gsw-FR": "Swiss German (France)",
436
- "gsw-LI": "Swiss German (Liechtenstein)",
437
- "gu": "Gujarati",
438
- "gu-IN": "Gujarati (India)",
439
- "guz": "Gusii",
440
- "guz-KE": "Gusii (Kenya)",
441
- "gv": "Manx",
442
- "gv-IM": "Manx (Isle of Man)",
443
- "ha": "Hausa",
444
- "ha-GH": "Hausa (Ghana)",
445
- "ha-NE": "Hausa (Niger)",
446
- "ha-NG": "Hausa (Nigeria)",
447
- "hak": "Hakka Chinese",
448
- "haw": "Hawaiian",
449
- "haw-US": "Hawaiian (United States)",
450
- "hch": "Wixarika",
451
- "he": "Hebrew",
452
- "he-IL": "Hebrew (Israel)",
453
- "hi": "Hindi",
454
- "hi-IN": "Hindi (India)",
455
- "hi-Latn": "Hindi (Latin script)",
456
- "hif": "Fiji Hindi",
457
- "hmn": "Hmong, Mong",
458
- "ho": "Hiri Motu",
459
- "hr": "Croatian",
460
- "hr-BA": "Croatian (Bosnia & Herzegovina)",
461
- "hr-HR": "Croatian (Croatia)",
462
- "hsb": "Upper Sorbian",
463
- "hsb-DE": "Upper Sorbian (Germany)",
464
- "ht": "Haitian",
465
- "hu": "Hungarian",
466
- "hu-HU": "Hungarian (Hungary)",
467
- "hy": "Armenian",
468
- "hy-AM": "Armenian (Armenia)",
469
- "ia": "Interlingua",
470
- "ia-001": "Interlingua (World)",
471
- "id": "Indonesian",
472
- "id-ID": "Indonesian (Indonesia)",
473
- "ie": "Interlingue",
474
- "ig": "Igbo",
475
- "ig-NG": "Igbo (Nigeria)",
476
- "ii": "Sichuan Yi",
477
- "ii-CN": "Sichuan Yi (China)",
478
- "ik": "Iñupiaq",
479
- "ilo": "Iloko",
480
- "inh": "Ingush",
481
- "io": "Ido",
482
- "is": "Icelandic",
483
- "is-IS": "Icelandic (Iceland)",
484
- "it": "Italian",
485
- "it-CH": "Italian (Switzerland)",
486
- "it-IT": "Italian (Italy)",
487
- "it-SM": "Italian (San Marino)",
488
- "it-VA": "Italian (Vatican City)",
489
- "iu": "Inuktitut",
490
- "iw": "former Hebrew",
491
- "ja": "Japanese",
492
- "ja-JP": "Japanese (Japan)",
493
- "ja-Latn": "Japanese (Latin)",
494
- "jam": "Jamaican Patois",
495
- "jbo": "Lojban",
496
- "jgo": "Ngomba",
497
- "jgo-CM": "Ngomba (Cameroon)",
498
- "jmc": "Machame",
499
- "jmc-TZ": "Machame (Tanzania)",
500
- "jv": "Javanese",
501
- "jv-ID": "Javanese (Indonesia)",
502
- "jv-x-bms": "Banyumasan",
503
- "ka": "Georgian",
504
- "ka-GE": "Georgian (Georgia)",
505
- "kaa": "Karakalpak",
506
- "kab": "Kabyle",
507
- "kab-DZ": "Kabyle (Algeria)",
508
- "kac": "Kachin; Jingpho",
509
- "kam": "Kamba",
510
- "kam-KE": "Kamba (Kenya)",
511
- "kbd": "Kabardian",
512
- "kbp": "Kabiye",
513
- "kde": "Makonde",
514
- "kde-TZ": "Makonde (Tanzania)",
515
- "kea": "Kabuverdianu",
516
- "kea-CV": "Kabuverdianu (Cape Verde)",
517
- "kg": "Kongo",
518
- "khq": "Koyra Chiini",
519
- "khq-ML": "Koyra Chiini (Mali)",
520
- "ki": "Kikuyu",
521
- "ki-KE": "Kikuyu (Kenya)",
522
- "kj": "Kwanyama",
523
- "kk": "Kazakh",
524
- "kk-KZ": "Kazakh (Kazakhstan)",
525
- "kkj": "Kako",
526
- "kkj-CM": "Kako (Cameroon)",
527
- "kl": "Kalaallisut",
528
- "kl-GL": "Kalaallisut (Greenland)",
529
- "kln": "Kalenjin",
530
- "kln-KE": "Kalenjin (Kenya)",
531
- "km": "Khmer",
532
- "km-KH": "Khmer (Cambodia)",
533
- "kmr": "Northern Kurdish",
534
- "kn": "Kannada",
535
- "kn-IN": "Kannada (India)",
536
- "ko": "Korean",
537
- "ko-KP": "Korean (North Korea)",
538
- "ko-KR": "Korean (South Korea)",
539
- "koi": "Komi-Permyak",
540
- "kok": "Konkani",
541
- "kok-IN": "Konkani (India)",
542
- "krc": "Karachay-Balkar",
543
- "ks": "Kashmiri",
544
- "ks-Arab": "Kashmiri (Arabic)",
545
- "ks-IN": "Kashmiri (India)",
546
- "ksb": "Shambala",
547
- "ksb-TZ": "Shambala (Tanzania)",
548
- "ksf": "Bafia",
549
- "ksf-CM": "Bafia (Cameroon)",
550
- "ksh": "Colognian",
551
- "ksh-DE": "Colognian (Germany)",
552
- "ku": "Kurdish",
553
- "ku-TR": "Kurdish (Turkey)",
554
- "kv": "Komi",
555
- "kw": "Cornish",
556
- "kw-GB": "Cornish (United Kingdom)",
557
- "ky": "Kyrgyz",
558
- "ky-KG": "Kyrgyz (Kyrgyzstan)",
559
- "la": "Latin",
560
- "lad": "Ladino",
561
- "lag": "Langi",
562
- "lag-TZ": "Langi (Tanzania)",
563
- "lb": "Luxembourgish",
564
- "lb-LU": "Luxembourgish (Luxembourg)",
565
- "lbe": "Lak",
566
- "lez": "Lezghian",
567
- "lfn": "Lingua Franca Nova",
568
- "lg": "Luganda",
569
- "lg-UG": "Ganda (Uganda)",
570
- "li": "Limburgan",
571
- "lij": "Ligurian",
572
- "lkt": "Lakota",
573
- "lkt-US": "Lakota (United States)",
574
- "lmo": "Lombard",
575
- "ln": "Lingala",
576
- "ln-AO": "Lingala (Angola)",
577
- "ln-CD": "Lingala (Congo - Kinshasa)",
578
- "ln-CF": "Lingala (Central African Republic)",
579
- "ln-CG": "Lingala (Congo - Brazzaville)",
580
- "lo": "Lao",
581
- "lo-LA": "Lao (Laos)",
582
- "lrc": "Northern Luri",
583
- "lrc-IQ": "Northern Luri (Iraq)",
584
- "lrc-IR": "Northern Luri (Iran)",
585
- "lt": "Lithuanian",
586
- "lt-LT": "Lithuanian (Lithuania)",
587
- "ltg": "Latgalian",
588
- "lu": "Luba-Katanga",
589
- "lu-CD": "Luba-Katanga (Congo - Kinshasa)",
590
- "luo": "Luo (Kenya and Tanzania)",
591
- "luo-KE": "Luo (Kenya and Tanzania) (Kenya)",
592
- "luy": "Luyia",
593
- "luy-KE": "Luyia (Kenya)",
594
- "lv": "Latvian",
595
- "lv-LV": "Latvian (Latvia)",
596
- "lzh": "Literary Chinese",
597
- "mai": "Maithili",
598
- "mai-IN": "Maithili (India)",
599
- "mas": "Masai",
600
- "mas-KE": "Masai (Kenya)",
601
- "mas-TZ": "Masai (Tanzania)",
602
- "mdf": "Moksha",
603
- "mer": "Meru",
604
- "mer-KE": "Meru (Kenya)",
605
- "mfe": "Morisyen",
606
- "mfe-MU": "Morisyen (Mauritius)",
607
- "mg": "Malagasy",
608
- "mg-MG": "Malagasy (Madagascar)",
609
- "mgh": "Makhuwa-Meetto",
610
- "mgh-MZ": "Makhuwa-Meetto (Mozambique)",
611
- "mgo": "Metaʼ",
612
- "mgo-CM": "Metaʼ (Cameroon)",
613
- "mh": "Marshallese",
614
- "mhr": "Eastern Mari",
615
- "mi": "Maori",
616
- "mi-NZ": "Maori (New Zealand)",
617
- "min": "Minangkabau",
618
- "mk": "Macedonian",
619
- "mk-MK": "Macedonian (North Macedonia)",
620
- "ml": "Malayalam",
621
- "ml-IN": "Malayalam (India)",
622
- "mn": "Mongolian",
623
- "mn-MN": "Mongolian (Mongolia)",
624
- "mni": "Manipuri",
625
- "mni-Beng": "Manipuri (Bangla)",
626
- "mni-Beng-IN": "Manipuri (Bangla, India)",
627
- "mr": "Marathi",
628
- "mr-IN": "Marathi (India)",
629
- "mrj": "Western Mari",
630
- "ms": "Malay",
631
- "ms-BN": "Malay (Brunei)",
632
- "ms-ID": "Malay (Indonesia)",
633
- "ms-MY": "Malay (Malaysia)",
634
- "ms-SG": "Malay (Singapore)",
635
- "mt": "Maltese",
636
- "mt-MT": "Maltese (Malta)",
637
- "mua": "Mundang",
638
- "mua-CM": "Mundang (Cameroon)",
639
- "mus": "Muscogee",
640
- "mwl": "Mirandese",
641
- "my": "Burmese",
642
- "my-MM": "Burmese (Myanmar (Burma))",
643
- "my-x-zawgyi": "Myanmar (Zawgyi)",
644
- "myv": "Erzya",
645
- "mzn": "Mazanderani",
646
- "mzn-IR": "Mazanderani (Iran)",
647
- "na": "Nauruan",
648
- "nah": "Nahuatl languages",
649
- "nan": "Min Nan Chinese",
650
- "nap": "Neapolitan",
651
- "naq": "Nama",
652
- "naq-NA": "Nama (Namibia)",
653
- "nb": "Norwegian Bokmål",
654
- "nb-NO": "Norwegian Bokmål (Norway)",
655
- "nb-SJ": "Norwegian Bokmål (Svalbard & Jan Mayen)",
656
- "nd": "North Ndebele",
657
- "nd-ZW": "North Ndebele (Zimbabwe)",
658
- "nds": "Low German",
659
- "nds-DE": "Low German (Germany)",
660
- "nds-NL": "Low German (Netherlands)",
661
- "nds-nl": "Dutch Low Saxon",
662
- "ne": "Nepali",
663
- "ne-IN": "Nepali (India)",
664
- "ne-NP": "Nepali (Nepal)",
665
- "new": "Newari",
666
- "ng": "Ndonga",
667
- "nl": "Dutch",
668
- "nl-AW": "Dutch (Aruba)",
669
- "nl-BE": "Dutch (Belgium)",
670
- "nl-BQ": "Dutch (Caribbean Netherlands)",
671
- "nl-CW": "Dutch (Curaçao)",
672
- "nl-NL": "Dutch (Netherlands)",
673
- "nl-SR": "Dutch (Suriname)",
674
- "nl-SX": "Dutch (Sint Maarten)",
675
- "nmg": "Kwasio",
676
- "nmg-CM": "Kwasio (Cameroon)",
677
- "nn": "Norwegian Nynorsk",
678
- "nn-NO": "Norwegian Nynorsk (Norway)",
679
- "nnh": "Ngiemboon",
680
- "nnh-CM": "Ngiemboon (Cameroon)",
681
- "no": "Norwegian",
682
- "nov": "Novial",
683
- "nrf": "Norman",
684
- "ns": "Northern Sotho",
685
- "nso": "Pedi; Sepedi; Northern Sotho",
686
- "nus": "Nuer",
687
- "nus-SS": "Nuer (South Sudan)",
688
- "nv": "Navajo",
689
- "ny": "Nyanja",
690
- "nyn": "Nyankole",
691
- "nyn-UG": "Nyankole (Uganda)",
692
- "oc": "Occitan (post 1500)",
693
- "olo": "Livvi-Karelian",
694
- "om": "Oromo",
695
- "om-ET": "Oromo (Ethiopia)",
696
- "om-KE": "Oromo (Kenya)",
697
- "or": "Odia",
698
- "or-IN": "Odia (India)",
699
- "os": "Ossetic",
700
- "os-GE": "Ossetic (Georgia)",
701
- "os-RU": "Ossetic (Russia)",
702
- "oto": "Otomi",
703
- "pa": "Punjabi",
704
- "pa-Arab": "Punjabi (Arabic)",
705
- "pa-Arab-PK": "Punjabi (Arabic, Pakistan)",
706
- "pa-Guru": "Punjabi",
707
- "pa-Guru-IN": "Punjabi (India)",
708
- "pa-IN": "Punjabi (India)",
709
- "pag": "Pangasinan",
710
- "pam": "Pampanga",
711
- "pap": "Papiamento",
712
- "pcd": "Picard",
713
- "pcm": "Nigerian Pidgin",
714
- "pcm-NG": "Nigerian Pidgin (Nigeria)",
715
- "pdc": "Pennsylvania German",
716
- "pfl": "Palatine German",
717
- "pi": "Pali",
718
- "pih": "Norfolk",
719
- "pl": "Polish",
720
- "pl-PL": "Polish (Poland)",
721
- "pms": "Piemontese",
722
- "pnb": "Western Panjabi",
723
- "pnt": "Pontic Greek",
724
- "prg": "Prussian",
725
- "prg-001": "Prussian (World)",
726
- "ps": "Pashto",
727
- "ps-AF": "Pashto (Afghanistan)",
728
- "ps-PK": "Pashto (Pakistan)",
729
- "pt": "Portuguese",
730
- "pt-AO": "Portuguese (Angola)",
731
- "pt-BR": "Portuguese (Brazil)",
732
- "pt-CH": "Portuguese (Switzerland)",
733
- "pt-CV": "Portuguese (Cape Verde)",
734
- "pt-GQ": "Portuguese (Equatorial Guinea)",
735
- "pt-GW": "Portuguese (Guinea-Bissau)",
736
- "pt-LU": "Portuguese (Luxembourg)",
737
- "pt-MO": "Portuguese (Macao SAR China)",
738
- "pt-MZ": "Portuguese (Mozambique)",
739
- "pt-PT": "Portuguese (Portugal)",
740
- "pt-ST": "Portuguese (São Tomé & Príncipe)",
741
- "pt-TL": "Portuguese (Timor-Leste)",
742
- "qu": "Quechua",
743
- "qu-BO": "Quechua (Bolivia)",
744
- "qu-EC": "Quechua (Ecuador)",
745
- "qu-PE": "Quechua (Peru)",
746
- "rm": "Romansh",
747
- "rm-CH": "Romansh (Switzerland)",
748
- "rm-sursilv": "Romansh (Sursilvan idiom, Switzerland)",
749
- "rm-vallader": "Romansh (Vallader idiom, Swizterland)",
750
- "rmy": "Vlax Romani",
751
- "rn": "Rundi",
752
- "rn-BI": "Rundi (Burundi)",
753
- "ro": "Romanian",
754
- "ro-MD": "Romanian (Moldova)",
755
- "ro-RO": "Romanian (Romania)",
756
- "rof": "Rombo",
757
- "rof-TZ": "Rombo (Tanzania)",
758
- "ru": "Russian",
759
- "ru-BY": "Russian (Belarus)",
760
- "ru-KG": "Russian (Kyrgyzstan)",
761
- "ru-KZ": "Russian (Kazakhstan)",
762
- "ru-Latn": "Russian (Latin)",
763
- "ru-MD": "Russian (Moldova)",
764
- "ru-RU": "Russian (Russia)",
765
- "ru-UA": "Russian (Ukraine)",
766
- "rue": "Rusyn",
767
- "rup": "Aromanian",
768
- "rw": "Kinyarwanda",
769
- "rw-RW": "Kinyarwanda (Rwanda)",
770
- "rwk": "Rwa",
771
- "rwk-TZ": "Rwa (Tanzania)",
772
- "sa": "Sanskrit",
773
- "sah": "Sakha",
774
- "sah-RU": "Sakha (Russia)",
775
- "saq": "Samburu",
776
- "saq-KE": "Samburu (Kenya)",
777
- "sat": "Santali",
778
- "sat-Olck": "Santali (Ol Chiki)",
779
- "sat-Olck-IN": "Santali (Ol Chiki, India)",
780
- "sbp": "Sangu",
781
- "sbp-TZ": "Sangu (Tanzania)",
782
- "sc": "Sardinian",
783
- "scn": "Sicilian",
784
- "sco": "Scots",
785
- "sd": "Sindhi",
786
- "sd-Arab": "Sindhi (Arabic)",
787
- "sd-Arab-PK": "Sindhi (Arabic, Pakistan)",
788
- "sd-Deva": "Sindhi (Devanagari)",
789
- "sd-Deva-IN": "Sindhi (Devanagari, India)",
790
- "se": "Northern Sami",
791
- "se-FI": "Northern Sami (Finland)",
792
- "se-NO": "Northern Sami (Norway)",
793
- "se-SE": "Northern Sami (Sweden)",
794
- "seh": "Sena",
795
- "seh-MZ": "Sena (Mozambique)",
796
- "ses": "Koyraboro Senni",
797
- "ses-ML": "Koyraboro Senni (Mali)",
798
- "sg": "Sango",
799
- "sg-CF": "Sango (Central African Republic)",
800
- "sgs": "Samogitian",
801
- "sh": "Serbo-Croatian",
802
- "shi": "Tachelhit",
803
- "shi-Latn": "Tachelhit (Latin)",
804
- "shi-Latn-MA": "Tachelhit (Latin, Morocco)",
805
- "shi-Tfng": "Tachelhit (Tifinagh)",
806
- "shi-Tfng-MA": "Tachelhit (Tifinagh, Morocco)",
807
- "shn": "Shan",
808
- "shp": "Shipibo-Konibo",
809
- "si": "Sinhala",
810
- "si-LK": "Sinhala (Sri Lanka)",
811
- "simple": "en",
812
- "sk": "Slovak",
813
- "sk-SK": "Slovak (Slovakia)",
814
- "sl": "Slovenian",
815
- "sl-SI": "Slovenian (Slovenia)",
816
- "sm": "San Marino",
817
- "smn": "Inari Sami",
818
- "smn-FI": "Inari Sami (Finland)",
819
- "sn": "Shona",
820
- "sn-ZW": "Shona (Zimbabwe)",
821
- "so": "Somali",
822
- "so-DJ": "Somali (Djibouti)",
823
- "so-ET": "Somali (Ethiopia)",
824
- "so-KE": "Somali (Kenya)",
825
- "so-SO": "Somali (Somalia)",
826
- "sq": "Albanian",
827
- "sq-AL": "Albanian (Albania)",
828
- "sq-MK": "Albanian (North Macedonia)",
829
- "sq-XK": "Albanian (Kosovo)",
830
- "sr": "Serbian",
831
- "sr-Cyrl": "Serbian (Cyrillic)",
832
- "sr-Cyrl-BA": "Serbian (Cyrillic, Bosnia & Herzegovina)",
833
- "sr-Cyrl-ME": "Serbian (Cyrillic, Montenegro)",
834
- "sr-Cyrl-RS": "Serbian (Cyrillic, Serbia)",
835
- "sr-Cyrl-XK": "Serbian (Cyrillic, Kosovo)",
836
- "sr-Latn": "Serbian (Latin)",
837
- "sr-Latn-BA": "Serbian (Latin, Bosnia & Herzegovina)",
838
- "sr-Latn-ME": "Serbian (Latin, Montenegro)",
839
- "sr-Latn-RS": "Serbian (Latin, Serbia)",
840
- "sr-Latn-XK": "Serbian (Latin, Kosovo)",
841
- "srn": "Sranan Tongo",
842
- "ss": "Swati",
843
- "st": "Southern Sotho",
844
- "stq": "Saterland Frisian",
845
- "su": "Sundanese",
846
- "su-Latn": "Sundanese (Latin)",
847
- "su-Latn-ID": "Sundanese (Latin, Indonesia)",
848
- "sv": "Swedish",
849
- "sv-AX": "Swedish (Åland Islands)",
850
- "sv-FI": "Swedish (Finland)",
851
- "sv-SE": "Swedish (Sweden)",
852
- "sw": "Swahili",
853
- "sw-CD": "Swahili (Congo - Kinshasa)",
854
- "sw-KE": "Swahili (Kenya)",
855
- "sw-TZ": "Swahili (Tanzania)",
856
- "sw-UG": "Swahili (Uganda)",
857
- "syc": "Syriac",
858
- "szl": "Silesian",
859
- "ta": "Tamil",
860
- "ta-IN": "Tamil (India)",
861
- "ta-LK": "Tamil (Sri Lanka)",
862
- "ta-Latn": "Tamil (Latin script)",
863
- "ta-MY": "Tamil (Malaysia)",
864
- "ta-SG": "Tamil (Singapore)",
865
- "tar": "Raramuri",
866
- "tcy": "Tulu",
867
- "tdt": "Tetum",
868
- "te": "Telugu",
869
- "te-IN": "Telugu (India)",
870
- "te-Latn": "Telugu (Latin script)",
871
- "teo": "Teso",
872
- "teo-KE": "Teso (Kenya)",
873
- "teo-UG": "Teso (Uganda)",
874
- "tg": "Tajik",
875
- "tg-TJ": "Tajik (Tajikistan)",
876
- "th": "Thai",
877
- "th-TH": "Thai (Thailand)",
878
- "ti": "Tigrinya",
879
- "ti-ER": "Tigrinya (Eritrea)",
880
- "ti-ET": "Tigrinya (Ethiopia)",
881
- "tk": "Turkmen",
882
- "tk-TM": "Turkmen (Turkmenistan)",
883
- "tl": "Tagalog",
884
- "tn": "Tswana",
885
- "to": "Tongan",
886
- "to-TO": "Tongan (Tonga)",
887
- "tpi": "Tok Pisin",
888
- "tr": "Turkish",
889
- "tr-CY": "Turkish (Cyprus)",
890
- "tr-TR": "Turkish (Turkey)",
891
- "ts": "Tsonga",
892
- "tt": "Tatar",
893
- "tt-RU": "Tatar (Russia)",
894
- "tum": "Tumbuka",
895
- "tw": "Twi",
896
- "twq": "Tasawaq",
897
- "twq-NE": "Tasawaq (Niger)",
898
- "ty": "Tahitian",
899
- "tyv": "Tuvinian",
900
- "tzm": "Central Atlas Tamazight",
901
- "tzm-MA": "Central Atlas Tamazight (Morocco)",
902
- "udm": "Udmurt",
903
- "ug": "Uyghur",
904
- "ug-CN": "Uyghur (China)",
905
- "uk": "Ukrainian",
906
- "uk-UA": "Ukrainian (Ukraine)",
907
- "und": "Unknown language",
908
- "ur": "Urdu",
909
- "ur-IN": "Urdu (India)",
910
- "ur-Latn": "Urdu (Latin script)",
911
- "ur-PK": "Urdu (Pakistan)",
912
- "uz": "Uzbek",
913
- "uz-Arab": "Uzbek (Arabic)",
914
- "uz-Arab-AF": "Uzbek (Arabic, Afghanistan)",
915
- "uz-Cyrl": "Uzbek (Cyrillic)",
916
- "uz-Cyrl-UZ": "Uzbek (Cyrillic, Uzbekistan)",
917
- "uz-Latn": "Uzbek (Latin)",
918
- "uz-Latn-UZ": "Uzbek (Latin, Uzbekistan)",
919
- "vai": "Vai",
920
- "vai-Latn": "Vai (Latin)",
921
- "vai-Latn-LR": "Vai (Latin, Liberia)",
922
- "vai-Vaii": "Vai (Vai)",
923
- "vai-Vaii-LR": "Vai (Vai, Liberia)",
924
- "ve": "Venda",
925
- "vec": "Venetian",
926
- "vep": "Veps",
927
- "vi": "Vietnamese",
928
- "vi-VN": "Vietnamese (Vietnam)",
929
- "vls": "Vlaams",
930
- "vo": "Volapük",
931
- "vo-001": "Volapük (World)",
932
- "vot": "Votic",
933
- "vro": "Võro",
934
- "vun": "Vunjo",
935
- "vun-TZ": "Vunjo (Tanzania)",
936
- "wa": "Walloon",
937
- "wae": "Walser",
938
- "wae-CH": "Walser (Switzerland)",
939
- "war": "Waray (Philippines)",
940
- "wo": "Wolof",
941
- "wo-SN": "Wolof (Senegal)",
942
- "wuu": "Wu Chinese",
943
- "xal": "Kalmyk",
944
- "xh": "Xhosa",
945
- "xh-ZA": "Xhosa (South Africa)",
946
- "xmf": "Mingrelian",
947
- "xog": "Soga",
948
- "xog-UG": "Soga (Uganda)",
949
- "yav": "Yangben",
950
- "yav-CM": "Yangben (Cameroon)",
951
- "yi": "Yiddish",
952
- "yi-001": "Yiddish (World)",
953
- "yo": "Yoruba",
954
- "yo-BJ": "Yoruba (Benin)",
955
- "yo-NG": "Yoruba (Nigeria)",
956
- "yue": "Cantonese",
957
- "yue-Hans": "Cantonese (Simplified)",
958
- "yue-Hans-CN": "Cantonese (Simplified, China)",
959
- "yue-Hant": "Cantonese (Traditional)",
960
- "yue-Hant-HK": "Cantonese (Traditional, Hong Kong SAR China)",
961
- "za": "Zhuang",
962
- "zea": "Zeeuws",
963
- "zgh": "Standard Moroccan Tamazight",
964
- "zgh-MA": "Standard Moroccan Tamazight (Morocco)",
965
- "zh": "Chinese",
966
- "zh-CN": "Chinese (Mandarin, China)",
967
- "zh-HK": "Cantonese (Hong Kong)",
968
- "zh-Hans": "Chinese (Simplified)",
969
- "zh-Hans-CN": "Chinese (Simplified, China)",
970
- "zh-Hans-HK": "Chinese (Simplified, Hong Kong SAR China)",
971
- "zh-Hans-MO": "Chinese (Simplified, Macao SAR China)",
972
- "zh-Hans-SG": "Chinese (Simplified, Singapore)",
973
- "zh-Hant": "Chinese (Traditional)",
974
- "zh-Hant-HK": "Chinese (Traditional, Hong Kong SAR China)",
975
- "zh-Hant-MO": "Chinese (Traditional, Macao SAR China)",
976
- "zh-Hant-TW": "Chinese (Traditional, Taiwan)",
977
- "zh-Latn": "Chinese (Latin)",
978
- "zh-TW": "Chinese (Mandarin, Taiwan)",
979
- "zu": "Zulu",
980
- "zu-ZA": "Zulu (South Africa)",
981
- "zza": "Zaza"
982
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logo.png DELETED
Binary file (38.5 kB)
 
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ huggingface_hub==0.8.1
2
+ pandas