# Lookup table: three-letter language code -> English display name.
# NOTE(review): the codes look like ISO 639-3 identifiers — confirm.
language_code_to_name = {
    "afr": "Afrikaans",
    "amh": "Amharic",
    "arb": "Modern Standard Arabic",
    "ary": "Moroccan Arabic",
    "arz": "Egyptian Arabic",
    "asm": "Assamese",
    "ast": "Asturian",
    "azj": "North Azerbaijani",
    "bel": "Belarusian",
    "ben": "Bengali",
    "bos": "Bosnian",
    "bul": "Bulgarian",
    "cat": "Catalan",
    "ceb": "Cebuano",
    "ces": "Czech",
    "ckb": "Central Kurdish",
    "cmn": "Mandarin Chinese",
    "cym": "Welsh",
    "dan": "Danish",
    "deu": "German",
    "ell": "Greek",
    "eng": "English",
    "est": "Estonian",
    "eus": "Basque",
    "fin": "Finnish",
    "fra": "French",
    "gaz": "West Central Oromo",
    "gle": "Irish",
    "glg": "Galician",
    "guj": "Gujarati",
    "heb": "Hebrew",
    "hin": "Hindi",
    "hrv": "Croatian",
    "hun": "Hungarian",
    "hye": "Armenian",
    "ibo": "Igbo",
    "ind": "Indonesian",
    "isl": "Icelandic",
    "ita": "Italian",
    "jav": "Javanese",
    "jpn": "Japanese",
    "kam": "Kamba",
    "kan": "Kannada",
    "kat": "Georgian",
    "kaz": "Kazakh",
    "kea": "Kabuverdianu",
    "khk": "Halh Mongolian",
    "khm": "Khmer",
    "kir": "Kyrgyz",
    "kor": "Korean",
    "lao": "Lao",
    "lit": "Lithuanian",
    "ltz": "Luxembourgish",
    "lug": "Ganda",
    "luo": "Luo",
    "lvs": "Standard Latvian",
    "mai": "Maithili",
    "mal": "Malayalam",
    "mar": "Marathi",
    "mkd": "Macedonian",
    "mlt": "Maltese",
    "mni": "Meitei",
    "mya": "Burmese",
    "nld": "Dutch",
    "nno": "Norwegian Nynorsk",
    "nob": "Norwegian Bokm\u00e5l",
    "npi": "Nepali",
    "nya": "Nyanja",
    "oci": "Occitan",
    "ory": "Odia",
    "pan": "Punjabi",
    "pbt": "Southern Pashto",
    "pes": "Western Persian",
    "pol": "Polish",
    "por": "Portuguese",
    "ron": "Romanian",
    "rus": "Russian",
    "slk": "Slovak",
    "slv": "Slovenian",
    "sna": "Shona",
    "snd": "Sindhi",
    "som": "Somali",
    "spa": "Spanish",
    "srp": "Serbian",
    "swe": "Swedish",
    "swh": "Swahili",
    "tam": "Tamil",
    "tel": "Telugu",
    "tgk": "Tajik",
    "tgl": "Tagalog",
    "tha": "Thai",
    "tur": "Turkish",
    "ukr": "Ukrainian",
    "urd": "Urdu",
    "uzn": "Northern Uzbek",
    "vie": "Vietnamese",
    "xho": "Xhosa",
    "yor": "Yoruba",
    "yue": "Cantonese",
    "zlm": "Colloquial Malay",
    "zsm": "Standard Malay",
    "zul": "Zulu",
}

# Reverse lookup: English display name -> language code.
LANGUAGE_NAME_TO_CODE = {
    name: code for code, name in language_code_to_name.items()
}

# Source languages.
# S2ST / S2TT / ASR do not need a source language; T2TT / T2ST use this list.
# Every code here is also a key of language_code_to_name.
text_source_language_codes = [
    "afr", "amh", "arb", "ary", "arz", "asm", "azj", "bel", "ben", "bos",
    "bul", "cat", "ceb", "ces", "ckb", "cmn", "cym", "dan", "deu", "ell",
    "eng", "est", "eus", "fin", "fra", "gaz", "gle", "glg", "guj", "heb",
    "hin", "hrv", "hun", "hye", "ibo", "ind", "isl", "ita", "jav", "jpn",
    "kan", "kat", "kaz", "khk", "khm", "kir", "kor", "lao", "lit", "lug",
    "luo", "lvs", "mai", "mal", "mar", "mkd", "mlt", "mni", "mya", "nld",
    "nno", "nob", "npi", "nya", "ory", "pan", "pbt", "pes", "pol", "por",
    "ron", "rus", "slk", "slv", "sna", "snd", "som", "spa", "srp", "swe",
    "swh", "tam", "tel", "tgk", "tgl", "tha", "tur", "ukr", "urd", "uzn",
    "vie", "yor", "yue", "zsm", "zul",
]
# Display names for the codes above, in alphabetical order.
TEXT_SOURCE_LANGUAGE_NAMES = sorted(
    language_code_to_name[lang_code] for lang_code in text_source_language_codes
)

# Target languages.
# S2ST / T2ST
s2st_target_language_codes = [
    "eng", "arb", "ben", "cat", "ces", "cmn", "cym", "dan", "deu", "est",
    "fin", "fra", "hin", "ind", "ita", "jpn", "kor", "mlt", "nld", "pes",
    "pol", "por", "ron", "rus", "slk", "spa", "swe", "swh", "tel", "tgl",
    "tha", "tur", "ukr", "urd", "uzn", "vie",
]
# Both names are bound to the very same sorted list object.
S2ST_TARGET_LANGUAGE_NAMES = T2ST_TARGET_LANGUAGE_NAMES = sorted(
    language_code_to_name[lang_code] for lang_code in s2st_target_language_codes
)

# S2TT / T2TT / ASR
# These are aliases of TEXT_SOURCE_LANGUAGE_NAMES (same list object, not copies).
S2TT_TARGET_LANGUAGE_NAMES = T2TT_TARGET_LANGUAGE_NAMES = ASR_TARGET_LANGUAGE_NAMES = TEXT_SOURCE_LANGUAGE_NAMES