# Spaces: Runtime error  (non-code page residue; kept as a comment so the module parses)
import os
import warnings
class constantConfig:
    """Static lookup tables and file locations used by the application.

    Instantiating the class fills in the attributes below and, as a side
    effect, makes sure the ``Languages`` and ``Temp_Audios`` directories
    exist (relative to the current working directory).
    """

    def __init__(self):
        """Populate the configuration attributes and create work folders.

        Directory creation is best-effort: if a folder cannot be created
        a ``RuntimeWarning`` is emitted and construction still succeeds.
        """
        # Human-readable language name -> FLORES-style code made of a
        # three-letter language tag and a four-letter script tag.
        self.flores_codes = {
            'Acehnese (Arabic script)': 'ace_Arab',
            'Acehnese (Latin script)': 'ace_Latn',
            'Mesopotamian Arabic': 'acm_Arab',
            'Ta’izzi-Adeni Arabic': 'acq_Arab',
            'Tunisian Arabic': 'aeb_Arab',
            'Afrikaans': 'afr_Latn',
            'South Levantine Arabic': 'ajp_Arab',
            'Akan': 'aka_Latn',
            'Amharic': 'amh_Ethi',
            'North Levantine Arabic': 'apc_Arab',
            'Modern Standard Arabic': 'arb_Arab',
            'Modern Standard Arabic (Romanized)': 'arb_Latn',
            'Najdi Arabic': 'ars_Arab',
            'Moroccan Arabic': 'ary_Arab',
            'Egyptian Arabic': 'arz_Arab',
            'Assamese': 'asm_Beng',
            'Asturian': 'ast_Latn',
            'Awadhi': 'awa_Deva',
            'Central Aymara': 'ayr_Latn',
            'South Azerbaijani': 'azb_Arab',
            'North Azerbaijani': 'azj_Latn',
            'Bashkir': 'bak_Cyrl',
            'Bambara': 'bam_Latn',
            'Balinese': 'ban_Latn',
            'Belarusian': 'bel_Cyrl',
            'Bemba': 'bem_Latn',
            'Bengali': 'ben_Beng',
            'Bhojpuri': 'bho_Deva',
            'Banjar (Arabic script)': 'bjn_Arab',
            'Banjar (Latin script)': 'bjn_Latn',
            'Standard Tibetan': 'bod_Tibt',
            'Bosnian': 'bos_Latn',
            'Buginese': 'bug_Latn',
            'Bulgarian': 'bul_Cyrl',
            'Catalan': 'cat_Latn',
            'Cebuano': 'ceb_Latn',
            'Czech': 'ces_Latn',
            'Chokwe': 'cjk_Latn',
            'Central Kurdish': 'ckb_Arab',
            'Crimean Tatar': 'crh_Latn',
            'Welsh': 'cym_Latn',
            'Danish': 'dan_Latn',
            'German': 'deu_Latn',
            'Southwestern Dinka': 'dik_Latn',
            'Dyula': 'dyu_Latn',
            'Dzongkha': 'dzo_Tibt',
            'Greek': 'ell_Grek',
            'English': 'eng_Latn',
            'Esperanto': 'epo_Latn',
            'Estonian': 'est_Latn',
            'Basque': 'eus_Latn',
            'Ewe': 'ewe_Latn',
            'Faroese': 'fao_Latn',
            'Fijian': 'fij_Latn',
            'Finnish': 'fin_Latn',
            'Fon': 'fon_Latn',
            'French': 'fra_Latn',
            'Friulian': 'fur_Latn',
            'Nigerian Fulfulde': 'fuv_Latn',
            'Scottish Gaelic': 'gla_Latn',
            'Irish': 'gle_Latn',
            'Galician': 'glg_Latn',
            'Guarani': 'grn_Latn',
            'Gujarati': 'guj_Gujr',
            'Haitian Creole': 'hat_Latn',
            'Hausa': 'hau_Latn',
            'Hebrew': 'heb_Hebr',
            'Hindi': 'hin_Deva',
            'Chhattisgarhi': 'hne_Deva',
            'Croatian': 'hrv_Latn',
            'Hungarian': 'hun_Latn',
            'Armenian': 'hye_Armn',
            'Igbo': 'ibo_Latn',
            'Ilocano': 'ilo_Latn',
            'Indonesian': 'ind_Latn',
            'Icelandic': 'isl_Latn',
            'Italian': 'ita_Latn',
            'Javanese': 'jav_Latn',
            'Japanese': 'jpn_Jpan',
            'Kabyle': 'kab_Latn',
            'Jingpho': 'kac_Latn',
            'Kamba': 'kam_Latn',
            'Kannada': 'kan_Knda',
            'Kashmiri (Arabic script)': 'kas_Arab',
            'Kashmiri (Devanagari script)': 'kas_Deva',
            'Georgian': 'kat_Geor',
            'Central Kanuri (Arabic script)': 'knc_Arab',
            'Central Kanuri (Latin script)': 'knc_Latn',
            'Kazakh': 'kaz_Cyrl',
            'Kabiyè': 'kbp_Latn',
            'Kabuverdianu': 'kea_Latn',
            'Khmer': 'khm_Khmr',
            'Kikuyu': 'kik_Latn',
            'Kinyarwanda': 'kin_Latn',
            'Kyrgyz': 'kir_Cyrl',
            'Kimbundu': 'kmb_Latn',
            'Northern Kurdish': 'kmr_Latn',
            'Kikongo': 'kon_Latn',
            'Korean': 'kor_Hang',
            'Lao': 'lao_Laoo',
            'Ligurian': 'lij_Latn',
            'Limburgish': 'lim_Latn',
            'Lingala': 'lin_Latn',
            'Lithuanian': 'lit_Latn',
            'Lombard': 'lmo_Latn',
            'Latgalian': 'ltg_Latn',
            'Luxembourgish': 'ltz_Latn',
            'Luba-Kasai': 'lua_Latn',
            'Ganda': 'lug_Latn',
            'Luo': 'luo_Latn',
            'Mizo': 'lus_Latn',
            'Standard Latvian': 'lvs_Latn',
            'Magahi': 'mag_Deva',
            'Maithili': 'mai_Deva',
            'Malayalam': 'mal_Mlym',
            'Marathi': 'mar_Deva',
            'Minangkabau (Arabic script)': 'min_Arab',
            'Minangkabau (Latin script)': 'min_Latn',
            'Macedonian': 'mkd_Cyrl',
            'Plateau Malagasy': 'plt_Latn',
            'Maltese': 'mlt_Latn',
            'Meitei (Bengali script)': 'mni_Beng',
            'Halh Mongolian': 'khk_Cyrl',
            'Mossi': 'mos_Latn',
            'Maori': 'mri_Latn',
            'Burmese': 'mya_Mymr',
            'Dutch': 'nld_Latn',
            'Norwegian Nynorsk': 'nno_Latn',
            'Norwegian Bokmål': 'nob_Latn',
            'Nepali': 'npi_Deva',
            'Northern Sotho': 'nso_Latn',
            'Nuer': 'nus_Latn',
            'Nyanja': 'nya_Latn',
            'Occitan': 'oci_Latn',
            'West Central Oromo': 'gaz_Latn',
            'Odia': 'ory_Orya',
            'Pangasinan': 'pag_Latn',
            'Eastern Panjabi': 'pan_Guru',
            'Papiamento': 'pap_Latn',
            'Western Persian': 'pes_Arab',
            'Polish': 'pol_Latn',
            'Portuguese': 'por_Latn',
            'Dari': 'prs_Arab',
            'Southern Pashto': 'pbt_Arab',
            'Ayacucho Quechua': 'quy_Latn',
            'Romanian': 'ron_Latn',
            'Rundi': 'run_Latn',
            'Russian': 'rus_Cyrl',
            'Sango': 'sag_Latn',
            'Sanskrit': 'san_Deva',
            'Santali': 'sat_Olck',
            'Sicilian': 'scn_Latn',
            'Shan': 'shn_Mymr',
            'Sinhala': 'sin_Sinh',
            'Slovak': 'slk_Latn',
            'Slovenian': 'slv_Latn',
            'Samoan': 'smo_Latn',
            'Shona': 'sna_Latn',
            'Sindhi': 'snd_Arab',
            'Somali': 'som_Latn',
            'Southern Sotho': 'sot_Latn',
            'Spanish': 'spa_Latn',
            'Tosk Albanian': 'als_Latn',
            'Sardinian': 'srd_Latn',
            'Serbian': 'srp_Cyrl',
            'Swati': 'ssw_Latn',
            'Sundanese': 'sun_Latn',
            'Swedish': 'swe_Latn',
            'Swahili': 'swh_Latn',
            'Silesian': 'szl_Latn',
            'Tamil': 'tam_Taml',
            'Tatar': 'tat_Cyrl',
            'Telugu': 'tel_Telu',
            'Tajik': 'tgk_Cyrl',
            'Tagalog': 'tgl_Latn',
            'Thai': 'tha_Thai',
            'Tigrinya': 'tir_Ethi',
            'Tamasheq (Latin script)': 'taq_Latn',
            'Tamasheq (Tifinagh script)': 'taq_Tfng',
            'Tok Pisin': 'tpi_Latn',
            'Tswana': 'tsn_Latn',
            'Tsonga': 'tso_Latn',
            'Turkmen': 'tuk_Latn',
            'Tumbuka': 'tum_Latn',
            'Turkish': 'tur_Latn',
            'Twi': 'twi_Latn',
            'Central Atlas Tamazight': 'tzm_Tfng',
            'Uyghur': 'uig_Arab',
            'Ukrainian': 'ukr_Cyrl',
            'Umbundu': 'umb_Latn',
            'Urdu': 'urd_Arab',
            'Northern Uzbek': 'uzn_Latn',
            'Venetian': 'vec_Latn',
            'Vietnamese': 'vie_Latn',
            'Waray': 'war_Latn',
            'Wolof': 'wol_Latn',
            'Xhosa': 'xho_Latn',
            'Eastern Yiddish': 'ydd_Hebr',
            'Yoruba': 'yor_Latn',
            'Yue Chinese': 'yue_Hant',
            'Chinese (Simplified)': 'zho_Hans',
            'Chinese (Traditional)': 'zho_Hant',
            'Standard Malay': 'zsm_Latn',
            'Zulu': 'zul_Latn',
        }

        # Short size label -> NLLB-200 model identifier string.
        self.model_name_dict = {
            '0.6B': 'facebook/nllb-200-distilled-600M',
            '1.3B': 'facebook/nllb-200-distilled-1.3B',
            '3.3B': 'facebook/nllb-200-3.3B',
        }

        # Two-letter language code -> entry of ``flores_codes``.
        # NOTE(review): presumably the codes reported by Whisper's
        # language detection; only these four are mapped -- confirm.
        self.whisper_codes_to_flores_codes = {
            "de": self.flores_codes['German'],
            "en": self.flores_codes['English'],
            "pl": self.flores_codes['Polish'],
            "hi": self.flores_codes['Hindi'],
        }

        # Language name -> three-letter code. Unlike ``flores_codes`` the
        # keys carry no "(... script)" / "(Simplified)" suffix, so callers
        # must strip it before looking a language up here.
        self.flores_codes_to_tts_codes = {
            'Acehnese': 'ace',
            'Mesopotamian Arabic': 'acm',
            'Ta’izzi-Adeni Arabic': 'acq',
            'Tunisian Arabic': 'aeb',
            'Afrikaans': 'afr',
            'South Levantine Arabic': 'ajp',
            'Akan': 'aka',
            'Amharic': 'amh',
            'North Levantine Arabic': 'apc',
            'Modern Standard Arabic': 'arb',
            'Najdi Arabic': 'ars',
            'Moroccan Arabic': 'ary',
            'Egyptian Arabic': 'arz',
            'Assamese': 'asm',
            'Asturian': 'ast',
            'Awadhi': 'awa',
            'Central Aymara': 'ayr',
            'South Azerbaijani': 'azb',
            'North Azerbaijani': 'azj',
            'Bashkir': 'bak',
            'Bambara': 'bam',
            'Balinese': 'ban',
            'Belarusian': 'bel',
            'Bemba': 'bem',
            'Bengali': 'ben',
            'Bhojpuri': 'bho',
            'Banjar': 'bjn',
            'Standard Tibetan': 'bod',
            'Bosnian': 'bos',
            'Buginese': 'bug',
            'Bulgarian': 'bul',
            'Catalan': 'cat',
            'Cebuano': 'ceb',
            'Czech': 'ces',
            'Chokwe': 'cjk',
            'Central Kurdish': 'ckb',
            'Crimean Tatar': 'crh',
            'Welsh': 'cym',
            'Danish': 'dan',
            'German': 'deu',
            'Southwestern Dinka': 'dik',
            'Dyula': 'dyu',
            'Dzongkha': 'dzo',
            'Greek': 'ell',
            'English': 'eng',
            'Esperanto': 'epo',
            'Estonian': 'est',
            'Basque': 'eus',
            'Ewe': 'ewe',
            'Faroese': 'fao',
            'Fijian': 'fij',
            'Finnish': 'fin',
            'Fon': 'fon',
            'French': 'fra',
            'Friulian': 'fur',
            'Nigerian Fulfulde': 'fuv',
            'Scottish Gaelic': 'gla',
            'Irish': 'gle',
            'Galician': 'glg',
            'Guarani': 'grn',
            'Gujarati': 'guj',
            'Haitian Creole': 'hat',
            'Hausa': 'hau',
            'Hebrew': 'heb',
            'Hindi': 'hin',
            'Chhattisgarhi': 'hne',
            'Croatian': 'hrv',
            'Hungarian': 'hun',
            'Armenian': 'hye',
            'Igbo': 'ibo',
            'Ilocano': 'ilo',
            'Indonesian': 'ind',
            'Icelandic': 'isl',
            'Italian': 'ita',
            'Javanese': 'jav',
            'Japanese': 'jpn',
            'Kabyle': 'kab',
            'Jingpho': 'kac',
            'Kamba': 'kam',
            'Kannada': 'kan',
            'Kashmiri': 'kas',
            'Georgian': 'kat',
            'Central Kanuri': 'knc',
            'Kazakh': 'kaz',
            'Kabiyè': 'kbp',
            'Kabuverdianu': 'kea',
            'Khmer': 'khm',
            'Kikuyu': 'kik',
            'Kinyarwanda': 'kin',
            'Kyrgyz': 'kir',
            'Kimbundu': 'kmb',
            'Northern Kurdish': 'kmr',
            'Kikongo': 'kon',
            'Korean': 'kor',
            'Lao': 'lao',
            'Ligurian': 'lij',
            'Limburgish': 'lim',
            'Lingala': 'lin',
            'Lithuanian': 'lit',
            'Lombard': 'lmo',
            'Latgalian': 'ltg',
            'Luxembourgish': 'ltz',
            'Luba-Kasai': 'lua',
            'Ganda': 'lug',
            'Luo': 'luo',
            'Mizo': 'lus',
            'Standard Latvian': 'lvs',
            'Magahi': 'mag',
            'Maithili': 'mai',
            'Malayalam': 'mal',
            'Marathi': 'mar',
            'Minangkabau': 'min',
            'Macedonian': 'mkd',
            'Plateau Malagasy': 'plt',
            'Maltese': 'mlt',
            'Meitei': 'mni',
            'Halh Mongolian': 'khk',
            'Mossi': 'mos',
            'Maori': 'mri',
            'Burmese': 'mya',
            'Dutch': 'nld',
            'Norwegian Nynorsk': 'nno',
            'Norwegian Bokmål': 'nob',
            'Nepali': 'npi',
            'Northern Sotho': 'nso',
            'Nuer': 'nus',
            'Nyanja': 'nya',
            'Occitan': 'oci',
            'West Central Oromo': 'gaz',
            'Odia': 'ory',
            'Pangasinan': 'pag',
            'Eastern Panjabi': 'pan',
            'Papiamento': 'pap',
            'Western Persian': 'pes',
            'Polish': 'pol',
            'Portuguese': 'por',
            'Dari': 'prs',
            'Southern Pashto': 'pbt',
            'Ayacucho Quechua': 'quy',
            'Romanian': 'ron',
            'Rundi': 'run',
            'Russian': 'rus',
            'Sango': 'sag',
            'Sanskrit': 'san',
            'Santali': 'sat',
            'Sicilian': 'scn',
            'Shan': 'shn',
            'Sinhala': 'sin',
            'Slovak': 'slk',
            'Slovenian': 'slv',
            'Samoan': 'smo',
            'Shona': 'sna',
            'Sindhi': 'snd',
            'Somali': 'som',
            'Southern Sotho': 'sot',
            'Spanish': 'spa',
            'Tosk Albanian': 'als',
            'Sardinian': 'srd',
            'Serbian': 'srp',
            'Swati': 'ssw',
            'Sundanese': 'sun',
            'Swedish': 'swe',
            'Swahili': 'swh',
            'Silesian': 'szl',
            'Tamil': 'tam',
            'Tatar': 'tat',
            'Telugu': 'tel',
            'Tajik': 'tgk',
            'Tagalog': 'tgl',
            'Thai': 'tha',
            'Tigrinya': 'tir',
            'Tamasheq': 'taq',
            'Tok Pisin': 'tpi',
            'Tswana': 'tsn',
            'Tsonga': 'tso',
            'Turkmen': 'tuk',
            'Tumbuka': 'tum',
            'Turkish': 'tur',
            'Twi': 'twi',
            'Central Atlas Tamazight': 'tzm',
            'Uyghur': 'uig',
            'Ukrainian': 'ukr',
            'Umbundu': 'umb',
            'Urdu': 'urd',
            'Northern Uzbek': 'uzn',
            'Venetian': 'vec',
            'Vietnamese': 'vie',
            'Waray': 'war',
            'Wolof': 'wol',
            'Xhosa': 'xho',
            'Eastern Yiddish': 'ydd',
            'Yoruba': 'yor',
            'Yue Chinese': 'yue',
            'Chinese': 'zho',
            'Standard Malay': 'zsm',
            'Zulu': 'zul',
        }

        # Locations and file names for the per-language TTS resources.
        self.language_directory = 'Languages'
        self.uroman_directory = 'aux_files'
        self.language_download_web = 'https://dl.fbaipublicfiles.com/mms/tts'
        self.language_vocab_text = "vocab.txt"
        self.language_vocab_configuration = "config.json"
        self.language_vocab_model = "G_100000.pth"

        # Temporary audio files all live under one scratch folder.
        self.temp_audio_folder = 'Temp_Audios'
        self.text2speech_wavfile = f'{self.temp_audio_folder}/text2speech.wav'
        self.enhanced_speech_file = f"{self.temp_audio_folder}/enhanced.mp3"
        self.input_speech_file = f'{self.temp_audio_folder}/output.wav'

        # Make sure the working folders exist. ``exist_ok=True`` covers the
        # common "already there" case; any other OS-level failure is
        # reported but not raised, keeping construction best-effort.
        for directory in (self.language_directory, self.temp_audio_folder):
            try:
                os.makedirs(directory, exist_ok=True)
            except OSError as err:
                warnings.warn(
                    f"Could not create directory {directory!r}: {err}",
                    RuntimeWarning,
                    stacklevel=2,
                )