RASMUS committed on
Commit 668f0a9
1 Parent(s): 353dae0

Update app.py

Files changed (1)
  1. app.py +117 -102
app.py CHANGED
@@ -43,108 +43,123 @@ headers = {'Authorization': os.environ['DeepL_API_KEY']}
 whisper_models = ["base", "small", "medium", "large", "base.en"]
 
 
+LANGUAGES = {
+    "en": "english",
+    "zh": "chinese",
+    "de": "german",
+    "es": "spanish",
+    "ru": "russian",
+    "ko": "korean",
+    "fr": "french",
+    "ja": "japanese",
+    "pt": "portuguese",
+    "tr": "turkish",
+    "pl": "polish",
+    "ca": "catalan",
+    "nl": "dutch",
+    "ar": "arabic",
+    "sv": "swedish",
+    "it": "italian",
+    "id": "indonesian",
+    "hi": "hindi",
+    "fi": "finnish",
+    "vi": "vietnamese",
+    "he": "hebrew",
+    "uk": "ukrainian",
+    "el": "greek",
+    "ms": "malay",
+    "cs": "czech",
+    "ro": "romanian",
+    "da": "danish",
+    "hu": "hungarian",
+    "ta": "tamil",
+    "no": "norwegian",
+    "th": "thai",
+    "ur": "urdu",
+    "hr": "croatian",
+    "bg": "bulgarian",
+    "lt": "lithuanian",
+    "la": "latin",
+    "mi": "maori",
+    "ml": "malayalam",
+    "cy": "welsh",
+    "sk": "slovak",
+    "te": "telugu",
+    "fa": "persian",
+    "lv": "latvian",
+    "bn": "bengali",
+    "sr": "serbian",
+    "az": "azerbaijani",
+    "sl": "slovenian",
+    "kn": "kannada",
+    "et": "estonian",
+    "mk": "macedonian",
+    "br": "breton",
+    "eu": "basque",
+    "is": "icelandic",
+    "hy": "armenian",
+    "ne": "nepali",
+    "mn": "mongolian",
+    "bs": "bosnian",
+    "kk": "kazakh",
+    "sq": "albanian",
+    "sw": "swahili",
+    "gl": "galician",
+    "mr": "marathi",
+    "pa": "punjabi",
+    "si": "sinhala",
+    "km": "khmer",
+    "sn": "shona",
+    "yo": "yoruba",
+    "so": "somali",
+    "af": "afrikaans",
+    "oc": "occitan",
+    "ka": "georgian",
+    "be": "belarusian",
+    "tg": "tajik",
+    "sd": "sindhi",
+    "gu": "gujarati",
+    "am": "amharic",
+    "yi": "yiddish",
+    "lo": "lao",
+    "uz": "uzbek",
+    "fo": "faroese",
+    "ht": "haitian creole",
+    "ps": "pashto",
+    "tk": "turkmen",
+    "nn": "nynorsk",
+    "mt": "maltese",
+    "sa": "sanskrit",
+    "lb": "luxembourgish",
+    "my": "myanmar",
+    "bo": "tibetan",
+    "tl": "tagalog",
+    "mg": "malagasy",
+    "as": "assamese",
+    "tt": "tatar",
+    "haw": "hawaiian",
+    "ln": "lingala",
+    "ha": "hausa",
+    "ba": "bashkir",
+    "jw": "javanese",
+    "su": "sundanese",
+}
+
+# language code lookup by name, with a few language aliases
 source_languages = {
-    "Afrikaans":"af",
-    "Amharic":"am",
-    "Arabic":"ar",
-    "Asturian ":"st",
-    "Azerbaijani":"az",
-    "Bashkir":"ba",
-    "Belarusian":"be",
-    "Bulgarian":"bg",
-    "Bengali":"bn",
-    "Breton":"br",
-    "Bosnian":"bs",
-    "Catalan; Valencian":"ca",
-    "Cebuano":"eb",
-    "Czech":"cs",
-    "Welsh":"cy",
-    "Danish":"da",
-    "German":"de",
-    "Greeek":"el",
-    "English":"en",
-    "Spanish":"es",
-    "Estonian":"et",
-    "Persian":"fa",
-    "Fulah":"ff",
-    "Finnish":"fi",
-    "French":"fr",
-    "Western Frisian":"fy",
-    "Irish":"ga",
-    "Gaelic; Scottish Gaelic":"gd",
-    "Galician":"gl",
-    "Gujarati":"gu",
-    "Hausa":"ha",
-    "Hebrew":"he",
-    "Hindi":"hi",
-    "Croatian":"hr",
-    "Haitian; Haitian Creole":"ht",
-    "Hungarian":"hu",
-    "Armenian":"hy",
-    "Indonesian":"id",
-    "Igbo":"ig",
-    "Iloko":"lo",
-    "Icelandic":"is",
-    "Italian":"it",
-    "Japanese":"ja",
-    "Javanese":"jv",
-    "Georgian":"ka",
-    "Kazakh":"kk",
-    "Central Khmer":"km",
-    "Kannada":"kn",
-    "Korean":"ko",
-    "Luxembourgish; Letzeburgesch":"lb",
-    "Ganda":"lg",
-    "Lingala":"ln",
-    "Lao":"lo",
-    "Lithuanian":"lt",
-    "Latvian":"lv",
-    "Malagasy":"mg",
-    "Macedonian":"mk",
-    "Malayalam":"ml",
-    "Mongolian":"mn",
-    "Marathi":"mr",
-    "Malay":"ms",
-    "Burmese":"my",
-    "Nepali":"ne",
-    "Dutch; Flemish":"nl",
-    "Norwegian":"no",
-    "Northern Sotho":"ns",
-    "Occitan (post 1500)":"oc",
-    "Oriya":"or",
-    "Panjabi; Punjabi":"pa",
-    "Polish":"pl",
-    "Pushto; Pashto":"ps",
-    "Portuguese":"pt",
-    "Romanian; Moldavian; Moldovan":"ro",
-    "Russian":"ru",
-    "Sindhi":"sd",
-    "Sinhala; Sinhalese":"si",
-    "Slovak":"sk",
-    "Slovenian":"sl",
-    "Somali":"so",
-    "Albanian":"sq",
-    "Serbian":"sr",
-    "Swati":"ss",
-    "Sundanese":"su",
-    "Swedish":"sv",
-    "Swahili":"sw",
-    "Tamil":"ta",
-    "Thai":"th",
-    "Tagalog":"tl",
-    "Tswana":"tn",
-    "Turkish":"tr",
-    "Ukrainian":"uk",
-    "Urdu":"ur",
-    "Uzbek":"uz",
-    "Vietnamese":"vi",
-    "Wolof":"wo",
-    "Xhosa":"xh",
-    "Yiddish":"yi",
-    "Yoruba":"yo",
-    "Chinese":"zh",
-    "Zulu":"zu",
-    "Let the model analyze": "Let the model analyze"
+    **{language: code for code, language in LANGUAGES.items()},
+    "burmese": "my",
+    "valencian": "ca",
+    "flemish": "nl",
+    "haitian": "ht",
+    "letzeburgesch": "lb",
+    "pushto": "ps",
+    "panjabi": "pa",
+    "moldavian": "ro",
+    "moldovan": "ro",
+    "sinhalese": "si",
+    "castilian": "es",
+    "Let the model analyze": "Let the model analyze"
 }
 
 DeepL_language_codes_for_translation = {
@@ -279,7 +294,7 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model):
 
 def translate_transcriptions(df, selected_translation_lang_2):
     if selected_translation_lang_2 is None:
-        selected_translation_lang_2 = 'English'
+        selected_translation_lang_2 = 'english'
    df.reset_index(inplace=True)
 
    print("start_translation")