Spaces:
Sleeping
Sleeping
Cleanup code
Browse files
app.py
CHANGED
@@ -12,111 +12,29 @@ import ffmpeg
|
|
12 |
# Limitations (set to -1 to disable)
|
13 |
INPUT_AUDIO_MAX_DURATION = 120 # seconds
|
14 |
|
15 |
-
LANGUAGES = [
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
"Hindi",
|
34 |
-
"Finnish",
|
35 |
-
"Vietnamese",
|
36 |
-
"Hebrew",
|
37 |
-
"Ukrainian",
|
38 |
-
"Greek",
|
39 |
-
"Malay",
|
40 |
-
"Czech",
|
41 |
-
"Romanian",
|
42 |
-
"Danish",
|
43 |
-
"Hungarian",
|
44 |
-
"Tamil",
|
45 |
-
"Norwegian",
|
46 |
-
"Thai",
|
47 |
-
"Urdu",
|
48 |
-
"Croatian",
|
49 |
-
"Bulgarian",
|
50 |
-
"Lithuanian",
|
51 |
-
"Latin",
|
52 |
-
"Maori",
|
53 |
-
"Malayalam",
|
54 |
-
"Welsh",
|
55 |
-
"Slovak",
|
56 |
-
"Telugu",
|
57 |
-
"Persian",
|
58 |
-
"Latvian",
|
59 |
-
"Bengali",
|
60 |
-
"Serbian",
|
61 |
-
"Azerbaijani",
|
62 |
-
"Slovenian",
|
63 |
-
"Kannada",
|
64 |
-
"Estonian",
|
65 |
-
"Macedonian",
|
66 |
-
"Breton",
|
67 |
-
"Basque",
|
68 |
-
"Icelandic",
|
69 |
-
"Armenian",
|
70 |
-
"Nepali",
|
71 |
-
"Mongolian",
|
72 |
-
"Bosnian",
|
73 |
-
"Kazakh",
|
74 |
-
"Albanian",
|
75 |
-
"Swahili",
|
76 |
-
"Galician",
|
77 |
-
"Marathi",
|
78 |
-
"Punjabi",
|
79 |
-
"Sinhala",
|
80 |
-
"Khmer",
|
81 |
-
"Shona",
|
82 |
-
"Yoruba",
|
83 |
-
"Somali",
|
84 |
-
"Afrikaans",
|
85 |
-
"Occitan",
|
86 |
-
"Georgian",
|
87 |
-
"Belarusian",
|
88 |
-
"Tajik",
|
89 |
-
"Sindhi",
|
90 |
-
"Gujarati",
|
91 |
-
"Amharic",
|
92 |
-
"Yiddish",
|
93 |
-
"Lao",
|
94 |
-
"Uzbek",
|
95 |
-
"Faroese",
|
96 |
-
"Haitian Creole",
|
97 |
-
"Pashto",
|
98 |
-
"Turkmen",
|
99 |
-
"Nynorsk",
|
100 |
-
"Maltese",
|
101 |
-
"Sanskrit",
|
102 |
-
"Luxembourgish",
|
103 |
-
"Myanmar",
|
104 |
-
"Tibetan",
|
105 |
-
"Tagalog",
|
106 |
-
"Malagasy",
|
107 |
-
"Assamese",
|
108 |
-
"Tatar",
|
109 |
-
"Hawaiian",
|
110 |
-
"Lingala",
|
111 |
-
"Hausa",
|
112 |
-
"Bashkir",
|
113 |
-
"Javanese",
|
114 |
-
"Sundanese"
|
115 |
]
|
116 |
|
117 |
model_cache = dict()
|
118 |
|
119 |
-
def
|
120 |
source = uploadFile if uploadFile is not None else microphoneData
|
121 |
selectedLanguage = languageName.lower() if len(languageName) > 0 else None
|
122 |
selectedModel = modelName if modelName is not None else "base"
|
@@ -142,6 +60,7 @@ def greet(modelName, languageName, uploadFile, microphoneData, task):
|
|
142 |
|
143 |
return result["text"], segmentStream.read()
|
144 |
|
|
|
145 |
ui_description = "Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse "
|
146 |
ui_description += " audio and is also a multi-task model that can perform multilingual speech recognition "
|
147 |
ui_description += " as well as speech translation and language identification. "
|
@@ -149,7 +68,7 @@ ui_description += " as well as speech translation and language identification. "
|
|
149 |
if INPUT_AUDIO_MAX_DURATION > 0:
|
150 |
ui_description += "\n\n" + "Max audio file length: " + str(INPUT_AUDIO_MAX_DURATION) + " s"
|
151 |
|
152 |
-
demo = gr.Interface(fn=
|
153 |
gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], value="medium", label="Model"),
|
154 |
gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
|
155 |
gr.Audio(source="upload", type="filepath", label="Upload Audio"),
|
|
|
12 |
# Limitations (set to -1 to disable)
|
13 |
INPUT_AUDIO_MAX_DURATION = 120 # seconds
|
14 |
|
15 |
+
LANGUAGES = [
|
16 |
+
"English", "Chinese", "German", "Spanish", "Russian", "Korean",
|
17 |
+
"French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan",
|
18 |
+
"Dutch", "Arabic", "Swedish", "Italian", "Indonesian", "Hindi",
|
19 |
+
"Finnish", "Vietnamese", "Hebrew", "Ukrainian", "Greek", "Malay",
|
20 |
+
"Czech", "Romanian", "Danish", "Hungarian", "Tamil", "Norwegian",
|
21 |
+
"Thai", "Urdu", "Croatian", "Bulgarian", "Lithuanian", "Latin",
|
22 |
+
"Maori", "Malayalam", "Welsh", "Slovak", "Telugu", "Persian",
|
23 |
+
"Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
|
24 |
+
"Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
|
25 |
+
"Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
|
26 |
+
"Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer",
|
27 |
+
"Shona", "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian",
|
28 |
+
"Belarusian", "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish",
|
29 |
+
"Lao", "Uzbek", "Faroese", "Haitian Creole", "Pashto", "Turkmen",
|
30 |
+
"Nynorsk", "Maltese", "Sanskrit", "Luxembourgish", "Myanmar", "Tibetan",
|
31 |
+
"Tagalog", "Malagasy", "Assamese", "Tatar", "Hawaiian", "Lingala",
|
32 |
+
"Hausa", "Bashkir", "Javanese", "Sundanese"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
]
|
34 |
|
35 |
model_cache = dict()
|
36 |
|
37 |
+
def transcribeFile(modelName, languageName, uploadFile, microphoneData, task):
|
38 |
source = uploadFile if uploadFile is not None else microphoneData
|
39 |
selectedLanguage = languageName.lower() if len(languageName) > 0 else None
|
40 |
selectedModel = modelName if modelName is not None else "base"
|
|
|
60 |
|
61 |
return result["text"], segmentStream.read()
|
62 |
|
63 |
+
|
64 |
ui_description = "Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse "
|
65 |
ui_description += " audio and is also a multi-task model that can perform multilingual speech recognition "
|
66 |
ui_description += " as well as speech translation and language identification. "
|
|
|
68 |
if INPUT_AUDIO_MAX_DURATION > 0:
|
69 |
ui_description += "\n\n" + "Max audio file length: " + str(INPUT_AUDIO_MAX_DURATION) + " s"
|
70 |
|
71 |
+
demo = gr.Interface(fn=transcribeFile, description=ui_description, inputs=[
|
72 |
gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], value="medium", label="Model"),
|
73 |
gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
|
74 |
gr.Audio(source="upload", type="filepath", label="Upload Audio"),
|