aadnk committed on
Commit
93c4867
1 Parent(s): 68a16d3

Cleanup code

Browse files
Files changed (1) hide show
  1. app.py +21 -102
app.py CHANGED
@@ -12,111 +12,29 @@ import ffmpeg
12
  # Limitations (set to -1 to disable)
13
  INPUT_AUDIO_MAX_DURATION = 120 # seconds
14
 
15
- LANGUAGES = [
16
- "English",
17
- "Chinese",
18
- "German",
19
- "Spanish",
20
- "Russian",
21
- "Korean",
22
- "French",
23
- "Japanese",
24
- "Portuguese",
25
- "Turkish",
26
- "Polish",
27
- "Catalan",
28
- "Dutch",
29
- "Arabic",
30
- "Swedish",
31
- "Italian",
32
- "Indonesian",
33
- "Hindi",
34
- "Finnish",
35
- "Vietnamese",
36
- "Hebrew",
37
- "Ukrainian",
38
- "Greek",
39
- "Malay",
40
- "Czech",
41
- "Romanian",
42
- "Danish",
43
- "Hungarian",
44
- "Tamil",
45
- "Norwegian",
46
- "Thai",
47
- "Urdu",
48
- "Croatian",
49
- "Bulgarian",
50
- "Lithuanian",
51
- "Latin",
52
- "Maori",
53
- "Malayalam",
54
- "Welsh",
55
- "Slovak",
56
- "Telugu",
57
- "Persian",
58
- "Latvian",
59
- "Bengali",
60
- "Serbian",
61
- "Azerbaijani",
62
- "Slovenian",
63
- "Kannada",
64
- "Estonian",
65
- "Macedonian",
66
- "Breton",
67
- "Basque",
68
- "Icelandic",
69
- "Armenian",
70
- "Nepali",
71
- "Mongolian",
72
- "Bosnian",
73
- "Kazakh",
74
- "Albanian",
75
- "Swahili",
76
- "Galician",
77
- "Marathi",
78
- "Punjabi",
79
- "Sinhala",
80
- "Khmer",
81
- "Shona",
82
- "Yoruba",
83
- "Somali",
84
- "Afrikaans",
85
- "Occitan",
86
- "Georgian",
87
- "Belarusian",
88
- "Tajik",
89
- "Sindhi",
90
- "Gujarati",
91
- "Amharic",
92
- "Yiddish",
93
- "Lao",
94
- "Uzbek",
95
- "Faroese",
96
- "Haitian Creole",
97
- "Pashto",
98
- "Turkmen",
99
- "Nynorsk",
100
- "Maltese",
101
- "Sanskrit",
102
- "Luxembourgish",
103
- "Myanmar",
104
- "Tibetan",
105
- "Tagalog",
106
- "Malagasy",
107
- "Assamese",
108
- "Tatar",
109
- "Hawaiian",
110
- "Lingala",
111
- "Hausa",
112
- "Bashkir",
113
- "Javanese",
114
- "Sundanese"
115
  ]
116
 
117
  model_cache = dict()
118
 
119
- def greet(modelName, languageName, uploadFile, microphoneData, task):
120
  source = uploadFile if uploadFile is not None else microphoneData
121
  selectedLanguage = languageName.lower() if len(languageName) > 0 else None
122
  selectedModel = modelName if modelName is not None else "base"
@@ -142,6 +60,7 @@ def greet(modelName, languageName, uploadFile, microphoneData, task):
142
 
143
  return result["text"], segmentStream.read()
144
 
 
145
  ui_description = "Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse "
146
  ui_description += " audio and is also a multi-task model that can perform multilingual speech recognition "
147
  ui_description += " as well as speech translation and language identification. "
@@ -149,7 +68,7 @@ ui_description += " as well as speech translation and language identification. "
149
  if INPUT_AUDIO_MAX_DURATION > 0:
150
  ui_description += "\n\n" + "Max audio file length: " + str(INPUT_AUDIO_MAX_DURATION) + " s"
151
 
152
- demo = gr.Interface(fn=greet, description=ui_description, inputs=[
153
  gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], value="medium", label="Model"),
154
  gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
155
  gr.Audio(source="upload", type="filepath", label="Upload Audio"),
 
12
  # Limitations (set to -1 to disable)
13
  INPUT_AUDIO_MAX_DURATION = 120 # seconds
14
 
15
+ LANGUAGES = [
16
+ "English", "Chinese", "German", "Spanish", "Russian", "Korean",
17
+ "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan",
18
+ "Dutch", "Arabic", "Swedish", "Italian", "Indonesian", "Hindi",
19
+ "Finnish", "Vietnamese", "Hebrew", "Ukrainian", "Greek", "Malay",
20
+ "Czech", "Romanian", "Danish", "Hungarian", "Tamil", "Norwegian",
21
+ "Thai", "Urdu", "Croatian", "Bulgarian", "Lithuanian", "Latin",
22
+ "Maori", "Malayalam", "Welsh", "Slovak", "Telugu", "Persian",
23
+ "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
24
+ "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
25
+ "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
26
+ "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer",
27
+ "Shona", "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian",
28
+ "Belarusian", "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish",
29
+ "Lao", "Uzbek", "Faroese", "Haitian Creole", "Pashto", "Turkmen",
30
+ "Nynorsk", "Maltese", "Sanskrit", "Luxembourgish", "Myanmar", "Tibetan",
31
+ "Tagalog", "Malagasy", "Assamese", "Tatar", "Hawaiian", "Lingala",
32
+ "Hausa", "Bashkir", "Javanese", "Sundanese"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  ]
34
 
35
  model_cache = dict()
36
 
37
+ def transcribeFile(modelName, languageName, uploadFile, microphoneData, task):
38
  source = uploadFile if uploadFile is not None else microphoneData
39
  selectedLanguage = languageName.lower() if len(languageName) > 0 else None
40
  selectedModel = modelName if modelName is not None else "base"
 
60
 
61
  return result["text"], segmentStream.read()
62
 
63
+
64
  ui_description = "Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse "
65
  ui_description += " audio and is also a multi-task model that can perform multilingual speech recognition "
66
  ui_description += " as well as speech translation and language identification. "
 
68
  if INPUT_AUDIO_MAX_DURATION > 0:
69
  ui_description += "\n\n" + "Max audio file length: " + str(INPUT_AUDIO_MAX_DURATION) + " s"
70
 
71
+ demo = gr.Interface(fn=transcribeFile, description=ui_description, inputs=[
72
  gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], value="medium", label="Model"),
73
  gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
74
  gr.Audio(source="upload", type="filepath", label="Upload Audio"),