unijoh committed on
Commit
2244bbb
1 Parent(s): df1a415

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -58
app.py CHANGED
@@ -1,64 +1,41 @@
1
  import gradio as gr
2
- import librosa
3
  from asr import transcribe
4
- from tts import synthesize
5
-
6
- def identify(microphone, file_upload):
7
- LID_SAMPLING_RATE = 16_000
8
-
9
- if (microphone is not None) and (file_upload is not None):
10
- return "WARNING: Using microphone input. Uploaded file will be ignored."
11
-
12
- if (microphone is None) and (file_upload is None):
13
- return "ERROR: Provide an audio file or use the microphone."
14
-
15
- audio_fp = microphone if microphone is not None else file_upload
16
- inputs = librosa.load(audio_fp, sr=LID_SAMPLING_RATE, mono=True)[0]
17
-
18
- return {"Faroese": 1.0}
19
-
20
- demo = gr.Blocks()
21
-
22
- mms_transcribe = gr.Interface(
23
- fn=transcribe,
24
- inputs=[
25
- gr.Audio(source="microphone", type="filepath"),
26
- gr.Audio(source="upload", type="filepath"),
27
- ],
28
- outputs="text",
29
- title="Speech-to-text",
30
- description="Transcribe audio!",
31
- allow_flagging="never",
32
- )
33
-
34
- mms_synthesize = gr.Interface(
35
- fn=synthesize,
36
- inputs=[
37
- gr.Text(label="Input text"),
38
- gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
39
- ],
40
- outputs=gr.Audio(label="Generated Audio", type="numpy"),
41
- title="Text-to-speech",
42
- description="Generate audio!",
43
- allow_flagging="never",
44
- )
45
 
46
- mms_identify = gr.Interface(
47
- fn=identify,
48
- inputs=[
49
- gr.Audio(source="microphone", type="filepath"),
50
- gr.Audio(source="upload", type="filepath"),
51
- ],
52
- outputs=gr.Label(num_top_classes=1),
53
- title="Language Identification",
54
- description="Identify the language of audio!",
55
- allow_flagging="never",
56
- )
57
 
58
- with demo:
59
- gr.TabbedInterface(
60
- [mms_synthesize, mms_transcribe, mms_identify],
61
- ["Text-to-speech", "Speech-to-text", "Language Identification"],
 
 
 
 
 
62
  )
63
 
64
- demo.launch()
 
 
 
 
 
1
  import gradio as gr
 
2
  from asr import transcribe
3
+ from tts import synthesize_speech
4
+ from lid import identify_language
5
+
6
def main():
    """Build the ASR, TTS and LID demos and launch them as one tabbed app.

    Three ``gr.Interface`` objects are created, wired to ``transcribe``,
    ``synthesize_speech`` and ``identify_language`` respectively, then
    grouped under the tab names "ASR", "TTS" and "LID" and launched.
    """

    def audio_inputs():
        # Return a fresh microphone/upload pair on every call so the ASR
        # and LID interfaces each own their own component instances.
        return [
            gr.Audio(source="microphone", type="filepath", label="Microphone"),
            gr.Audio(source="upload", type="filepath", label="Upload"),
        ]

    # Insertion order of this mapping fixes both the order in which the
    # interfaces are constructed and the left-to-right order of the tabs.
    tabs = {
        "ASR": gr.Interface(
            fn=transcribe,
            inputs=audio_inputs(),
            outputs="text",
            title="Faroese ASR Demo",
            description="Automatic Speech Recognition for Faroese",
        ),
        "TTS": gr.Interface(
            fn=synthesize_speech,
            inputs="text",
            outputs="audio",
            title="Faroese TTS Demo",
            description="Text-to-Speech Synthesis for Faroese",
        ),
        "LID": gr.Interface(
            fn=identify_language,
            inputs=audio_inputs(),
            outputs="label",
            title="Language Identification",
            description="Identify the language of the spoken input",
        ),
    }

    app = gr.TabbedInterface(list(tabs.values()), list(tabs))
    app.launch()


# Only start the app when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()