unijoh committed
Commit 9d60183
1 Parent(s): 0381659

Update app.py

Files changed (1):
  1. app.py +29 -29
app.py CHANGED
@@ -1,35 +1,35 @@
  import gradio as gr
- import torch
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
- import librosa

- # Load pre-trained model and processor
- model_name = "facebook/wav2vec2-base-960h"
- processor = Wav2Vec2Processor.from_pretrained(model_name)
- model = Wav2Vec2ForCTC.from_pretrained(model_name)

- def transcribe(audio):
-     # Load audio
-     audio_input, _ = librosa.load(audio, sr=16000)
-
-     # Tokenize and process
-     inputs = processor(audio_input, sampling_rate=16000, return_tensors="pt", padding=True)
-     with torch.no_grad():
-         logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
-
-     # Get predicted ids
-     predicted_ids = torch.argmax(logits, dim=-1)
-
-     # Decode the ids to text
-     transcription = processor.batch_decode(predicted_ids)
-     return transcription[0]

- # Define the Gradio interface
- iface = gr.Interface(
-     fn=transcribe,
-     inputs=gr.Audio(source="microphone", type="filepath"),
-     outputs="text"
- )

  if __name__ == "__main__":
-     iface.launch()
  import gradio as gr
+ from asr import transcribe
+ from tts import synthesize_speech
+ from lid import identify_language

+ def main():
+     asr_interface = gr.Interface(
+         fn=transcribe,
+         inputs=gr.Audio(type="filepath"),
+         outputs="text",
+         title="Faroese ASR Demo",
+         description="Automatic Speech Recognition for Faroese"
+     )

+     tts_interface = gr.Interface(
+         fn=synthesize_speech,
+         inputs="text",
+         outputs="audio",
+         title="Faroese TTS Demo",
+         description="Text-to-Speech Synthesis for Faroese"
+     )

+     lid_interface = gr.Interface(
+         fn=identify_language,
+         inputs=gr.Audio(type="filepath"),
+         outputs="label",
+         title="Language Identification",
+         description="Identify the language of the spoken input"
+     )
+
+     demo = gr.TabbedInterface([asr_interface, tts_interface, lid_interface], ["ASR", "TTS", "LID"])
+     demo.launch()

  if __name__ == "__main__":
+     main()
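
Note: the rewritten app.py imports transcribe, synthesize_speech and identify_language from sibling modules asr, tts and lid that are not part of this diff. As a rough, hypothetical sketch of what the asr module could contain, the transcription logic removed above can move there almost verbatim; the checkpoint name below is simply the one from the removed code and serves only as a placeholder (a Faroese demo would presumably load a Faroese-capable checkpoint instead).

# asr.py -- hypothetical sketch only; the real module is not included in this commit.
import torch
import librosa
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

# Placeholder checkpoint carried over from the removed app.py code; the actual
# Space presumably swaps in a Faroese-capable model here.
MODEL_NAME = "facebook/wav2vec2-base-960h"

processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)

def transcribe(audio_path):
    # Load the recording as 16 kHz mono, matching the model's expected sampling rate.
    audio_input, _ = librosa.load(audio_path, sr=16000)

    # Feature-extract and run CTC inference (the attention mask is passed along
    # automatically when the processor produces one).
    inputs = processor(audio_input, sampling_rate=16000, return_tensors="pt", padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Greedy CTC decoding: argmax over the vocabulary, then collapse to text.
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)[0]

The tts.synthesize_speech and lid.identify_language functions would follow the same pattern, returning something Gradio can render for the outputs="audio" and outputs="label" components declared above (for example an audio file path or (sample_rate, waveform) tuple, and a label string or confidence dictionary, respectively).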