unijoh committed
Commit 9d60183
1 Parent(s): 0381659

Update app.py

Files changed (1):
  1. app.py +29 -29
app.py CHANGED
@@ -1,35 +1,35 @@
  import gradio as gr
- import torch
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
- import librosa

- # Load pre-trained model and processor
- model_name = "facebook/wav2vec2-base-960h"
- processor = Wav2Vec2Processor.from_pretrained(model_name)
- model = Wav2Vec2ForCTC.from_pretrained(model_name)

- def transcribe(audio):
-     # Load audio
-     audio_input, _ = librosa.load(audio, sr=16000)
-
-     # Tokenize and process
-     inputs = processor(audio_input, sampling_rate=16000, return_tensors="pt", padding=True)
-     with torch.no_grad():
-         logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
-
-     # Get predicted ids
-     predicted_ids = torch.argmax(logits, dim=-1)
-
-     # Decode the ids to text
-     transcription = processor.batch_decode(predicted_ids)
-     return transcription[0]

- # Define the Gradio interface
- iface = gr.Interface(
-     fn=transcribe,
-     inputs=gr.Audio(source="microphone", type="filepath"),
-     outputs="text"
- )

  if __name__ == "__main__":
-     iface.launch()
  import gradio as gr
+ from asr import transcribe
+ from tts import synthesize_speech
+ from lid import identify_language

+ def main():
+     asr_interface = gr.Interface(
+         fn=transcribe,
+         inputs=gr.Audio(type="filepath"),
+         outputs="text",
+         title="Faroese ASR Demo",
+         description="Automatic Speech Recognition for Faroese"
+     )

+     tts_interface = gr.Interface(
+         fn=synthesize_speech,
+         inputs="text",
+         outputs="audio",
+         title="Faroese TTS Demo",
+         description="Text-to-Speech Synthesis for Faroese"
+     )

+     lid_interface = gr.Interface(
+         fn=identify_language,
+         inputs=gr.Audio(type="filepath"),
+         outputs="label",
+         title="Language Identification",
+         description="Identify the language of the spoken input"
+     )
+
+     demo = gr.TabbedInterface([asr_interface, tts_interface, lid_interface], ["ASR", "TTS", "LID"])
+     demo.launch()

  if __name__ == "__main__":
+     main()
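
Note: the rewritten app.py imports transcribe, synthesize_speech and identify_language from sibling modules asr, tts and lid that are not part of this diff. As a rough, hypothetical sketch of what the asr module could contain, the transcription logic removed above can move there almost verbatim; the checkpoint name below is simply the one from the removed code and serves only as a placeholder (a Faroese demo would presumably load a Faroese-capable checkpoint instead).

# asr.py -- hypothetical sketch only; the real module is not included in this commit.
import torch
import librosa
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

# Placeholder checkpoint carried over from the removed app.py code; the actual
# Space presumably swaps in a Faroese-capable model here.
MODEL_NAME = "facebook/wav2vec2-base-960h"

processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)

def transcribe(audio_path):
    # Load the recording as 16 kHz mono, matching the model's expected sampling rate.
    audio_input, _ = librosa.load(audio_path, sr=16000)

    # Feature-extract and run CTC inference (the attention mask is passed along
    # automatically when the processor produces one).
    inputs = processor(audio_input, sampling_rate=16000, return_tensors="pt", padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Greedy CTC decoding: argmax over the vocabulary, then collapse to text.
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)[0]

The tts.synthesize_speech and lid.identify_language functions would follow the same pattern, returning something Gradio can render for the outputs="audio" and outputs="label" components declared above (for example an audio file path or (sample_rate, waveform) tuple, and a label string or confidence dictionary, respectively).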