File size: 2,509 Bytes
16ff511
6a91da6
bae72eb
6a91da6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85eedc6
6a91da6
85eedc6
6a91da6
 
 
 
85eedc6
 
 
 
 
 
 
 
 
 
 
 
 
 
6a91da6
 
85eedc6
 
 
6a91da6
 
 
85eedc6
6a91da6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import spaces
import time
import os

import torch
import gradio as gr
from transformers.pipelines import pipeline
import utils

from config import (
    MODEL_PATHS,
    SUPPORTED_LANGUAGES,
    CUSTOM_CSS,
)

# Select the demo language here; must be one of config.SUPPORTED_LANGUAGES
# (currently: en, de, lb).
LANGUAGE = "lb"
if LANGUAGE not in SUPPORTED_LANGUAGES:
    # Fail fast at import time with a clear message rather than crashing
    # later on a KeyError when the model path is looked up.
    print(f"language ({LANGUAGE}) not supported. Use one of {SUPPORTED_LANGUAGES}")
    # `raise SystemExit` instead of `exit()`: exit() is a site.py convenience
    # that is absent under `python -S` or in embedded interpreters.
    raise SystemExit(1)
MODEL_PATH = MODEL_PATHS[LANGUAGE]

# Lazily-initialized ASR pipeline; created on first transcription request
# inside the GPU-allocated worker (see transcribe_gradio).
_asr_pipeline = None

@spaces.GPU
def transcribe_gradio(audio_path: str | None) -> str:
    """Transcribe the audio file at *audio_path* and report inference time.

    Returns the transcript followed by the elapsed wall-clock time, or a
    user-facing warning/error string when no audio was supplied or the
    pipeline raised.
    """
    global _asr_pipeline

    # Guard clause: Gradio passes None when nothing was recorded/uploaded.
    if not audio_path:
        return "⚠️  Please record something or choose a file first."

    # Build the ASR pipeline once and reuse it across calls; construction
    # (model download + load) dominates the first request's latency.
    if _asr_pipeline is None:
        _asr_pipeline = pipeline(
            "automatic-speech-recognition",
            model=MODEL_PATH,
            device=0 if torch.cuda.is_available() else -1,
            chunk_length_s=30,
            stride_length_s=(4, 2),
            batch_size=8,
            token=os.getenv("HF_TOKEN"),
        )

    t0 = time.time()
    try:
        output = _asr_pipeline(audio_path)
        transcript = output["text"] if isinstance(output, dict) else str(output)
    except Exception as err:  # surface the failure in the UI instead of crashing
        return f"❌ {err}"
    elapsed = time.time() - t0

    return f"{transcript}\n\nβŒ› Inference time: {elapsed:.2f} s"

# gradio interface: two-column layout (audio in / transcript out) plus a
# button row wiring transcribe_gradio to the widgets.
with gr.Blocks(title="Wave2Vec (Luxembourgish) ", theme="soft", css=CUSTOM_CSS) as demo:
    gr.Markdown("""
    # πŸŽ™οΈ Speech-to-Text Demo β€” Wave2Vec (Luxembourgish) 
    Use **Record** to capture speech live or **Upload** to select an audio file (.wav, .mp3, .flac).  
    Hit **Transcribe** to convert your recording into text, and **Clear** to reset both fields.
    """)

    # Left: microphone/file input; right: read-only transcript box.
    with gr.Row():
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Input audio",
            autoplay=False,
        )
        output_text = gr.Textbox(
            label="Transcript",
            placeholder="Your transcript will appear here …",
            show_copy_button=True,
            lines=10,
        )

    # Button row (the unused `as row` alias was removed).
    with gr.Row(equal_height=True, elem_classes="centered-row"):
        transcribe_btn = gr.Button("Transcribe ✨", scale=0)
        # ClearButton resets both the audio input and the transcript box.
        clear_btn = gr.ClearButton(
            [audio_input, output_text], scale=0, elem_classes="clear-btn"
        )

    transcribe_btn.click(transcribe_gradio, inputs=audio_input, outputs=output_text)


if __name__ == "__main__":
    # Launch the Gradio server only when run as a script (not on import).
    demo.launch()