Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import whisperx | |
# --- Runtime configuration ---------------------------------------------
# Free-tier Spaces only provide a CPU, and float16 only works on GPU,
# so inference is configured for CPU with the int8 compute type.
device = "cpu"
compute_type = "int8"

# --- Model ---------------------------------------------------------------
# Portuguese fine-tuned Whisper model, loaded once when the module is run.
model_name = "inesc-id/WhisperLv3-EP-X"

# Keyword arguments for the loader, gathered in one place for readability.
_load_options = {
    "device": device,
    "compute_type": compute_type,
    "language": "pt",
    "task": "transcribe",
}
model = whisperx.load_model(model_name, **_load_options)
| # ----------------------------- | |
| # Transcription function | |
| # ----------------------------- | |
def transcribe(audio_file):
    """Transcribe an audio recording to Portuguese text.

    Args:
        audio_file: Path to the audio file to transcribe (the Gradio audio
            input is configured to hand over a file path). May be ``None``
            or empty when nothing was recorded or uploaded.

    Returns:
        The text of all recognised segments joined by single spaces, or an
        empty string when there is no input or no segment was produced.
    """
    # Gradio passes None when the form is submitted without audio; return
    # early instead of handing a missing path to the audio loader.
    if not audio_file:
        return ""

    # Load the audio and resample it to 16 kHz.
    audio = whisperx.load_audio(audio_file, sr=16000)

    result = model.transcribe(audio, batch_size=4, language="pt", task="transcribe")

    # Strip each segment before joining so that any surrounding whitespace
    # in a segment's text does not turn into leading or doubled spaces.
    pieces = (segment["text"].strip() for segment in result["segments"])
    return " ".join(piece for piece in pieces if piece)
| # ----------------------------- | |
| # Gradio interface | |
| # ----------------------------- | |
# Static text shown on the demo page.
_TITLE = "CAMÕES European Portuguese Automatic Speech Recognition Demo"
_DESCRIPTION = """
This is a demo for **CAMÕES**, a Whisper Model fine-tuned on around 420h of European Portuguese by the HLT lab at INESC-ID.
The model being used here is "WhisperLv3-X". For more details about CAMÕES check out the [paper here](https://arxiv.org/abs/2508.19721).
"""

# Audio can come from the microphone or an upload; the handler function
# receives it as a file path.
_audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")

demo = gr.Interface(
    fn=transcribe,
    inputs=_audio_input,
    outputs="text",
    title=_TITLE,
    description=_DESCRIPTION,
)

# Start the web UI.
demo.launch()