# Hugging Face Space: Thai speech -> English text translation demo (Gradio + Whisper)
import gradio as gr
from transformers import pipeline
import torch

# Whisper-small checkpoint fine-tuned for Thai speech.
MODEL_NAME = "Rookiezz/whisper-small-th"

# transformers pipelines take a CUDA device index (int) or the string "cpu".
device = 0 if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    # chunk_length_s is measured in SECONDS; Whisper was trained on 30-second
    # windows. The original value (448) is the model's max decoder *token*
    # length, not a duration — using it here would feed absurdly long chunks.
    chunk_length_s=30,
    device=device,
)

# Force the decoder prompt to Thai source + "translate" task so the model
# emits English instead of a Thai transcription.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language="th",
    task="translate",
)
def transcribe(audio_file_path):
    """Translate one uploaded audio file to English text.

    Parameters
    ----------
    audio_file_path : str
        Filesystem path to the audio clip handed over by Gradio
        (``gr.Audio(type="filepath")``).

    Returns
    -------
    str
        Text produced by the Whisper pipeline (English, since the
        pipeline is configured with the "translate" task).
    """
    output = pipe(audio_file_path)
    return output["text"]
# Assemble the UI declaratively: one audio-upload input, one text output.
_ui_config = dict(
    fn=transcribe,                       # callback invoked per upload
    inputs=gr.Audio(type="filepath"),    # Gradio saves the upload and passes its path
    outputs="text",                      # render the returned string directly
    title="Thai-to-English Audio Translation",
    description="Upload an audio file in Thai, and this app will transcribe and translate it to English.",
    allow_flagging="never",              # flagging UI disabled for this demo
)
interface = gr.Interface(**_ui_config)

# Only start the server when executed as a script; share=True requests a
# public tunnel URL in addition to the local one.
if __name__ == "__main__":
    interface.launch(share=True)