"""Gradio demo app: Khmer speech-to-text using a fine-tuned Whisper model."""

import logging
from typing import Optional

import gradio as gr
import torch
from transformers import pipeline

logger = logging.getLogger(__name__)

# Load the Whisper speech-recognition pipeline once at import time so every
# request reuses the same model instance.
model_name = "Vira21/Whisper-Small-Khmer"
whisper_pipeline = pipeline(
    "automatic-speech-recognition",
    model=model_name,
    # Use the first GPU if available, otherwise fall back to CPU (-1).
    device=0 if torch.cuda.is_available() else -1,
)


def transcribe_audio(audio: Optional[str]) -> str:
    """Transcribe an audio file and return the recognized text.

    Args:
        audio: Path to the audio file supplied by the Gradio ``Audio``
            component (configured with ``type="filepath"``), or ``None``
            when the user submitted without providing any audio.

    Returns:
        The transcription text on success; otherwise a human-readable
        message describing why no transcription was produced. This function
        never raises, so the UI always has a string to display.
    """
    # Guard against an empty submission instead of letting the pipeline fail
    # with an opaque internal error.
    if not audio:
        return "No audio provided. Please upload a file or record your voice."

    try:
        return whisper_pipeline(audio)["text"]
    except Exception as e:
        # Top-level UI boundary: keep the traceback in the server log and
        # surface a readable message to the user.
        logger.exception("Transcription failed for %r", audio)
        return f"An error occurred during transcription: {str(e)}"


# Gradio interface definition.
# NOTE(review): the title says "Whisper Base" but the loaded model is
# "Whisper-Small-Khmer" -- confirm which is intended before changing the
# user-facing string.
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Whisper Base Khmer Speech-to-Text",
    description="Upload an audio file or record your voice to get the transcription in Khmer.",
    examples=[
        ["Example Audio/126.wav"],
        ["Example Audio/232.wav"],
        ["Example Audio/tomholland28282.wav"],
    ],
    allow_flagging="never",  # Disables flagging to save resources
)

# Launch the app with queuing enabled so requests are processed in order
# rather than all at once (helps on constrained/free CPU hosts).
if __name__ == "__main__":
    interface.queue()
    interface.launch()