import gradio as gr
from transformers import pipeline
import torch
import spaces

# Build the speech-recognition pipeline on CPU at import time. The model is
# only moved to the GPU for the duration of a single request (see
# transcribe_speech), and moved back afterwards.
model = pipeline(
    "automatic-speech-recognition",
    model="Aekanun/whisper-small-hi",
    device="cpu",
)


@spaces.GPU
def transcribe_speech(audio):
    """Transcribe a recorded audio file to text, running inference on the GPU.

    Args:
        audio: Path to the recorded audio file as provided by the Gradio
            ``Audio(type="filepath")`` component, or ``None`` when nothing
            was recorded.

    Returns:
        The transcribed text on success. If ``audio`` is ``None`` a prompt
        asking the user to record first is returned; if inference fails, a
        user-facing error message containing the exception text is returned
        instead of raising, so the UI always gets a string to display.
    """
    # Nothing recorded: answer immediately without touching the GPU.
    if audio is None:
        return "กรุณาบันทึกเสียงก่อน"

    try:
        # Move the weights to the GPU *and* update the pipeline's own device
        # attribute, which the pipeline uses to place its input tensors.
        # Moving only `model.model` leaves the inputs on CPU and causes a
        # device-mismatch error during the forward pass.
        model.model = model.model.to("cuda")
        model.device = torch.device("cuda")

        # Mixed-precision inference; torch.autocast replaces the deprecated
        # torch.cuda.amp.autocast spelling.
        with torch.autocast(device_type="cuda"):
            result = model(audio, batch_size=1)

        # The pipeline normally returns a dict with a "text" key; pass any
        # other result shape through unchanged.
        return result["text"] if isinstance(result, dict) else result
    except Exception as e:
        # Top-level UI boundary: report the failure to the user as text
        # rather than letting the request crash.
        return f"เกิดข้อผิดพลาด: {e}"
    finally:
        # Always restore the CPU placement and release cached GPU memory,
        # whether inference succeeded or failed.
        model.model = model.model.to("cpu")
        model.device = torch.device("cpu")
        torch.cuda.empty_cache()


# Gradio UI: one audio input (delivered to the handler as a file path) and
# one text output.
demo = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(label="ข้อความ"),
    title="Thai Speech Transcription",
    description="บันทึกเสียงเพื่อแปลงเป็นข้อความภาษาไทย",
)

if __name__ == "__main__":
    # Enable request queuing and listen on all interfaces.
    demo.queue().launch(server_name="0.0.0.0")