"""Gradio app that transcribes speech with OpenAI Whisper (base.en).

Audio can arrive two ways: recorded from the microphone component, or as a
base64-encoded WAV string pasted into the text box (handy for API-style
callers). The base64 payload, when non-empty, takes precedence.
"""
import base64
import tempfile

import gradio as gr
from transformers import pipeline

# Whisper ASR pipeline; chunk_length_s lets it transcribe clips longer
# than the model's native 30-second window.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base.en",
    chunk_length_s=30,
    device="cpu",
)


def get_transcription(audio, encoded_audio):
    """Transcribe either a recorded clip or a base64-encoded WAV payload.

    Args:
        audio: Filepath of the recorded clip (the Audio component below is
            configured with ``type="filepath"``), or ``None`` when nothing
            was recorded.
        encoded_audio: Optional base64-encoded WAV data; when truthy it is
            decoded to a temp file and used instead of ``audio``.

    Returns:
        The transcribed text as a string.

    Raises:
        binascii.Error: If ``encoded_audio`` is not valid base64.
    """
    # Truthiness check covers both "" and None (the original only
    # compared against "").
    if encoded_audio:
        # Decode to a uniquely-named temp file so concurrent requests
        # don't clobber each other (the original reused "temp_audio.wav").
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            f.write(base64.b64decode(encoded_audio))
            audio = f.name
    # BUG FIX: the original did `audio = audio[1]`, which only makes sense
    # for the default Audio component's (sample_rate, ndarray) tuple. With
    # the configured type="filepath" component actually wired in below,
    # `audio` is already a path string the pipeline can read directly.
    return pipe(audio)["text"]


audio_input = gr.Audio(source="microphone", type="filepath", label="Audio")
# show_copy_button goes in the constructor; `.style()` is deprecated and
# removed in Gradio 4.
transcription = gr.Textbox(label="Transcription", show_copy_button=True)

iface = gr.Interface(
    fn=get_transcription,
    # BUG FIX: the original passed the string "audio", which builds a fresh
    # default component and silently discards the configured one above.
    inputs=[audio_input, "text"],
    outputs=transcription,
    title="Speech Transcription",
)

# Guard the launch so importing this module (e.g. for testing) doesn't
# start the server.
if __name__ == "__main__":
    iface.launch()