from transformers import pipeline
import gradio as gr
import base64

# Whisper ASR pipeline; chunk_length_s lets it handle clips longer than 30 s.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base.en",
    chunk_length_s=30,
    device="cpu",
)
def get_transcription(audio, encoded_audio):
    """Transcribe either a recorded audio file or base64-encoded audio bytes."""
    if encoded_audio != "":
        # A base64 string was supplied: decode it to a temporary WAV file
        # and transcribe that instead of the microphone recording.
        decoded_file_data = base64.b64decode(encoded_audio)
        with open("temp_audio.wav", "wb") as f:
            f.write(decoded_file_data)
        audio = "temp_audio.wav"
    # `audio` is a file path (the Audio component uses type="filepath").
    return pipe(audio)["text"]
audio = gr.Audio(source="microphone", type="filepath", label="Audio")
encoded_audio = gr.Textbox(label="Base64-encoded audio")
transcription = gr.Textbox(label="Transcription")

iface = gr.Interface(
    fn=get_transcription,
    inputs=[audio, encoded_audio],
    outputs=transcription.style(show_copy_button=True),
    title="Speech Transcription",
)
iface.launch()
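# Usage sketch (not part of the original app; the file name is hypothetical):
# the second input expects the raw bytes of an audio file encoded as base64,
# which a caller could build like this before pasting it into the textbox:
#
#     import base64
#     with open("sample.wav", "rb") as f:
#         encoded = base64.b64encode(f.read()).decode("utf-8")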