# Build a shareable speech-to-text app with Gradio.
#
# Two tabs are exposed: one transcribes audio recorded from the microphone,
# the other transcribes an uploaded audio file. Both use the same
# distil-whisper ASR pipeline.
import os
from typing import Optional

import gradio as gr
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# from datasets import load_dataset

# Use the first CUDA device when one is available; half precision is only
# selected together with the GPU, otherwise fall back to float32 on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# The device/dtype chosen above are handed to the pipeline explicitly;
# without them the pipeline ignores the selection and uses its defaults.
asr = pipeline(
    task="automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
    torch_dtype=torch_dtype,
    device=device,
)

demo = gr.Blocks()


def transcribe_speech(filepath: Optional[str]) -> str:
    """Transcribe the audio file at *filepath* with the module-level ASR pipeline.

    Args:
        filepath: Path to the audio file supplied by the Gradio ``Audio``
            component (configured with ``type="filepath"``), or ``None``
            when no audio was provided.

    Returns:
        The ``"text"`` entry of the pipeline output, or an empty string
        (after showing a Gradio warning) when *filepath* is ``None``.
    """
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    output = asr(filepath)
    return output["text"]


mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

# PORT1 selects the server port when set. When it is missing or empty,
# pass None so Gradio picks its own default instead of crashing on KeyError.
_port = os.environ.get("PORT1")
demo.launch(server_port=int(_port) if _port else None)

# Scratch snippet kept for reference (never executed): inspect a WAV file.
# import soundfile as sf
# import io
# audio, sampling_rate = sf.read('output.wav')
# print(audio.shape)