# Spaces: Runtime error
import traceback

import gradio as gr
import librosa
import numpy as np
from transformers import pipeline
# Build the speech-recognition pipeline once at import time; the
# transcription callback below reuses this single instance.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
)
def transcribe_long_form(file_info):
    """Transcribe an audio clip to text with the module-level ASR pipeline.

    Args:
        file_info: The value delivered by a ``gr.Audio`` input. Either a
            path to an audio file (``type="filepath"``) or a two-element
            tuple holding a sample rate and a NumPy sample array
            (``type="numpy"``). The tuple may be ordered
            ``(sample_rate, samples)``, as Gradio sends it, or
            ``(samples, sample_rate)``, as this function originally
            expected; the array element is recognised by its ``ndim``.

    Returns:
        The transcribed text, ``"No audio file provided."`` when the input
        is empty, or an ``"An error occurred: ..."`` message when any step
        fails (the traceback is printed to stderr).
    """
    if not file_info:
        return "No audio file provided."

    target_rate = 16000  # rate the audio is normalised to before inference

    try:
        if isinstance(file_info, str) or hasattr(file_info, "__fspath__"):
            # File path: let librosa decode, downmix and resample in one go.
            audio_mono, _ = librosa.load(file_info, sr=target_rate, mono=True)
        else:
            first, second = file_info
            # Accept both tuple orders; only the sample array has ndim > 0.
            if getattr(first, "ndim", 0) > 0:
                audio_array, sample_rate = first, second
            else:
                sample_rate, audio_array = first, second

            audio_array = np.asarray(audio_array)

            # librosa only accepts floating-point audio, so integer PCM
            # (e.g. int16) is scaled into the [-1.0, 1.0] range first.
            if np.issubdtype(audio_array.dtype, np.integer):
                limits = np.iinfo(audio_array.dtype)
                full_scale = float(max(abs(limits.min), limits.max))
                audio_array = audio_array.astype(np.float32) / full_scale
            else:
                audio_array = audio_array.astype(np.float32)

            if audio_array.ndim > 1:
                # librosa.to_mono expects (channels, samples). The channel
                # axis is taken to be the shorter one, so a
                # (samples, channels) array is transposed first.
                if audio_array.shape[0] > audio_array.shape[-1]:
                    audio_array = audio_array.T
                audio_mono = librosa.to_mono(audio_array)
            else:
                audio_mono = audio_array

            if sample_rate != target_rate:
                audio_mono = librosa.resample(
                    audio_mono, orig_sr=sample_rate, target_sr=target_rate
                )

        # Raw samples must be passed as a dict carrying their sampling rate;
        # the pipeline has no `sampling_rate` call keyword for bare arrays.
        # chunk_length_s lets recordings longer than one model window be
        # processed in pieces -- NOTE(review): confirm 30 s suits this model.
        result = asr_pipeline(
            {"raw": audio_mono, "sampling_rate": target_rate},
            chunk_length_s=30,
        )
        # A single input yields a single dict, not a list of dicts.
        return result["text"]
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        traceback.print_exc()
        return f"An error occurred: {str(e)}"
# Define Gradio interfaces for microphone and file upload.
# Both call transcribe_long_form, the transcription function defined in this
# file (the previous reference, transcribe_speech, does not exist here and
# raised NameError at start-up).
mic_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)

# Setup the main Gradio app with tabbed interfaces for different input sources
with gr.Blocks() as demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

# Launch the Gradio app
demo.launch(share=True)