"""Gradio app that transcribes recorded or uploaded audio with Distil-Whisper."""

import traceback

import gradio as gr
import librosa
import numpy as np
from transformers import pipeline

# Sampling rate the Whisper feature extractor expects.
TARGET_SAMPLE_RATE = 16000

# Initialize the ASR pipeline once at start-up. `chunk_length_s` enables the
# chunked long-form algorithm so recordings longer than Whisper's 30-second
# window are transcribed in full; 15 s is the value recommended for
# Distil-Whisper checkpoints.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
    chunk_length_s=15,
)


def _prepare_audio(sample_rate, samples):
    """Return *samples* as a mono float32 waveform at ``TARGET_SAMPLE_RATE``.

    Args:
        sample_rate: Sampling rate of ``samples`` in Hz.
        samples: Waveform as delivered by ``gr.Audio`` in numpy mode, shaped
            ``(n_samples,)`` or ``(n_samples, n_channels)``; typically int16.

    Returns:
        A 1-D ``float32`` numpy array sampled at ``TARGET_SAMPLE_RATE``.
    """
    samples = np.asarray(samples)

    # librosa only accepts floating-point audio; scale integer PCM to [-1, 1].
    if np.issubdtype(samples.dtype, np.integer):
        info = np.iinfo(samples.dtype)
        full_scale = float(max(abs(info.min), info.max))
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)

    # Gradio puts channels on the last axis, librosa expects them first.
    if samples.ndim > 1:
        samples = librosa.to_mono(samples.T)

    if sample_rate != TARGET_SAMPLE_RATE:
        samples = librosa.resample(
            samples, orig_sr=sample_rate, target_sr=TARGET_SAMPLE_RATE
        )

    return samples


def transcribe_long_form(file_info):
    """Transcribe the audio captured by the Gradio ``Audio`` component.

    Args:
        file_info: ``(sample_rate, samples)`` tuple produced by ``gr.Audio``
            in its default numpy mode, or ``None`` when nothing was recorded
            or uploaded.

    Returns:
        The transcribed text, or a human-readable message when no audio was
        supplied or transcription failed.
    """
    if file_info is None:
        return "No audio provided. Please record or upload an audio clip."

    try:
        # gr.Audio yields the sample rate first, then the waveform.
        sample_rate, samples = file_info

        audio = _prepare_audio(sample_rate, samples)
        if audio.size == 0:
            return "The audio clip is empty. Please record or upload again."

        # Pass the sampling rate explicitly so the pipeline does not have to
        # assume it.
        result = asr_pipeline(
            {"raw": audio, "sampling_rate": TARGET_SAMPLE_RATE}
        )
        return result['text']
    except Exception as e:
        # Top-level UI boundary: print the full traceback to the console and
        # show the error message in the Gradio interface instead of crashing.
        print(traceback.format_exc())
        return f"An error occurred: {str(e)}"


# Define Gradio interface
iface = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(label="Record or Upload Audio"),
    outputs="text",
    title="Transcribe Audio",
)

# Launch the Gradio app
iface.launch()