Spaces:

Eldermind
/

Ai-Transcript

Runtime error

File size: 1,896 Bytes

63e4766
95da6fb
 
 
63e4766
95da6fb
 
63e4766
d20136b
 
 
1db7f41
95da6fb
d20136b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95da6fb
4659c06
de5f056
63e4766
d20136b
06b7f96
f9f786c
 
 
 
 
 
aeba14a
f9f786c
 
 
 
 
63e4766
 
06b7f96
f9f786c
 
 
95da6fb
f9f786c

import gradio as gr
from transformers import pipeline
import librosa
import traceback

# Build the speech-recognition pipeline once, at import time, so the
# transcription handler below can reuse the same loaded model on every call.
ASR_MODEL_ID = "distil-whisper/distil-small.en"
asr_pipeline = pipeline("automatic-speech-recognition", model=ASR_MODEL_ID)

# Sampling rate (Hz) the waveform is converted to before it is sent to the model.
_TARGET_SAMPLE_RATE = 16000


def _prepare_waveform(audio):
    """Turn a two-item audio tuple into mono float32 samples at 16 kHz.

    Args:
        audio: Either ``(sample_rate, samples)`` -- the order Gradio uses for
            ``gr.Audio(type="numpy")`` -- or ``(samples, sample_rate)``.
            ``samples`` may be integer PCM or floating point, 1-D (mono) or
            2-D (multi-channel, in either axis order).

    Returns:
        A 1-D float32 array sampled at ``_TARGET_SAMPLE_RATE``.

    Raises:
        ValueError: If ``audio`` does not contain exactly two items.
    """
    import numpy as np  # local import: numpy is not imported at module level

    first, second = audio
    # The scalar member is the sample rate, whichever position it is in.
    if np.ndim(first) == 0:
        sample_rate, samples = first, second
    else:
        samples, sample_rate = first, second

    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        # Integer PCM must be scaled to roughly [-1, 1]; librosa's resampler
        # and the model both work on floating-point audio.
        samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
    else:
        samples = samples.astype(np.float32)

    if samples.ndim > 1:
        # librosa.to_mono expects (channels, samples). Gradio delivers
        # (samples, channels), so treat the longer axis as time.
        if samples.shape[0] > samples.shape[1]:
            samples = samples.T
        samples = librosa.to_mono(samples)

    if sample_rate != _TARGET_SAMPLE_RATE:
        samples = librosa.resample(
            samples, orig_sr=sample_rate, target_sr=_TARGET_SAMPLE_RATE
        )
    return samples


def transcribe_long_form(file_info):
    """Transcribe an audio clip with the module-level ``asr_pipeline``.

    Args:
        file_info: The audio to transcribe, as delivered by a ``gr.Audio``
            input. Accepted forms:

            * a path to an audio file (``type="filepath"``), which is passed
              to the pipeline unchanged;
            * a ``(sample_rate, samples)`` tuple (``type="numpy"``);
            * a ``(samples, sample_rate)`` tuple.

    Returns:
        The recognised text. Returns ``"No audio file provided."`` when
        ``file_info`` is empty or ``None``, and ``"An error occurred: ..."``
        when anything fails while preparing or transcribing the audio (the
        traceback is also printed to stderr).
    """
    if not file_info:
        return "No audio file provided."

    try:
        if isinstance(file_info, str):
            # The pipeline decodes and resamples audio files by itself.
            asr_input = file_info
        else:
            # Raw samples are sent together with their sampling rate; the
            # rate is part of the input dict, not a keyword argument.
            asr_input = {
                "raw": _prepare_waveform(file_info),
                "sampling_rate": _TARGET_SAMPLE_RATE,
            }

        result = asr_pipeline(asr_input)
        # A single input yields one dict; tolerate a one-element list too.
        if isinstance(result, list):
            result = result[0]
        return result["text"]
    except Exception as e:
        # Boundary handler for the UI: log the full traceback and show the
        # user a readable message instead of crashing the request.
        traceback.print_exc()
        return f"An error occurred: {str(e)}"


# Define Gradio interfaces for microphone and file upload.
# Both tabs call ``transcribe_long_form``, the transcription handler defined
# in this file. With ``type="filepath"`` Gradio hands the handler the path of
# the recorded/uploaded audio file.
mic_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never"
)

file_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never"
)

# Setup the main Gradio app with tabbed interfaces for different input sources
with gr.Blocks() as demo:
    gr.TabbedInterface([mic_transcribe, file_transcribe], ["Transcribe Microphone", "Transcribe Audio File"])

# Launch the Gradio app
demo.launch(share=True)