File size: 1,207 Bytes
c914bf1
c41b2a2
cce4a3b
be95868
c41b2a2
c914bf1
bbbea86
 
5b3df6c
38bc3e8
 
 
 
c41b2a2
38bc3e8
c41b2a2
 
38bc3e8
 
 
 
 
 
 
c41b2a2
 
 
 
38bc3e8
 
 
c914bf1
c41b2a2
 
38bc3e8
c914bf1
 
38bc3e8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import gradio as gr
from transformers import pipeline
import librosa
import numpy as np
import traceback

# Build the speech-recognition pipeline once at module load; the transcription
# handler below reuses this single instance for every request.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
)

# Define the function to transcribe the speech
def transcribe_long_form(file_info):
    """Transcribe the audio handed over by the Gradio ``Audio`` component.

    Args:
        file_info: ``None`` when nothing was recorded or uploaded, otherwise
            a two-item tuple. Gradio's numpy audio format is
            ``(sample_rate, samples)``; the reversed ``(samples, sample_rate)``
            order is accepted as well so existing callers keep working.

    Returns:
        The transcribed text, or a human-readable message when no audio was
        supplied or when processing fails (the full traceback is printed to
        the console in the failure case).
    """
    no_audio_message = "No audio provided. Please record or upload audio first."
    target_sr = 16000

    # Gradio passes None when the user submits without any audio.
    if file_info is None:
        return no_audio_message

    try:
        first, second = file_info
        # Gradio yields (sample_rate, data); tolerate the reversed order too.
        if isinstance(first, np.ndarray):
            audio, sample_rate = first, second
        else:
            sample_rate, audio = first, second
        audio = np.asarray(audio)

        if audio.size == 0:
            return no_audio_message

        # Integer PCM (e.g. int16) must become float32 in [-1, 1] before
        # librosa or the model can consume it.
        if np.issubdtype(audio.dtype, np.integer):
            full_scale = float(np.iinfo(audio.dtype).max)
            audio = audio.astype(np.float32) / full_scale
        else:
            audio = audio.astype(np.float32)

        # Multi-channel data arrives as (samples, channels): average the
        # channels to obtain a mono signal.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        # Resample to 16 kHz if necessary
        if sample_rate != target_sr:
            audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=target_sr)

        # Transcribe the audio
        result = asr_pipeline(audio)
        return result['text']
    except Exception as e:
        # Print the full traceback to the console
        print(traceback.format_exc())
        # Return the error message for the Gradio interface
        return f"An error occurred: {str(e)}"

# Wire the transcription handler into a simple audio-in / text-out web UI.
iface = gr.Interface(
    title="Transcribe Audio",
    fn=transcribe_long_form,
    inputs=gr.Audio(label="Record or Upload Audio"),
    outputs="text",
)

# Start serving the app.
iface.launch()