Ai-Transcript / app55555.py
import gradio as gr
import numpy as np
import librosa
import traceback
from transformers import pipeline
# Initialize the ASR pipeline; distil-small.en is an English-only Distil-Whisper
# checkpoint. chunk_length_s enables chunked long-form transcription, so audio
# longer than Whisper's 30-second window is not truncated.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
    chunk_length_s=30,
)
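# Note: the pipeline can also decode an audio file path directly (it relies on
# ffmpeg for this), e.g. asr_pipeline("clip.wav") — the filename is illustrative
# only. The manual numpy preprocessing below avoids that system dependency.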
def transcribe_long_form(file_info):
    if file_info is None:
        return "No audio file provided."
    try:
        # Gradio's "numpy" audio type delivers a (sample_rate, audio_array) tuple
        sample_rate, audio_array = file_info
        # Convert integer PCM samples to float32 in [-1.0, 1.0], as librosa and
        # the Whisper feature extractor expect floating-point audio
        if np.issubdtype(audio_array.dtype, np.integer):
            audio_array = audio_array.astype(np.float32) / np.iinfo(audio_array.dtype).max
        # If the audio is multi-channel (stereo), convert it to mono; Gradio
        # delivers (samples, channels) while librosa.to_mono expects (channels, samples)
        if audio_array.ndim > 1:
            audio_mono = librosa.to_mono(audio_array.T)
        else:
            audio_mono = audio_array
        # Resample to the 16 kHz rate the Whisper feature extractor expects
        if sample_rate != 16000:
            audio_mono = librosa.resample(audio_mono, orig_sr=sample_rate, target_sr=16000)
        # Transcribe: the pipeline accepts a dict of the raw array and its sampling rate
        result = asr_pipeline({"raw": audio_mono, "sampling_rate": 16000})
        # For a single input the pipeline returns one dict with a 'text' field
        return result["text"]
    except Exception as e:
        traceback.print_exc()
        return f"An error occurred: {str(e)}"
# Define Gradio interfaces for microphone and file upload
mic_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources=["microphone"], type="numpy"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)
file_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources=["upload"], type="numpy"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)
# Set up the main Gradio app with tabbed interfaces for the two input sources
with gr.Blocks() as demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )
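# Note: share=True requests a temporary public *.gradio.live link; when hosted
# on Hugging Face Spaces the app is already served publicly, so the flag is
# redundant there (an assumption about the deployment target).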
# Launch the Gradio app
demo.launch(share=True)