speech2text / app.py
Abbas0786's picture
Update app.py
ab0b8b5 verified
import streamlit as st
from transformers import pipeline
import librosa
import soundfile as sf
import numpy as np
import io
@st.cache_resource
def _load_asr_pipeline():
    """Build the Urdu wav2vec2 speech-recognition pipeline.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Caching the pipeline as a resource means the model is
    instantiated once and then reused across reruns instead of being
    rebuilt each time.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        "automatic-speech-recognition" task.
    """
    return pipeline(
        "automatic-speech-recognition",
        model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu",
    )


# Module-level handle to the (cached) ASR pipeline.
pipe = _load_asr_pipeline()
def load_audio(audio_file):
    """Decode an uploaded audio file into samples at 16 kHz.

    Args:
        audio_file: A binary file-like object exposing ``read()``
            (here, the object handed back by the Streamlit uploader).

    Returns:
        The ``(samples, sample_rate)`` pair produced by ``librosa.load``
        when asked for a 16000 Hz sample rate.
    """
    # Wrap the raw bytes in a seekable in-memory buffer so librosa can
    # treat the upload like an ordinary file.
    buffer = io.BytesIO(audio_file.read())
    samples, sample_rate = librosa.load(buffer, sr=16000)
    return samples, sample_rate
def transcribe_audio(audio_np, sampling_rate=16000):
    """Transcribe an audio waveform with the module-level ASR pipeline.

    Args:
        audio_np: Array of audio samples, e.g. the first element returned
            by ``load_audio``. Assumed to be single-channel (1-D) --
            TODO confirm for callers other than ``load_audio``.
        sampling_rate: Sample rate of ``audio_np`` in Hz. Defaults to
            16000, the rate ``load_audio`` requests from librosa.

    Returns:
        The recognised text (the ``'text'`` entry of the pipeline
        result), or an empty string when ``audio_np`` has no samples.
    """
    # No samples means there is nothing to recognise; skip the model call.
    if len(audio_np) == 0:
        return ""

    # Hand the samples to the pipeline directly. Round-tripping through
    # soundfile.write() cannot work here: it returns None, so its result
    # is not usable as pipeline input.
    transcription = pipe({"raw": audio_np, "sampling_rate": sampling_rate})
    return transcription["text"]
# Streamlit UI
def _show_transcription(upload):
    """Transcribe one uploaded file and render the outcome on the page.

    Any exception raised while loading, transcribing, or rendering is
    reported to the user through ``st.error`` instead of propagating.
    """
    try:
        samples, _rate = load_audio(upload)
        result = transcribe_audio(samples)
        st.subheader("Transcription Result:")
        st.write(result)
    except Exception as e:
        st.error(f"An error occurred: {e}")


st.title("Urdu Speech-to-Text Transcription App")
st.write("Upload an audio file to transcribe its content into Urdu text.")

uploaded_file = st.file_uploader(
    "Choose an audio file...",
    type=["wav", "mp3"],
)

# The uploader yields None until the user has picked a file.
if uploaded_file is not None:
    _show_transcription(uploaded_file)