import streamlit as st
from transformers import pipeline
import librosa
import io
# Load the ASR pipeline with the fine-tuned Urdu wav2vec2 model
pipe = pipeline("automatic-speech-recognition", model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu")
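# Note: Streamlit re-runs this script from the top on every interaction, so the model
# is reloaded each time. A common optimization (assuming Streamlit >= 1.18, where
# st.cache_resource is available) is to wrap the load in a cached function, e.g.:
#
#   @st.cache_resource
#   def load_pipe():
#       return pipeline("automatic-speech-recognition",
#                       model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu")
#
#   pipe = load_pipe()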
def load_audio(audio_file):
    """Load an uploaded audio file and convert it to the format the model expects."""
    audio_bytes = audio_file.read()
    audio = io.BytesIO(audio_bytes)
    # librosa returns mono float32 audio resampled to 16 kHz, the rate the model was trained on
    audio_np, sr = librosa.load(audio, sr=16000)
    return audio_np, sr
def transcribe_audio(audio_np):
    """Transcribe the given audio numpy array using the model pipeline."""
    # The ASR pipeline accepts raw audio as a numpy array together with its sampling rate
    transcription = pipe({"raw": audio_np, "sampling_rate": 16000})
    return transcription["text"]
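# Note: for long recordings, the transformers ASR pipeline can transcribe in
# overlapping windows, e.g.
#   pipe({"raw": audio_np, "sampling_rate": 16000}, chunk_length_s=30)
# The 30-second window is an illustrative value, not a tuned setting.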
# Streamlit UI
st.title("Urdu Speech-to-Text Transcription App")
st.write("Upload an audio file to transcribe its content into Urdu text.")

uploaded_file = st.file_uploader("Choose an audio file...", type=["wav", "mp3"])

if uploaded_file is not None:
    try:
        # Load and resample the uploaded audio file
        audio_np, sr = load_audio(uploaded_file)
        # Transcribe the audio
        text = transcribe_audio(audio_np)
        # Display the transcription result
        st.subheader("Transcription Result:")
        st.write(text)
    except Exception as e:
        st.error(f"An error occurred: {e}")