Spaces:

Abbas0786
/

speech2text

Running

File size: 1,537 Bytes

7e54b28
 
ab0b8b5
 
6d4cee8
7e54b28
 
ab0b8b5
 
7e54b28
ab0b8b5
 
 
 
6d4cee8
ab0b8b5
 
6d4cee8
ab0b8b5
6d4cee8
ab0b8b5
 
 
 
7e54b28
 
ab0b8b5
 
7e54b28
 
 
ab0b8b5
 
7e54b28
ab0b8b5
7e54b28
 
6d4cee8
ab0b8b5
 
 
 
 
 
 
 
 
6d4cee8

import streamlit as st
from transformers import pipeline
import librosa
import soundfile as sf
import numpy as np
import io

@st.cache_resource
def _load_asr_pipeline():
    """Build the Urdu speech-recognition pipeline.

    Streamlit re-executes this script from the top on every widget
    interaction. Wrapping construction in ``st.cache_resource`` lets the
    pipeline object be reused across reruns instead of being rebuilt
    each time.
    """
    return pipeline(
        "automatic-speech-recognition",
        model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu",
    )


# Shared ASR pipeline used by transcribe_audio().
pipe = _load_asr_pipeline()

def load_audio(audio_file):
    """Decode an uploaded audio file into a sample array.

    Reads every byte from ``audio_file`` (any object exposing ``read()``),
    wraps the bytes in an in-memory buffer and hands that buffer to
    ``librosa.load`` with a requested sample rate of 16000 Hz.

    Returns:
        A ``(samples, sample_rate)`` pair as produced by ``librosa.load``.
    """
    buffer = io.BytesIO(audio_file.read())
    samples, rate = librosa.load(buffer, sr=16000)
    return samples, rate

def transcribe_audio(audio_np, sampling_rate=16000):
    """Transcribe an audio waveform using the module-level ASR pipeline.

    Args:
        audio_np: Array of audio samples, e.g. the first element of the
            tuple returned by ``load_audio``.
        sampling_rate: Sample rate of ``audio_np`` in Hz. Defaults to
            16000, the rate ``load_audio`` requests from librosa.

    Returns:
        The value stored under the ``"text"`` key of the pipeline output.

    Raises:
        ValueError: If ``audio_np`` contains no samples.
    """
    samples = np.asarray(audio_np, dtype=np.float32)
    if samples.size == 0:
        raise ValueError("Audio contains no samples to transcribe.")

    # Hand the waveform to the pipeline directly as a raw-audio dict.
    # A WAV round-trip through sf.write() is not usable here: sf.write()
    # returns None, so its result cannot serve as pipeline input.
    transcription = pipe({"raw": samples, "sampling_rate": sampling_rate})

    return transcription["text"]

# ---- Streamlit page layout ----
st.title("Urdu Speech-to-Text Transcription App")
st.write("Upload an audio file to transcribe its content into Urdu text.")

uploaded_file = st.file_uploader(
    "Choose an audio file...",
    type=["wav", "mp3"],
)


def _show_transcription(upload):
    """Decode ``upload``, transcribe it and render the text on the page.

    Any exception raised while loading, transcribing or rendering is
    reported to the user through ``st.error`` rather than propagated.
    """
    try:
        # load_audio also returns the sample rate, which is not needed here.
        samples, _rate = load_audio(upload)
        result = transcribe_audio(samples)

        st.subheader("Transcription Result:")
        st.write(result)
    except Exception as err:
        st.error(f"An error occurred: {err}")


# Nothing to do until the user has actually picked a file.
if uploaded_file is not None:
    _show_transcription(uploaded_file)