# Spaces: Sleeping
| import streamlit as st | |
| from transformers import pipeline | |
| import librosa | |
| import soundfile as sf | |
| import numpy as np | |
| import io | |
# Streamlit re-executes this script from the top on every user interaction,
# so the model is built through a cached loader instead of a bare
# module-level call.
@st.cache_resource
def _load_pipeline():
    """Build the Urdu automatic-speech-recognition pipeline.

    Decorated with ``st.cache_resource`` so the returned pipeline object is
    created once and shared across script reruns.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``"automatic-speech-recognition"`` task with the
        ``kingabzpro/wav2vec2-large-xls-r-300m-Urdu`` model.
    """
    return pipeline(
        "automatic-speech-recognition",
        model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu",
    )


# Module-level handle used by the transcription code below.
pipe = _load_pipeline()
def load_audio(audio_file):
    """Decode an uploaded audio file into samples at 16 kHz.

    Args:
        audio_file: A readable binary file-like object; its full contents
            are consumed with ``read()``.

    Returns:
        A ``(samples, sample_rate)`` pair exactly as produced by
        ``librosa.load`` when called with ``sr=16000``.
    """
    # Wrap the raw bytes in an in-memory buffer so librosa can decode them
    # like a regular file.
    buffer = io.BytesIO(audio_file.read())
    samples, rate = librosa.load(buffer, sr=16000)
    return samples, rate
def transcribe_audio(audio_np, sampling_rate=16000):
    """Transcribe audio samples using the module-level ``pipe`` ASR pipeline.

    Args:
        audio_np: One-dimensional sequence of audio samples (for example the
            array returned by ``load_audio``).
        sampling_rate: Sampling rate of ``audio_np`` in Hz. Defaults to
            16000, the rate ``load_audio`` resamples to.

    Returns:
        The transcribed text, or an empty string when ``audio_np`` contains
        no samples.
    """
    # Nothing to transcribe; skip the model call entirely.
    if len(audio_np) == 0:
        return ""

    # The pipeline accepts raw samples directly when they are passed together
    # with their sampling rate, so no intermediate WAV encoding is needed.
    result = pipe({"raw": audio_np, "sampling_rate": sampling_rate})
    return result["text"]
# ---- Streamlit page layout ----
st.title("Urdu Speech-to-Text Transcription App")
st.write("Upload an audio file to transcribe its content into Urdu text.")

# Upload widget restricted to WAV and MP3; the value is None until the user
# picks a file.
uploaded_file = st.file_uploader("Choose an audio file...", type=["wav", "mp3"])

if uploaded_file is not None:
    try:
        # Decode first, then transcribe; the result heading is only rendered
        # once both steps have succeeded.
        audio_np, sr = load_audio(uploaded_file)
        text = transcribe_audio(audio_np)

        st.subheader("Transcription Result:")
        st.write(text)
    except Exception as e:
        # Top-level UI boundary: report any failure on the page.
        st.error(f"An error occurred: {e}")