Spaces:

dlmn
/

SakecAI

Sleeping

File size: 2,034 Bytes

489b056

import streamlit as st
from transformers import pipeline
import torchaudio
import tempfile
import os
import torch

# Create a Streamlit app title
st.title("ASR with Hugging Face Whisper")


@st.cache_resource
def _load_asr_pipeline():
    """Build the Whisper speech-recognition pipeline once per server process.

    Streamlit re-executes this script from top to bottom on every widget
    interaction, so constructing the pipeline at module level would reload
    the large checkpoint on each upload or button click. Caching it as a
    resource keeps a single shared instance alive across reruns and sessions.

    Returns:
        The ``automatic-speech-recognition`` pipeline for
        ``openai/whisper-large-v2``, placed on GPU 0 when CUDA is available
        and on the CPU otherwise.
    """
    return pipeline(
        task="automatic-speech-recognition",
        model="openai/whisper-large-v2",
        # Whisper handles 30-second windows; chunking lets longer recordings
        # be transcribed window by window and stitched back together.
        chunk_length_s=30,
        device=0 if torch.cuda.is_available() else "cpu",
    )


# Load the ASR model (served from Streamlit's resource cache after the first run)
asr = _load_asr_pipeline()

# Create a file uploader widget
uploaded_audio = st.file_uploader("Upload an audio file (wav/mp3)")

# Check if an audio file is uploaded
if uploaded_audio is not None:
    # Read the uploaded audio file; torchaudio yields a (channels, frames)
    # tensor together with the file's native sampling rate.
    audio_data, sample_rate = torchaudio.load(uploaded_audio)

    # The ASR pipeline only accepts single-channel, 1-D audio, so average the
    # channels into one mono track before handing it over.
    mono_audio = audio_data.mean(dim=0).numpy()

    # Perform ASR on the uploaded audio. The pipeline has no `sample_rate`
    # keyword; the rate travels with the samples in the dict input form, which
    # also lets the pipeline resample to the rate the model expects.
    with st.spinner("Performing ASR..."):
        transcriptions = asr({"raw": mono_audio, "sampling_rate": sample_rate})

    # A single input produces one result dict rather than a list of them;
    # normalise so the display loop below works for either shape.
    if isinstance(transcriptions, dict):
        transcriptions = [transcriptions]

    # Display the ASR result
    st.subheader("Transcription:")
    for idx, transcription in enumerate(transcriptions, start=1):
        st.write(f"Segment {idx}: {transcription['text']}")

# Provide instructions
st.write("Instructions:")
st.write("1. Upload an audio file in WAV or MP3 format.")
# The upload itself triggers transcription; the app has no 'Perform ASR'
# button, so the instructions must not tell the user to click one.
st.write("2. Transcription starts automatically once the upload finishes.")

# Add a sample audio file for testing (optional)
st.write("Sample Audio for Testing:")
sample_audio = "Wave_files_demos_Welcome.wav"
st.audio(sample_audio, format="audio/wav")

# Define the path to the sample audio file
sample_audio_path = os.path.join(os.getcwd(), sample_audio)

# Add a button to transcribe the sample audio (optional)
if st.button("Transcribe Sample Audio"):
    # Read the sample audio file; torchaudio yields a (channels, frames)
    # tensor together with the file's native sampling rate.
    sample_audio_data, sample_audio_rate = torchaudio.load(sample_audio_path)

    # The ASR pipeline only accepts single-channel, 1-D audio, so average the
    # channels into one mono track before handing it over.
    mono_sample = sample_audio_data.mean(dim=0).numpy()

    # Perform ASR on the sample audio. The pipeline has no `sample_rate`
    # keyword; the rate travels with the samples in the dict input form, which
    # also lets the pipeline resample to the rate the model expects.
    with st.spinner("Performing ASR..."):
        sample_transcriptions = asr(
            {"raw": mono_sample, "sampling_rate": sample_audio_rate}
        )

    # A single input produces one result dict rather than a list of them;
    # normalise so the display loop below works for either shape.
    if isinstance(sample_transcriptions, dict):
        sample_transcriptions = [sample_transcriptions]

    # Display the ASR result for the sample audio
    st.subheader("Transcription (Sample Audio):")
    for idx, transcription in enumerate(sample_transcriptions, start=1):
        st.write(f"Segment {idx}: {transcription['text']}")