Spaces:
Build error
Build error
File size: 985 Bytes
ee623f0 213b75b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
import streamlit as st
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
# Load the model and processor once per server process. Streamlit re-executes
# this script from the top on every user interaction, so the loader is wrapped
# in st.cache_resource to avoid re-instantiating the network on each rerun.
MODEL_NAME = "facebook/wav2vec2-base-960h"


@st.cache_resource
def _load_model(model_name=MODEL_NAME):
    """Return the ``(processor, model)`` pair for the checkpoint *model_name*."""
    return (
        Wav2Vec2Processor.from_pretrained(model_name),
        Wav2Vec2ForCTC.from_pretrained(model_name),
    )


processor, model = _load_model()
# Define a function to transcribe audio
def transcribe(audio_file):
    """Transcribe an audio file to text with the module-level Wav2Vec2 model.

    Args:
        audio_file: Anything ``torchaudio.load`` accepts (a path or a
            file-like object positioned at the start of the audio data).

    Returns:
        The decoded transcription string, or ``""`` when the file contains
        no audio samples.
    """
    # The feature extractor rejects any sampling rate other than the one the
    # checkpoint was trained with, so read that rate from the processor.
    target_rate = processor.feature_extractor.sampling_rate

    waveform, sample_rate = torchaudio.load(audio_file)
    if waveform.numel() == 0:
        return ""

    # torchaudio returns (channels, frames); the processor expects a single
    # 1-D waveform, so down-mix multi-channel audio by averaging channels.
    waveform = waveform.mean(dim=0)

    if sample_rate != target_rate:
        waveform = torchaudio.functional.resample(
            waveform, orig_freq=sample_rate, new_freq=target_rate
        )

    input_values = processor(
        waveform.numpy(), sampling_rate=target_rate, return_tensors="pt"
    ).input_values

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        logits = model(input_values).logits

    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.decode(predicted_ids[0])
# Set up the Streamlit app
st.title("Speech Recognition with Wav2Vec2")
audio_file = st.file_uploader("Upload an audio file", type=["mp3", "wav"])
if audio_file is not None:
    # Use the upload's own MIME type so MP3 files are not labelled as WAV;
    # fall back to the previous default if the browser did not report one.
    st.audio(audio_file, format=audio_file.type or "audio/wav")
    # Rewind the in-memory upload so decoding starts at the first byte even
    # if an earlier consumer advanced the read position.
    audio_file.seek(0)
    transcript = transcribe(audio_file)
    st.write("Transcription: ", transcript)
|