import torch
import torchaudio
from transformers import pipeline
import streamlit as st


# Load the wav2vec2 model through the automatic-speech-recognition pipeline
model = pipeline('automatic-speech-recognition', model='facebook/wav2vec2-base-960h')


def recognize_voice(audio_file):
    # Load the uploaded audio and pass the raw waveform plus its sampling
    # rate to the pipeline; the pipeline returns a dict with a 'text' key
    waveform, sample_rate = torchaudio.load(audio_file)
    transcript = model({"raw": waveform.numpy()[0], "sampling_rate": sample_rate})
    return transcript["text"]


def main():
    st.title("Voice Recognition App")

    audio_file = st.file_uploader("Upload an audio file", type=["mp3", "wav"])

    if audio_file is not None:
        st.audio(audio_file, format='audio/wav')

        if st.button("Recognize Voice"):
            try:
                result = recognize_voice(audio_file)
                st.success(f"Recognition Result: {result}")
            except Exception as e:
                st.error(f"Error: {e}")


if __name__ == "__main__":
    main()
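
# Usage note (the filename app.py is an assumption, not given above):
#   streamlit run app.py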