Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor | |
| import torch | |
| import soundfile as sf | |
# Checkpoint used for both the processor (feature extractor + tokenizer) and
# the CTC model, so the two can never drift apart.
#
# The previous checkpoint, "facebook/wav2vec2-large-xlsr-53", is a
# pretraining-only model: per its model card it has to be fine-tuned on a
# downstream task before it can be used for speech recognition. It has no
# trained CTC head and no tokenizer vocabulary for Wav2Vec2Processor to load,
# so a checkpoint fine-tuned for ASR is used instead.
MODEL_ID = "facebook/wav2vec2-base-960h"

processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
def _resample_linear(waveform, source_rate, target_rate):
    """Resample a mono float32 waveform to ``target_rate`` by linear interpolation."""
    if source_rate == target_rate:
        return waveform
    target_length = max(1, round(len(waveform) * target_rate / source_rate))
    # interpolate() expects (batch, channels, length); add and strip the two
    # leading singleton dimensions around the call.
    signal = torch.as_tensor(waveform).reshape(1, 1, -1)
    resampled = torch.nn.functional.interpolate(
        signal, size=target_length, mode="linear", align_corners=False
    )
    return resampled.reshape(-1).numpy()


def transcribe_audio(audio) -> str:
    """Transcribe an uploaded audio file with the module-level Wav2Vec2 model.

    Args:
        audio: The uploaded audio, given either as a filesystem path or as a
            file-like object that exposes its path through ``.name``.
            ``None`` means nothing was uploaded.

    Returns:
        The decoded transcription, or an empty string when there is no audio
        to transcribe.
    """
    if audio is None:
        return ""

    # Accept both a temp-file wrapper (path in ``.name``) and a plain path.
    audio_path = getattr(audio, "name", audio)
    waveform, sample_rate = sf.read(audio_path, dtype="float32")

    # soundfile yields (frames, channels) for multi-channel files, while the
    # processor expects a single 1-D waveform: down-mix to mono.
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)
    if waveform.size == 0:
        return ""

    # The model only understands audio at the rate its feature extractor was
    # configured with, so bring the upload to that rate before processing.
    target_rate = processor.feature_extractor.sampling_rate
    waveform = _resample_linear(waveform, sample_rate, target_rate)

    input_values = processor(
        waveform, sampling_rate=target_rate, return_tensors="pt"
    ).input_values

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy CTC decoding: most likely token per frame, collapsed by the
    # processor into text.
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)[0]
# Web UI: a single audio-upload input wired to transcribe_audio, with the
# transcription shown as plain text.
# NOTE(review): `source=` and `type="file"` look like legacy Gradio arguments;
# newer releases appear to use `sources=[...]` and `type="filepath"` - confirm
# against the installed Gradio version before changing them.
iface = gr.Interface(
    title="Voice Login System",
    description="Upload an audio file for transcription using Wav2Vec2 model.",
    fn=transcribe_audio,
    inputs=gr.Audio(source="upload", type="file"),
    outputs="text",
)

# Start serving the app.
iface.launch()