aoxo
/

wav2vec2-base-mal

Automatic Speech Recognition

Inference Endpoints

Model card Files Files and versions Community

aoxo committed on Dec 1, 2024

Commit

ae42d95

·

verified ·

1 Parent(s): 57cf122

Create inference.py

Files changed (1) hide show

inference.py +31 -0

inference.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import torch
+import librosa
+from transformers import AutoModelForCTC, Wav2Vec2Processor
+# Load the model and processor
+model = AutoModelForCTC.from_pretrained("aoxo/wav2vec2-base-mal")
+processor = Wav2Vec2Processor.from_pretrained("aoxo/wav2vec2-base-mal")
+# Function to transcribe audio
+def transcribe_audio(audio_path):
+    # Load the audio file
+    # Resample to 16kHz if needed
+    waveform, _ = librosa.load(audio_path, sr=16000)
+    # Process the audio
+    inputs = processor(waveform, sampling_rate=16000, return_tensors="pt")
+    # Perform inference
+    with torch.no_grad():
+        logits = model(inputs.input_values).logits
+    # Decode the prediction
+    predicted_ids = torch.argmax(logits, dim=-1)
+    transcription = processor.batch_decode(predicted_ids)[0]
+    return transcription
+# Example usage
+audio_path = "path/to/your/audio/file.wav"
+transcription = transcribe_audio(audio_path)
+print("Transcription:", transcription)