linto-asr-ar-tn-0.1 / inference.py
hedhoud12's picture
upload the model, WebRTC app and the inference script
4588bab
raw
history blame
908 Bytes
import os
import sys
import wave
import json
from vosk import Model, KaldiRecognizer
def load_model(model_dir):
    """Construct a vosk ``Model`` from *model_dir* and return it."""
    return Model(model_dir)
def transcribe_audio(model, audio_file, chunk_frames=4000):
    """Transcribe a mono 16-bit PCM WAV file and return the recognized text.

    The audio is streamed to the recognizer in chunks of ``chunk_frames``
    frames instead of being read into memory in one piece, so memory use
    does not grow with the length of the recording.

    Args:
        model: Model object passed to ``KaldiRecognizer``.
        audio_file: Path (or file-like object) accepted by ``wave.open``.
        chunk_frames: Number of frames read from the file per recognizer
            call. Must be at least 1.

    Returns:
        The recognized text; the texts of all completed segments and of the
        final result are joined with single spaces.

    Raises:
        ValueError: If ``chunk_frames`` is smaller than 1, or if the file is
            not mono, not 16-bit, or not uncompressed PCM.
    """
    if chunk_frames < 1:
        raise ValueError("chunk_frames must be a positive integer.")

    with wave.open(audio_file, "rb") as wf:
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
            # Sample width 2 bytes == 16-bit; say so in the message.
            raise ValueError("Audio file must be WAV format mono 16-bit PCM.")

        rec = KaldiRecognizer(model, wf.getframerate())
        segments = []
        while True:
            data = wf.readframes(chunk_frames)
            if not data:
                break
            # A true return value means the recognizer finished a segment;
            # collect its text before feeding more audio.
            if rec.AcceptWaveform(data):
                segments.append(json.loads(rec.Result()).get("text", ""))

        # Flush whatever is still pending after the last chunk.
        segments.append(json.loads(rec.FinalResult()).get("text", ""))

    return " ".join(text for text in segments if text)
if __name__ == "__main__":
    # Command-line entry point: <script> <model_dir> <audio_file>
    # Exit with a usage message instead of an IndexError traceback when the
    # two required arguments are missing.
    if len(sys.argv) < 3:
        sys.exit(f"Usage: {sys.argv[0]} <model_dir> <audio_file>")
    model_dir = sys.argv[1]  # path of the model, first argument
    audio_file = sys.argv[2]  # path of the WAV file, second argument
    model = load_model(model_dir)
    transcript = transcribe_audio(model, audio_file)
    print(f"Transcript: {transcript}")