"""Transcribe a mono 16-bit PCM WAV file with a Vosk model.

Usage: python <script> <model_dir> <audio_file>
"""

import json
import os
import sys
import wave

from vosk import KaldiRecognizer, Model

# Number of audio frames handed to the recognizer per iteration. Feeding the
# file in chunks keeps memory use bounded regardless of the file's length.
_CHUNK_FRAMES = 4000


def load_model(model_dir):
    """Create and return a Vosk ``Model`` from the directory *model_dir*."""
    return Model(model_dir)


def transcribe_audio(model, audio_file):
    """Return the recognized text for the WAV file *audio_file*.

    Args:
        model: A Vosk ``Model`` (for example, the value returned by
            ``load_model``).
        audio_file: Path to a WAV file; it is opened with ``wave.open``.

    Returns:
        The transcript as one string. Text from each utterance the
        recognizer finalizes is joined with single spaces.

    Raises:
        ValueError: If the file is not single-channel, 2-byte-per-sample,
            uncompressed audio.
    """
    with wave.open(audio_file, "rb") as wf:
        if (
            wf.getnchannels() != 1
            or wf.getsampwidth() != 2
            or wf.getcomptype() != "NONE"
        ):
            raise ValueError("Audio file must be WAV format mono PCM.")

        rec = KaldiRecognizer(model, wf.getframerate())
        segments = []

        while True:
            chunk = wf.readframes(_CHUNK_FRAMES)
            if not chunk:
                break
            # AcceptWaveform returns a truthy value once the recognizer has
            # finalized an utterance; collect that text before continuing.
            if rec.AcceptWaveform(chunk):
                segments.append(json.loads(rec.Result()).get("text", ""))

        # Flush whatever audio is still pending in the recognizer.
        segments.append(json.loads(rec.FinalResult()).get("text", ""))

    # Drop empty segments so the joined transcript has no stray spaces.
    return " ".join(segment for segment in segments if segment)


def main():
    """Command-line entry point: transcribe the file named in ``sys.argv``."""
    if len(sys.argv) < 3:
        # Fail with a usage hint instead of an IndexError on missing args.
        print(f"Usage: {sys.argv[0]} <model_dir> <audio_file>", file=sys.stderr)
        sys.exit(2)

    model_dir = sys.argv[1]
    audio_file = sys.argv[2]

    model = load_model(model_dir)
    transcript = transcribe_audio(model, audio_file)
    print(f"Transcript: {transcript}")


if __name__ == "__main__":
    main()