File size: 908 Bytes
4588bab |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
import os
import sys
import wave
import json
from vosk import Model, KaldiRecognizer
def load_model(model_dir):
    """Construct a Vosk ``Model`` from *model_dir* and return it.

    Args:
        model_dir: Value passed straight to ``vosk.Model``.

    Returns:
        The newly created ``Model`` instance.
    """
    return Model(model_dir)
def transcribe_audio(model, audio_file, chunk_frames=4000):
    """Transcribe a mono 16-bit PCM WAV file and return the recognized text.

    The audio is streamed to the recognizer in fixed-size chunks instead of
    being loaded into memory in one piece, so long recordings do not require
    a buffer the size of the whole file.

    Args:
        model: Model object handed to ``KaldiRecognizer``.
        audio_file: Path (or file-like object) accepted by ``wave.open``.
        chunk_frames: Number of audio frames read per recognizer call.

    Returns:
        The transcript as a single string; segment texts are joined with
        single spaces. Empty if nothing was recognized.

    Raises:
        ValueError: If ``chunk_frames`` is not positive, or if the file is
            not single-channel, 2-byte-sample, uncompressed WAV.
    """
    if chunk_frames <= 0:
        # readframes(0) yields b"" and would silently transcribe nothing.
        raise ValueError("chunk_frames must be a positive integer.")

    with wave.open(audio_file, "rb") as wf:
        is_mono = wf.getnchannels() == 1
        is_16bit = wf.getsampwidth() == 2
        is_uncompressed = wf.getcomptype() == "NONE"
        if not (is_mono and is_16bit and is_uncompressed):
            raise ValueError("Audio file must be WAV format mono PCM.")

        rec = KaldiRecognizer(model, wf.getframerate())
        segments = []
        while True:
            data = wf.readframes(chunk_frames)
            if not data:
                break
            # Following the Vosk examples: a truthy return signals that a
            # segment has been finalized and its text is available now.
            if rec.AcceptWaveform(data):
                segments.append(json.loads(rec.Result()).get("text", ""))
        # Flush whatever audio is still pending in the recognizer.
        segments.append(json.loads(rec.FinalResult()).get("text", ""))

    return " ".join(text for text in segments if text)
if __name__ == "__main__":
    # Command-line entry point: <model_dir> <audio_file>.
    # Fail with a usage message rather than an IndexError when the two
    # required positional arguments are missing.
    if len(sys.argv) < 3:
        sys.exit(f"Usage: {sys.argv[0]} <model_dir> <audio_file>")

    model_dir = sys.argv[1]
    audio_file = sys.argv[2]

    model = load_model(model_dir)
    transcript = transcribe_audio(model, audio_file)
    print(f"Transcript: {transcript}")
|