File size: 908 Bytes
4588bab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import os
import sys
import wave
import json
from vosk import Model, KaldiRecognizer

def load_model(model_dir):
    """Construct a Vosk ``Model`` from *model_dir* and return it."""
    return Model(model_dir)

def transcribe_audio(model, audio_file, chunk_frames=4000):
    """Transcribe a mono 16-bit PCM WAV file and return the recognized text.

    The audio is fed to the recognizer in chunks of ``chunk_frames`` frames
    rather than in a single call, so memory use does not grow with the
    length of the recording.

    Args:
        model: Model object handed to ``KaldiRecognizer``.
        audio_file: Path or file object accepted by ``wave.open``.
        chunk_frames: Number of audio frames passed to the recognizer per
            ``AcceptWaveform`` call. Must be positive.

    Returns:
        The recognized text. When the recognizer reports several
        utterances, their texts are joined with single spaces.

    Raises:
        ValueError: If ``chunk_frames`` is not positive, or the file is not
            mono, 16-bit, uncompressed PCM.
    """
    if chunk_frames <= 0:
        # readframes(0) returns b"" and would silently yield an empty transcript.
        raise ValueError("chunk_frames must be a positive integer.")

    with wave.open(audio_file, "rb") as wf:
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
            raise ValueError("Audio file must be WAV format mono 16-bit PCM.")

        rec = KaldiRecognizer(model, wf.getframerate())
        segments = []
        while True:
            data = wf.readframes(chunk_frames)
            if not data:
                break
            # A truthy return means an utterance ended; collect its text now,
            # because the recognizer then starts a new utterance.
            if rec.AcceptWaveform(data):
                segments.append(json.loads(rec.Result()).get("text", ""))

        # Flush whatever is still buffered after the last chunk.
        segments.append(json.loads(rec.FinalResult()).get("text", ""))

    return " ".join(segment for segment in segments if segment)

if __name__ == "__main__":
    # Command-line entry point: <script> MODEL_DIR AUDIO_FILE
    if len(sys.argv) < 3:
        # Exit with a usage hint (written to stderr, exit status 1) instead of
        # an IndexError traceback when arguments are missing.
        sys.exit(f"Usage: {sys.argv[0]} MODEL_DIR AUDIO_FILE")

    model_dir = sys.argv[1]  # first argument: model directory
    audio_file = sys.argv[2]  # second argument: WAV file to transcribe

    model = load_model(model_dir)
    transcript = transcribe_audio(model, audio_file)
    print(f"Transcript: {transcript}")