File size: 2,354 Bytes
6a62ffb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
"""
Speech-to-text module based on Vosk for SillyTavern Extras
- Vosk website: https://alphacephei.com/vosk/
- Vosk api: https://github.com/alphacep/vosk-api
Authors:
- Tony Ribeiro (https://github.com/Tony-sama)
Models are saved into user cache folder, example: C:/Users/toto/.cache/vosk
References:
- Code adapted from: https://github.com/alphacep/vosk-api/blob/master/python/example/test_simple.py
"""
import json
import wave

from flask import jsonify, abort, request
import soundfile
from vosk import Model, KaldiRecognizer, SetLogLevel
# Tag used in this module's console output and HTTP error messages.
DEBUG_PREFIX = "<stt vosk module>"
# Path where process_audio() saves the uploaded audio before decoding it.
RECORDING_FILE_PATH = "stt_test.wav"
# Vosk model read by process_audio(); while it is None, process_audio() returns "".
# NOTE(review): presumably assigned from outside with the result of load_model() - confirm.
model = None
# NOTE(review): -1 presumably silences Vosk's own log output - confirm against the Vosk docs.
SetLogLevel(-1)
def load_model(file_path=None):
    """
    Build the Vosk model used for speech recognition.

    When file_path is given, the model is loaded from that path. Otherwise the
    "en-us" model is requested by language; models are downloaded to the user
    cache folder, example: C:/Users/toto/.cache/vosk
    """
    if file_path is not None:
        return Model(file_path)
    return Model(lang="en-us")
def process_audio():
    """
    Transcribe the audio file of the current request to text using Vosk.

    The upload is read from the 'AudioFile' field of the Flask request, saved
    to RECORDING_FILE_PATH, rewritten in place with soundfile, then fed to a
    KaldiRecognizer until the first complete utterance or the end of the file.

    Returns:
        A JSON response {"transcript": <text>}, or an empty string when the
        module-level model has not been loaded yet.

    Aborts with HTTP 500 when the request has no 'AudioFile', when the
    rewritten file is not mono 16-bit PCM WAV, or when any step raises.
    """
    if model is None:
        print(DEBUG_PREFIX,"Vosk model not initialized yet.")
        return ""

    file = request.files.get('AudioFile')
    if file is None:
        # Report the missing upload explicitly instead of failing later with
        # an AttributeError on None.
        print(DEBUG_PREFIX, "No 'AudioFile' found in request.")
        abort(500, DEBUG_PREFIX+" No 'AudioFile' found in request")

    # Stays None when the audio format is rejected, so the format error can be
    # raised outside the try block and is not swallowed by the generic handler.
    transcript = None
    try:
        file.save(RECORDING_FILE_PATH)

        # Read and rewrite the file with soundfile.
        # NOTE(review): presumably normalizes the upload into a WAV layout the
        # wave module can open - confirm.
        samples, samplerate = soundfile.read(RECORDING_FILE_PATH)
        soundfile.write(RECORDING_FILE_PATH, samples, samplerate)

        # The context manager closes the file on every path.
        with wave.open(RECORDING_FILE_PATH, "rb") as wf:
            is_mono_pcm = (
                wf.getnchannels() == 1
                and wf.getsampwidth() == 2
                and wf.getcomptype() == "NONE"
            )
            if is_mono_pcm:
                rec = KaldiRecognizer(model, wf.getframerate())
                # Feed chunks until the recognizer reports a complete
                # utterance or the audio runs out.
                while True:
                    frames = wf.readframes(4000)
                    if not frames or rec.AcceptWaveform(frames):
                        break
                # Result() is a JSON document; parse it rather than slicing
                # the string at fixed offsets.
                transcript = json.loads(rec.Result()).get("text", "")
    except Exception as e:  # No exception observed during test but we never know
        print(e)
        abort(500, DEBUG_PREFIX+" Exception occurs while processing audio")

    if transcript is None:
        print("Audio file must be WAV format mono PCM.")
        abort(500, DEBUG_PREFIX+" Audio file must be WAV format mono PCM.")

    print(DEBUG_PREFIX, "Transcripted from request audio file:", transcript)
    return jsonify({"transcript": transcript})