|
function freeConfig(config) { |
|
if ('buffer' in config) { |
|
_free(config.buffer); |
|
} |
|
|
|
if ('config' in config) { |
|
freeConfig(config.config) |
|
} |
|
|
|
if ('transducer' in config) { |
|
freeConfig(config.transducer) |
|
} |
|
|
|
if ('paraformer' in config) { |
|
freeConfig(config.paraformer) |
|
} |
|
|
|
if ('ctc' in config) { |
|
freeConfig(config.ctc) |
|
} |
|
|
|
if ('feat' in config) { |
|
freeConfig(config.feat) |
|
} |
|
|
|
if ('model' in config) { |
|
freeConfig(config.model) |
|
} |
|
|
|
_free(config.ptr); |
|
} |
|
|
|
|
|
function initSherpaOnnxOnlineTransducerModelConfig(config) { |
|
let encoderLen = lengthBytesUTF8(config.encoder) + 1; |
|
let decoderLen = lengthBytesUTF8(config.decoder) + 1; |
|
let joinerLen = lengthBytesUTF8(config.joiner) + 1; |
|
|
|
let n = encoderLen + decoderLen + joinerLen; |
|
|
|
let buffer = _malloc(n); |
|
|
|
let len = 3 * 4; |
|
let ptr = _malloc(len); |
|
|
|
let offset = 0; |
|
stringToUTF8(config.encoder, buffer + offset, encoderLen); |
|
offset += encoderLen; |
|
|
|
stringToUTF8(config.decoder, buffer + offset, decoderLen); |
|
offset += decoderLen; |
|
|
|
stringToUTF8(config.joiner, buffer + offset, joinerLen); |
|
|
|
offset = 0; |
|
setValue(ptr, buffer + offset, 'i8*'); |
|
offset += encoderLen; |
|
|
|
setValue(ptr + 4, buffer + offset, 'i8*'); |
|
offset += decoderLen; |
|
|
|
setValue(ptr + 8, buffer + offset, 'i8*'); |
|
|
|
return { |
|
buffer: buffer, ptr: ptr, len: len, |
|
} |
|
} |
|
|
|
function initSherpaOnnxOnlineParaformerModelConfig(config) { |
|
let encoderLen = lengthBytesUTF8(config.encoder) + 1; |
|
let decoderLen = lengthBytesUTF8(config.decoder) + 1; |
|
|
|
let n = encoderLen + decoderLen; |
|
let buffer = _malloc(n); |
|
|
|
let len = 2 * 4; |
|
let ptr = _malloc(len); |
|
|
|
let offset = 0; |
|
stringToUTF8(config.encoder, buffer + offset, encoderLen); |
|
offset += encoderLen; |
|
|
|
stringToUTF8(config.decoder, buffer + offset, decoderLen); |
|
|
|
offset = 0; |
|
setValue(ptr, buffer + offset, 'i8*'); |
|
offset += encoderLen; |
|
|
|
setValue(ptr + 4, buffer + offset, 'i8*'); |
|
|
|
return { |
|
buffer: buffer, ptr: ptr, len: len, |
|
} |
|
} |
|
|
|
function initSherpaOnnxOnlineZipformer2CtcModelConfig(config) { |
|
let n = lengthBytesUTF8(config.model) + 1; |
|
let buffer = _malloc(n); |
|
|
|
let len = 1 * 4; |
|
let ptr = _malloc(len); |
|
|
|
stringToUTF8(config.model, buffer, n); |
|
|
|
setValue(ptr, buffer, 'i8*'); |
|
|
|
return { |
|
buffer: buffer, ptr: ptr, len: len, |
|
} |
|
} |
|
|
|
function initSherpaOnnxOnlineModelConfig(config) { |
|
let transducer = initSherpaOnnxOnlineTransducerModelConfig(config.transducer); |
|
let paraformer = initSherpaOnnxOnlineParaformerModelConfig(config.paraformer); |
|
let ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(config.zipformer2Ctc); |
|
|
|
let len = transducer.len + paraformer.len + ctc.len + 5 * 4; |
|
let ptr = _malloc(len); |
|
|
|
let offset = 0; |
|
_CopyHeap(transducer.ptr, transducer.len, ptr + offset); |
|
offset += transducer.len; |
|
|
|
_CopyHeap(paraformer.ptr, paraformer.len, ptr + offset); |
|
offset += paraformer.len; |
|
|
|
_CopyHeap(ctc.ptr, ctc.len, ptr + offset); |
|
offset += ctc.len; |
|
|
|
let tokensLen = lengthBytesUTF8(config.tokens) + 1; |
|
let providerLen = lengthBytesUTF8(config.provider) + 1; |
|
let modelTypeLen = lengthBytesUTF8(config.modelType) + 1; |
|
let bufferLen = tokensLen + providerLen + modelTypeLen; |
|
let buffer = _malloc(bufferLen); |
|
|
|
offset = 0; |
|
stringToUTF8(config.tokens, buffer, tokensLen); |
|
offset += tokensLen; |
|
|
|
stringToUTF8(config.provider, buffer + offset, providerLen); |
|
offset += providerLen; |
|
|
|
stringToUTF8(config.modelType, buffer + offset, modelTypeLen); |
|
|
|
offset = transducer.len + paraformer.len + ctc.len; |
|
setValue(ptr + offset, buffer, 'i8*'); |
|
offset += 4; |
|
|
|
setValue(ptr + offset, config.numThreads, 'i32'); |
|
offset += 4; |
|
|
|
setValue(ptr + offset, buffer + tokensLen, 'i8*'); |
|
offset += 4; |
|
|
|
setValue(ptr + offset, config.debug, 'i32'); |
|
offset += 4; |
|
|
|
setValue(ptr + offset, buffer + tokensLen + providerLen, 'i8*'); |
|
offset += 4; |
|
|
|
return { |
|
buffer: buffer, ptr: ptr, len: len, transducer: transducer, |
|
paraformer: paraformer, ctc: ctc |
|
} |
|
} |
|
|
|
function initSherpaOnnxFeatureConfig(config) { |
|
let len = 2 * 4; |
|
let ptr = _malloc(len); |
|
|
|
setValue(ptr, config.sampleRate, 'i32'); |
|
setValue(ptr + 4, config.featureDim, 'i32'); |
|
return {ptr: ptr, len: len}; |
|
} |
|
|
|
function initSherpaOnnxOnlineRecognizerConfig(config) { |
|
let feat = initSherpaOnnxFeatureConfig(config.featConfig); |
|
let model = initSherpaOnnxOnlineModelConfig(config.modelConfig); |
|
|
|
let len = feat.len + model.len + 8 * 4; |
|
let ptr = _malloc(len); |
|
|
|
let offset = 0; |
|
_CopyHeap(feat.ptr, feat.len, ptr + offset); |
|
offset += feat.len; |
|
|
|
_CopyHeap(model.ptr, model.len, ptr + offset); |
|
offset += model.len; |
|
|
|
let decodingMethodLen = lengthBytesUTF8(config.decodingMethod) + 1; |
|
let hotwordsFileLen = lengthBytesUTF8(config.hotwordsFile) + 1; |
|
let bufferLen = decodingMethodLen + hotwordsFileLen; |
|
let buffer = _malloc(bufferLen); |
|
|
|
offset = 0; |
|
stringToUTF8(config.decodingMethod, buffer, decodingMethodLen); |
|
offset += decodingMethodLen; |
|
|
|
stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen); |
|
|
|
offset = feat.len + model.len; |
|
setValue(ptr + offset, buffer, 'i8*'); |
|
offset += 4; |
|
|
|
setValue(ptr + offset, config.maxActivePaths, 'i32'); |
|
offset += 4; |
|
|
|
setValue(ptr + offset, config.enableEndpoint, 'i32'); |
|
offset += 4; |
|
|
|
setValue(ptr + offset, config.rule1MinTrailingSilence, 'float'); |
|
offset += 4; |
|
|
|
setValue(ptr + offset, config.rule2MinTrailingSilence, 'float'); |
|
offset += 4; |
|
|
|
setValue(ptr + offset, config.rule3MinUtteranceLength, 'float'); |
|
offset += 4; |
|
|
|
setValue(ptr + offset, buffer + decodingMethodLen, 'i8*'); |
|
offset += 4; |
|
|
|
setValue(ptr + offset, config.hotwordsScore, 'float'); |
|
offset += 4; |
|
|
|
return { |
|
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model |
|
} |
|
} |
|
|
|
|
|
function createRecognizer() { |
|
let onlineTransducerModelConfig = { |
|
encoder: '', |
|
decoder: '', |
|
joiner: '', |
|
} |
|
|
|
let onlineParaformerModelConfig = { |
|
encoder: '', |
|
decoder: '', |
|
} |
|
|
|
let onlineZipformer2CtcModelConfig = { |
|
model: '', |
|
} |
|
|
|
let type = 0; |
|
|
|
switch (type) { |
|
case 0: |
|
|
|
onlineTransducerModelConfig.encoder = './encoder.onnx'; |
|
onlineTransducerModelConfig.decoder = './decoder.onnx'; |
|
onlineTransducerModelConfig.joiner = './joiner.onnx'; |
|
break; |
|
case 1: |
|
|
|
onlineParaformerModelConfig.encoder = './encoder.onnx'; |
|
onlineParaformerModelConfig.decoder = './decoder.onnx'; |
|
break; |
|
case 2: |
|
|
|
onlineZipformer2CtcModelConfig.model = './encoder.onnx'; |
|
break; |
|
} |
|
|
|
|
|
let onlineModelConfig = { |
|
transducer: onlineTransducerModelConfig, |
|
paraformer: onlineParaformerModelConfig, |
|
zipformer2Ctc: onlineZipformer2CtcModelConfig, |
|
tokens: './tokens.txt', |
|
numThreads: 1, |
|
provider: 'cpu', |
|
debug: 1, |
|
modelType: '', |
|
} |
|
|
|
let featureConfig = { |
|
sampleRate: 16000, |
|
featureDim: 80, |
|
} |
|
|
|
let recognizerConfig = { |
|
featConfig: featureConfig, |
|
modelConfig: onlineModelConfig, |
|
decodingMethod: 'greedy_search', |
|
maxActivePaths: 4, |
|
enableEndpoint: 1, |
|
rule1MinTrailingSilence: 2.4, |
|
rule2MinTrailingSilence: 1.2, |
|
rule3MinUtteranceLength: 20, |
|
hotwordsFile: '', |
|
hotwordsScore: 1.5, |
|
} |
|
|
|
return new OnlineRecognizer(recognizerConfig); |
|
} |
|
|
|
class OnlineStream { |
|
constructor(handle) { |
|
this.handle = handle; |
|
this.pointer = null; |
|
this.n = 0; |
|
} |
|
|
|
free() { |
|
if (this.handle) { |
|
_DestroyOnlineStream(this.handle); |
|
this.handle = null; |
|
_free(this.pointer); |
|
this.pointer = null; |
|
this.n = 0; |
|
} |
|
} |
|
|
|
|
|
|
|
|
|
|
|
acceptWaveform(sampleRate, samples) { |
|
if (this.n < samples.length) { |
|
_free(this.pointer); |
|
this.pointer = _malloc(samples.length * samples.BYTES_PER_ELEMENT); |
|
this.n = samples.length |
|
} |
|
|
|
Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT); |
|
_AcceptWaveform(this.handle, sampleRate, this.pointer, samples.length); |
|
} |
|
|
|
inputFinished() { |
|
_InputFinished(this.handle); |
|
} |
|
}; |
|
|
|
class OnlineRecognizer { |
|
constructor(configObj) { |
|
let config = initSherpaOnnxOnlineRecognizerConfig(configObj) |
|
let handle = _CreateOnlineRecognizer(config.ptr); |
|
|
|
freeConfig(config); |
|
|
|
this.handle = handle; |
|
} |
|
|
|
free() { |
|
_DestroyOnlineRecognizer(this.handle); |
|
this.handle = 0 |
|
} |
|
|
|
createStream() { |
|
let handle = _CreateOnlineStream(this.handle); |
|
return new OnlineStream(handle); |
|
} |
|
|
|
isReady(stream) { |
|
return _IsOnlineStreamReady(this.handle, stream.handle) == 1; |
|
} |
|
|
|
decode(stream) { |
|
return _DecodeOnlineStream(this.handle, stream.handle); |
|
} |
|
|
|
isEndpoint(stream) { |
|
return _IsEndpoint(this.handle, stream.handle) == 1; |
|
} |
|
|
|
reset(stream) { |
|
_Reset(this.handle, stream.handle); |
|
} |
|
|
|
getResult(stream) { |
|
let r = _GetOnlineStreamResult(this.handle, stream.handle); |
|
let textPtr = getValue(r, 'i8*'); |
|
let text = UTF8ToString(textPtr); |
|
_DestroyOnlineRecognizerResult(r); |
|
return text; |
|
} |
|
} |
|
|