// Spaces:
// Running
// Running
/**
 * Recursively release the heap memory referenced by a config descriptor.
 *
 * A descriptor is an object such as the ones returned by the init*Config
 * functions in this file: it carries a `ptr`, optionally a `buffer`, and
 * optionally nested descriptors stored under the keys listed below.
 *
 * Free order: `buffer` (if present), then each nested descriptor in the
 * listed key order, then `ptr`.
 *
 * @param {Object} config Descriptor to release.
 * @param {Object} Module Object providing `_free(pointer)`.
 */
function freeConfig(config, Module) {
  if ('buffer' in config) {
    Module._free(config.buffer);
  }

  // Keys under which nested descriptors may be stored, in visiting order.
  const nestedKeys = [
    'config',
    'transducer',
    'paraformer',
    'ctc',
    'feat',
    'model',
    'nemoCtc',
    'whisper',
    'tdnn',
    'senseVoice',
    'lm',
    'ctcFstDecoder',
  ];
  for (const key of nestedKeys) {
    if (key in config) {
      freeConfig(config[key], Module);
    }
  }

  Module._free(config.ptr);
}
/**
 * Pack an online transducer model config into heap memory.
 *
 * The struct at `ptr` holds three 4-byte string pointers, in order:
 * encoder, decoder, joiner. The strings themselves are stored back to back,
 * NUL-terminated, in `buffer`. Missing/empty fields are written as ''.
 *
 * The user should free the returned pointers.
 *
 * @param {Object} config May contain `encoder`, `decoder`, `joiner`.
 * @param {Object} Module Emscripten module.
 * @returns {{buffer: number, ptr: number, len: number}} String storage,
 *     struct address and struct size in bytes.
 */
function initSherpaOnnxOnlineTransducerModelConfig(config, Module) {
  const texts = [
    config.encoder || '',
    config.decoder || '',
    config.joiner || '',
  ];
  // +1 for the terminating NUL of each string.
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);

  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));
  const len = 3 * 4;  // 3 pointers
  const ptr = Module._malloc(len);

  let cursor = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, cursor, sizes[i]);
    Module.setValue(ptr + 4 * i, cursor, 'i8*');
    cursor += sizes[i];
  });

  return {buffer, ptr, len};
}
/**
 * Pack an online paraformer model config into heap memory.
 *
 * The struct at `ptr` holds two 4-byte string pointers: encoder, decoder.
 * Both strings live NUL-terminated in `buffer`; missing fields become ''.
 *
 * @param {Object} config May contain `encoder` and `decoder`.
 * @param {Object} Module Emscripten module.
 * @returns {{buffer: number, ptr: number, len: number}}
 */
function initSherpaOnnxOnlineParaformerModelConfig(config, Module) {
  const encoder = config.encoder || '';
  const decoder = config.decoder || '';
  // +1 for the terminating NUL of each string.
  const encoderLen = Module.lengthBytesUTF8(encoder) + 1;
  const decoderLen = Module.lengthBytesUTF8(decoder) + 1;

  const buffer = Module._malloc(encoderLen + decoderLen);
  const len = 2 * 4;  // 2 pointers
  const ptr = Module._malloc(len);

  const encoderAddr = buffer;
  const decoderAddr = buffer + encoderLen;
  Module.stringToUTF8(encoder, encoderAddr, encoderLen);
  Module.stringToUTF8(decoder, decoderAddr, decoderLen);

  Module.setValue(ptr, encoderAddr, 'i8*');
  Module.setValue(ptr + 4, decoderAddr, 'i8*');

  return {buffer, ptr, len};
}
/**
 * Pack an online zipformer2 CTC model config into heap memory.
 *
 * The struct at `ptr` is a single 4-byte pointer to the NUL-terminated model
 * path stored in `buffer`. A missing `model` is written as ''.
 *
 * @param {Object} config May contain `model`.
 * @param {Object} Module Emscripten module.
 * @returns {{buffer: number, ptr: number, len: number}}
 */
function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) {
  const model = config.model || '';
  const modelLen = Module.lengthBytesUTF8(model) + 1;  // +1 for the NUL
  const buffer = Module._malloc(modelLen);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(model, buffer, modelLen);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
/**
 * Pack an online model config into heap memory.
 *
 * Layout of the struct at `ptr`, in order:
 *   transducer struct, paraformer struct, zipformer2Ctc struct (copied in),
 *   then seven 4-byte fields: tokens (char*), numThreads (i32),
 *   provider (char*), debug (i32), modelType (char*), modelingUnit (char*),
 *   bpeVocab (char*).
 * The strings are stored NUL-terminated in `buffer`.
 *
 * Note: `config` is modified in place — missing `transducer`, `paraformer`
 * and `zipformer2Ctc` entries are filled with empty defaults.
 *
 * @param {Object} config Model options; see the layout above for the keys.
 * @param {Object} Module Emscripten module (must provide `_CopyHeap`).
 * @returns {Object} `{buffer, ptr, len, transducer, paraformer, ctc}`; the
 *     nested descriptors are returned so the caller can free them too.
 */
function initSherpaOnnxOnlineModelConfig(config, Module) {
  if (!('transducer' in config)) {
    config.transducer = {
      encoder: '',
      decoder: '',
      joiner: '',
    };
  }
  if (!('paraformer' in config)) {
    config.paraformer = {
      encoder: '',
      decoder: '',
    };
  }
  if (!('zipformer2Ctc' in config)) {
    config.zipformer2Ctc = {
      model: '',
    };
  }

  const transducer =
      initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module);
  const paraformer =
      initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module);
  const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(
      config.zipformer2Ctc, Module);

  const len = transducer.len + paraformer.len + ctc.len + 7 * 4;
  const ptr = Module._malloc(len);

  // Running write position inside the struct at `ptr`.
  let offset = 0;
  const copyStruct = (nested) => {
    Module._CopyHeap(nested.ptr, nested.len, ptr + offset);
    offset += nested.len;
  };
  const putField = (value, type) => {
    Module.setValue(ptr + offset, value, type);
    offset += 4;
  };

  copyStruct(transducer);
  copyStruct(paraformer);
  copyStruct(ctc);

  // Pack all strings back to back into one allocation.
  const texts = [
    config.tokens || '',
    config.provider || 'cpu',
    config.modelType || '',
    config.modelingUnit || '',
    config.bpeVocab || '',
  ];
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);
  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));
  const addrs = [];
  let cursor = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, cursor, sizes[i]);
    addrs.push(cursor);
    cursor += sizes[i];
  });
  const [tokensAddr, providerAddr, modelTypeAddr, modelingUnitAddr, bpeVocabAddr] =
      addrs;

  putField(tokensAddr, 'i8*');
  putField(config.numThreads || 1, 'i32');
  putField(providerAddr, 'i8*');
  putField(config.debug || 0, 'i32');
  putField(modelTypeAddr, 'i8*');
  putField(modelingUnitAddr, 'i8*');
  putField(bpeVocabAddr, 'i8*');

  return {buffer, ptr, len, transducer, paraformer, ctc};
}
/**
 * Pack a feature-extractor config into heap memory.
 *
 * The struct at `ptr` holds two 32-bit integers: sampleRate, featureDim.
 * A missing or zero value falls back to 16000 and 80 respectively.
 *
 * @param {Object} config May contain `sampleRate` and `featureDim`.
 * @param {Object} Module Emscripten module.
 * @returns {{ptr: number, len: number}} Struct address and size in bytes.
 */
function initSherpaOnnxFeatureConfig(config, Module) {
  const len = 2 * 4;  // 2 int32 fields (not pointers)
  const ptr = Module._malloc(len);

  Module.setValue(ptr, config.sampleRate || 16000, 'i32');
  Module.setValue(ptr + 4, config.featureDim || 80, 'i32');

  return {ptr, len};
}
/**
 * Pack an online CTC FST decoder config into heap memory.
 *
 * The struct at `ptr` holds a 4-byte pointer to the NUL-terminated graph
 * path (stored in `buffer`) followed by a 32-bit integer maxActive.
 * A missing `graph` is written as ''; a missing or zero `maxActive`
 * falls back to 3000.
 *
 * @param {Object} config May contain `graph` and `maxActive`.
 * @param {Object} Module Emscripten module.
 * @returns {{ptr: number, len: number, buffer: number}}
 */
function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) {
  const len = 2 * 4;  // 1 pointer + 1 int32
  const ptr = Module._malloc(len);

  // Use the same fallback for sizing and for writing, so that a config
  // without `graph` never hands a non-string to stringToUTF8.
  const graph = config.graph || '';
  const graphLen = Module.lengthBytesUTF8(graph) + 1;
  const buffer = Module._malloc(graphLen);
  Module.stringToUTF8(graph, buffer, graphLen);

  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, config.maxActive || 3000, 'i32');

  return {ptr, len, buffer};
}
/**
 * Pack an online recognizer config into heap memory.
 *
 * Layout of the struct at `ptr`, in order:
 *   feature config struct, model config struct (copied in),
 *   decodingMethod (char*), maxActivePaths (i32), enableEndpoint (i32),
 *   rule1MinTrailingSilence (float), rule2MinTrailingSilence (float),
 *   rule3MinUtteranceLength (float), hotwordsFile (char*),
 *   hotwordsScore (float), CTC FST decoder struct (copied in),
 *   ruleFsts (char*), ruleFars (char*), blankPenalty (float).
 * The strings are stored NUL-terminated in `buffer`.
 *
 * Note: `config` is modified in place — missing `featConfig` and
 * `ctcFstDecoderConfig` entries are filled with defaults. Falsy numeric
 * options (including an explicit 0) are replaced by their defaults.
 *
 * @param {Object} config Recognizer options; `modelConfig` is passed on to
 *     initSherpaOnnxOnlineModelConfig.
 * @param {Object} Module Emscripten module (must provide `_CopyHeap`).
 * @returns {Object} `{buffer, ptr, len, feat, model, ctcFstDecoder}`; the
 *     nested descriptors are returned so the caller can free them too.
 */
function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
  if (!('featConfig' in config)) {
    config.featConfig = {
      sampleRate: 16000,
      featureDim: 80,
    };
  }
  if (!('ctcFstDecoderConfig' in config)) {
    config.ctcFstDecoderConfig = {
      graph: '',
      maxActive: 3000,
    };
  }

  const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
  const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
  const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
      config.ctcFstDecoderConfig, Module);

  // 8 scalar/pointer fields before the decoder struct, 3 after it.
  const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 3 * 4;
  const ptr = Module._malloc(len);

  // Pack all strings back to back into one allocation.
  const texts = [
    config.decodingMethod || 'greedy_search',
    config.hotwordsFile || '',
    config.ruleFsts || '',
    config.ruleFars || '',
  ];
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);
  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));
  const addrs = [];
  let cursor = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, cursor, sizes[i]);
    addrs.push(cursor);
    cursor += sizes[i];
  });
  const [decodingMethodAddr, hotwordsFileAddr, ruleFstsAddr, ruleFarsAddr] =
      addrs;

  // Running write position inside the struct at `ptr`.
  let offset = 0;
  const copyStruct = (nested) => {
    Module._CopyHeap(nested.ptr, nested.len, ptr + offset);
    offset += nested.len;
  };
  const putField = (value, type) => {
    Module.setValue(ptr + offset, value, type);
    offset += 4;
  };

  copyStruct(feat);
  copyStruct(model);
  putField(decodingMethodAddr, 'i8*');
  putField(config.maxActivePaths || 4, 'i32');
  putField(config.enableEndpoint || 0, 'i32');
  putField(config.rule1MinTrailingSilence || 2.4, 'float');
  putField(config.rule2MinTrailingSilence || 1.2, 'float');
  putField(config.rule3MinUtteranceLength || 20, 'float');
  putField(hotwordsFileAddr, 'i8*');
  putField(config.hotwordsScore || 1.5, 'float');
  copyStruct(ctcFstDecoder);
  putField(ruleFstsAddr, 'i8*');
  putField(ruleFarsAddr, 'i8*');
  putField(config.blankPenalty || 0, 'float');

  return {buffer, ptr, len, feat, model, ctcFstDecoder};
}
/**
 * Create an OnlineRecognizer.
 *
 * When `myConfig` is truthy it is used as the recognizer config as-is.
 * Otherwise a built-in default config is used; it points at model files in
 * the current directory ('./encoder.onnx', './tokens.txt', ...).
 *
 * @param {Object} Module Emscripten module, passed to OnlineRecognizer.
 * @param {Object} [myConfig] Complete recognizer config overriding the
 *     built-in default.
 * @returns {OnlineRecognizer}
 */
function createOnlineRecognizer(Module, myConfig) {
  if (myConfig) {
    return new OnlineRecognizer(myConfig, Module);
  }

  const onlineTransducerModelConfig = {
    encoder: '',
    decoder: '',
    joiner: '',
  };
  const onlineParaformerModelConfig = {
    encoder: '',
    decoder: '',
  };
  const onlineZipformer2CtcModelConfig = {
    model: '',
  };

  // Which model family the default config uses:
  // 0 = transducer, 1 = paraformer, 2 = zipformer2 CTC.
  const type = 0;
  switch (type) {
    case 0:
      // transducer
      onlineTransducerModelConfig.encoder = './encoder.onnx';
      onlineTransducerModelConfig.decoder = './decoder.onnx';
      onlineTransducerModelConfig.joiner = './joiner.onnx';
      break;
    case 1:
      // paraformer
      onlineParaformerModelConfig.encoder = './encoder.onnx';
      onlineParaformerModelConfig.decoder = './decoder.onnx';
      break;
    case 2:
      // ctc
      onlineZipformer2CtcModelConfig.model = './encoder.onnx';
      break;
  }

  const recognizerConfig = {
    featConfig: {
      sampleRate: 16000,
      featureDim: 80,
    },
    modelConfig: {
      transducer: onlineTransducerModelConfig,
      paraformer: onlineParaformerModelConfig,
      zipformer2Ctc: onlineZipformer2CtcModelConfig,
      tokens: './tokens.txt',
      numThreads: 1,
      provider: 'cpu',
      debug: 1,
      modelType: '',
      modelingUnit: 'cjkchar',
      bpeVocab: '',
    },
    decodingMethod: 'modified_beam_search',
    maxActivePaths: 4,
    enableEndpoint: 0,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
    hotwordsFile: '',
    hotwordsScore: 1.5,
    ctcFstDecoderConfig: {
      graph: '',
      maxActive: 3000,
    },
    ruleFsts: '',
    ruleFars: '',
  };

  return new OnlineRecognizer(recognizerConfig, Module);
}
/**
 * Pack an offline transducer model config into heap memory.
 *
 * The struct at `ptr` holds three 4-byte string pointers, in order:
 * encoder, decoder, joiner. The strings are stored back to back,
 * NUL-terminated, in `buffer`. Missing/empty fields are written as ''.
 *
 * @param {Object} config May contain `encoder`, `decoder`, `joiner`.
 * @param {Object} Module Emscripten module.
 * @returns {{buffer: number, ptr: number, len: number}}
 */
function initSherpaOnnxOfflineTransducerModelConfig(config, Module) {
  const texts = [
    config.encoder || '',
    config.decoder || '',
    config.joiner || '',
  ];
  // +1 for the terminating NUL of each string.
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);

  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));
  const len = 3 * 4;  // 3 pointers
  const ptr = Module._malloc(len);

  let cursor = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, cursor, sizes[i]);
    Module.setValue(ptr + 4 * i, cursor, 'i8*');
    cursor += sizes[i];
  });

  return {buffer, ptr, len};
}
/**
 * Pack an offline paraformer model config into heap memory.
 *
 * The struct at `ptr` is a single 4-byte pointer to the NUL-terminated model
 * path stored in `buffer`. A missing `model` is written as ''.
 *
 * @param {Object} config May contain `model`.
 * @param {Object} Module Emscripten module.
 * @returns {{buffer: number, ptr: number, len: number}}
 */
function initSherpaOnnxOfflineParaformerModelConfig(config, Module) {
  const model = config.model || '';
  const modelLen = Module.lengthBytesUTF8(model) + 1;  // +1 for the NUL
  const buffer = Module._malloc(modelLen);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(model, buffer, modelLen);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
/**
 * Pack an offline NeMo EncDec CTC model config into heap memory.
 *
 * The struct at `ptr` is a single 4-byte pointer to the NUL-terminated model
 * path stored in `buffer`. A missing `model` is written as ''.
 *
 * @param {Object} config May contain `model`.
 * @param {Object} Module Emscripten module.
 * @returns {{buffer: number, ptr: number, len: number}}
 */
function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) {
  const model = config.model || '';
  const modelLen = Module.lengthBytesUTF8(model) + 1;  // +1 for the NUL
  const buffer = Module._malloc(modelLen);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(model, buffer, modelLen);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
/**
 * Pack an offline Whisper model config into heap memory.
 *
 * The struct at `ptr` holds four 4-byte string pointers — encoder, decoder,
 * language, task — followed by a 32-bit integer tailPaddings. The strings
 * are stored back to back, NUL-terminated, in `buffer`.
 *
 * A falsy `tailPaddings` (missing or 0) is written as 2000; any other value,
 * including negative ones, is written unchanged.
 *
 * @param {Object} config May contain `encoder`, `decoder`, `language`,
 *     `task`, `tailPaddings`.
 * @param {Object} Module Emscripten module.
 * @returns {{buffer: number, ptr: number, len: number}}
 */
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
  const texts = [
    config.encoder || '',
    config.decoder || '',
    config.language || '',
    config.task || '',
  ];
  // +1 for the terminating NUL of each string.
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);

  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));
  const len = 5 * 4;  // 4 pointers + 1 int32
  const ptr = Module._malloc(len);

  let cursor = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, cursor, sizes[i]);
    Module.setValue(ptr + 4 * i, cursor, 'i8*');
    cursor += sizes[i];
  });
  Module.setValue(ptr + 16, config.tailPaddings || 2000, 'i32');

  return {buffer, ptr, len};
}
/**
 * Pack an offline TDNN model config into heap memory.
 *
 * The struct at `ptr` is a single 4-byte pointer to the NUL-terminated model
 * path stored in `buffer`. A missing `model` is written as ''.
 *
 * @param {Object} config May contain `model`.
 * @param {Object} Module Emscripten module.
 * @returns {{buffer: number, ptr: number, len: number}}
 */
function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
  const model = config.model || '';
  const modelLen = Module.lengthBytesUTF8(model) + 1;  // +1 for the NUL
  const buffer = Module._malloc(modelLen);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(model, buffer, modelLen);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
/**
 * Pack an offline SenseVoice model config into heap memory.
 *
 * The struct at `ptr` holds two 4-byte string pointers — model, language —
 * followed by a 32-bit integer useInverseTextNormalization (0 when missing).
 * The strings are stored back to back, NUL-terminated, in `buffer`.
 *
 * @param {Object} config May contain `model`, `language`,
 *     `useInverseTextNormalization`.
 * @param {Object} Module Emscripten module.
 * @returns {{buffer: number, ptr: number, len: number}}
 */
function initSherpaOnnxOfflineSenseVoiceModelConfig(config, Module) {
  const model = config.model || '';
  const language = config.language || '';
  // +1 for the terminating NUL of each string.
  const modelLen = Module.lengthBytesUTF8(model) + 1;
  const languageLen = Module.lengthBytesUTF8(language) + 1;

  // Only the strings live in `buffer`; the integer is stored in the struct.
  const buffer = Module._malloc(modelLen + languageLen);
  const len = 3 * 4;  // 2 pointers + 1 int
  const ptr = Module._malloc(len);

  const modelAddr = buffer;
  const languageAddr = buffer + modelLen;
  Module.stringToUTF8(model, modelAddr, modelLen);
  Module.stringToUTF8(language, languageAddr, languageLen);

  Module.setValue(ptr, modelAddr, 'i8*');
  Module.setValue(ptr + 4, languageAddr, 'i8*');
  Module.setValue(ptr + 8, config.useInverseTextNormalization || 0, 'i32');

  return {buffer, ptr, len};
}
/**
 * Pack an offline language-model config into heap memory.
 *
 * The struct at `ptr` holds a 4-byte pointer to the NUL-terminated model
 * path (stored in `buffer`) followed by a 32-bit float scale. A missing
 * `model` is written as ''; a falsy `scale` (missing or 0) is written as 1.
 *
 * @param {Object} config May contain `model` and `scale`.
 * @param {Object} Module Emscripten module.
 * @returns {{buffer: number, ptr: number, len: number}}
 */
function initSherpaOnnxOfflineLMConfig(config, Module) {
  const model = config.model || '';
  const modelLen = Module.lengthBytesUTF8(model) + 1;  // +1 for the NUL
  const buffer = Module._malloc(modelLen);

  const len = 2 * 4;  // 1 pointer + 1 float
  const ptr = Module._malloc(len);

  Module.stringToUTF8(model, buffer, modelLen);
  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, config.scale || 1, 'float');

  return {buffer, ptr, len};
}
/**
 * Pack an offline model config into heap memory.
 *
 * Layout of the struct at `ptr`, in order:
 *   transducer, paraformer, nemoCtc, whisper, tdnn structs (copied in),
 *   then eight 4-byte fields: tokens (char*), numThreads (i32), debug (i32),
 *   provider (char*), modelType (char*), modelingUnit (char*),
 *   bpeVocab (char*), teleSpeechCtc (char*),
 *   then the senseVoice struct (copied in).
 * The strings are stored NUL-terminated in `buffer`.
 *
 * Note: `config` is modified in place — missing `transducer`, `paraformer`,
 * `nemoCtc`, `whisper`, `tdnn` and `senseVoice` entries are filled with
 * defaults.
 *
 * @param {Object} config Model options; see the layout above for the keys.
 * @param {Object} Module Emscripten module (must provide `_CopyHeap`).
 * @returns {Object} `{buffer, ptr, len, transducer, paraformer, nemoCtc,
 *     whisper, tdnn, senseVoice}`; the nested descriptors are returned so
 *     the caller can free them too.
 */
function initSherpaOnnxOfflineModelConfig(config, Module) {
  if (!('transducer' in config)) {
    config.transducer = {
      encoder: '',
      decoder: '',
      joiner: '',
    };
  }
  if (!('paraformer' in config)) {
    config.paraformer = {
      model: '',
    };
  }
  if (!('nemoCtc' in config)) {
    config.nemoCtc = {
      model: '',
    };
  }
  if (!('whisper' in config)) {
    config.whisper = {
      encoder: '',
      decoder: '',
      language: '',
      task: '',
      tailPaddings: -1,
    };
  }
  if (!('tdnn' in config)) {
    config.tdnn = {
      model: '',
    };
  }
  if (!('senseVoice' in config)) {
    config.senseVoice = {
      model: '',
      language: '',
      useInverseTextNormalization: 0,
    };
  }

  const transducer =
      initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module);
  const paraformer =
      initSherpaOnnxOfflineParaformerModelConfig(config.paraformer, Module);
  const nemoCtc =
      initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config.nemoCtc, Module);
  const whisper =
      initSherpaOnnxOfflineWhisperModelConfig(config.whisper, Module);
  const tdnn = initSherpaOnnxOfflineTdnnModelConfig(config.tdnn, Module);
  const senseVoice =
      initSherpaOnnxOfflineSenseVoiceModelConfig(config.senseVoice, Module);

  const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
      tdnn.len + 8 * 4 + senseVoice.len;
  const ptr = Module._malloc(len);

  // Running write position inside the struct at `ptr`.
  let offset = 0;
  const copyStruct = (nested) => {
    Module._CopyHeap(nested.ptr, nested.len, ptr + offset);
    offset += nested.len;
  };
  const putField = (value, type) => {
    Module.setValue(ptr + offset, value, type);
    offset += 4;
  };

  copyStruct(transducer);
  copyStruct(paraformer);
  copyStruct(nemoCtc);
  copyStruct(whisper);
  copyStruct(tdnn);

  // Pack all strings back to back into one allocation. Every entry uses the
  // same fallback for sizing and for writing; in particular a missing
  // `tokens` is written as '' instead of being passed on as undefined.
  const texts = [
    config.tokens || '',
    config.provider || 'cpu',
    config.modelType || '',
    config.modelingUnit || '',
    config.bpeVocab || '',
    config.teleSpeechCtc || '',
  ];
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);
  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));
  const addrs = [];
  let cursor = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, cursor, sizes[i]);
    addrs.push(cursor);
    cursor += sizes[i];
  });
  const [
    tokensAddr,
    providerAddr,
    modelTypeAddr,
    modelingUnitAddr,
    bpeVocabAddr,
    teleSpeechCtcAddr,
  ] = addrs;

  putField(tokensAddr, 'i8*');
  putField(config.numThreads || 1, 'i32');
  putField(config.debug || 0, 'i32');
  putField(providerAddr, 'i8*');
  putField(modelTypeAddr, 'i8*');
  putField(modelingUnitAddr, 'i8*');
  putField(bpeVocabAddr, 'i8*');
  putField(teleSpeechCtcAddr, 'i8*');

  copyStruct(senseVoice);

  return {
    buffer,
    ptr,
    len,
    transducer,
    paraformer,
    nemoCtc,
    whisper,
    tdnn,
    senseVoice,
  };
}
/**
 * Pack an offline recognizer config into heap memory.
 *
 * Layout of the struct at `ptr`, in order:
 *   feature config struct, model config struct, LM config struct
 *   (copied in), then seven 4-byte fields: decodingMethod (char*),
 *   maxActivePaths (i32), hotwordsFile (char*), hotwordsScore (float),
 *   ruleFsts (char*), ruleFars (char*), blankPenalty (float).
 * The strings are stored NUL-terminated in `buffer`.
 *
 * Note: `config` is modified in place — missing `featConfig` and `lmConfig`
 * entries are filled with defaults. Falsy numeric options (including an
 * explicit 0) are replaced by their defaults.
 *
 * @param {Object} config Recognizer options; `modelConfig` is passed on to
 *     initSherpaOnnxOfflineModelConfig.
 * @param {Object} Module Emscripten module (must provide `_CopyHeap`).
 * @returns {Object} `{buffer, ptr, len, feat, model, lm}`; the nested
 *     descriptors are returned so the caller can free them too.
 */
function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
  if (!('featConfig' in config)) {
    config.featConfig = {
      sampleRate: 16000,
      featureDim: 80,
    };
  }
  if (!('lmConfig' in config)) {
    config.lmConfig = {
      model: '',
      scale: 1.0,
    };
  }

  const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
  const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
  const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);

  const len = feat.len + model.len + lm.len + 7 * 4;
  const ptr = Module._malloc(len);

  // Running write position inside the struct at `ptr`.
  let offset = 0;
  const copyStruct = (nested) => {
    Module._CopyHeap(nested.ptr, nested.len, ptr + offset);
    offset += nested.len;
  };
  const putField = (value, type) => {
    Module.setValue(ptr + offset, value, type);
    offset += 4;
  };

  copyStruct(feat);
  copyStruct(model);
  copyStruct(lm);

  // Pack all strings back to back into one allocation.
  const texts = [
    config.decodingMethod || 'greedy_search',
    config.hotwordsFile || '',
    config.ruleFsts || '',
    config.ruleFars || '',
  ];
  const sizes = texts.map((text) => Module.lengthBytesUTF8(text) + 1);
  const buffer = Module._malloc(sizes.reduce((sum, size) => sum + size, 0));
  const addrs = [];
  let cursor = buffer;
  texts.forEach((text, i) => {
    Module.stringToUTF8(text, cursor, sizes[i]);
    addrs.push(cursor);
    cursor += sizes[i];
  });
  const [decodingMethodAddr, hotwordsFileAddr, ruleFstsAddr, ruleFarsAddr] =
      addrs;

  putField(decodingMethodAddr, 'i8*');
  putField(config.maxActivePaths || 4, 'i32');
  putField(hotwordsFileAddr, 'i8*');
  putField(config.hotwordsScore || 1.5, 'float');
  putField(ruleFstsAddr, 'i8*');
  putField(ruleFarsAddr, 'i8*');
  putField(config.blankPenalty || 0, 'float');

  return {buffer, ptr, len, feat, model, lm};
}
/**
 * Wrapper around a native offline stream handle.
 */
class OfflineStream {
  /**
   * @param {number} handle Native stream handle.
   * @param {Object} Module Emscripten module.
   */
  constructor(handle, Module) {
    this.handle = handle;
    this.Module = Module;
  }

  /** Destroy the native stream. Calling it again is a no-op. */
  free() {
    if (this.handle) {
      this.Module._SherpaOnnxDestroyOfflineStream(this.handle);
      this.handle = null;
    }
  }

  /**
   * Copy the samples to the heap and pass them to the native stream.
   *
   * @param {Number} sampleRate
   * @param {Float32Array} samples Containing samples in the range [-1, 1]
   */
  acceptWaveform(sampleRate, samples) {
    const pointer =
        this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
    try {
      // HEAPF32 is indexed in elements, `pointer` is a byte address.
      this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
      this.Module._SherpaOnnxAcceptWaveformOffline(
          this.handle, sampleRate, pointer, samples.length);
    } finally {
      // Release the temporary copy even if the calls above throw.
      this.Module._free(pointer);
    }
  }
}
/**
 * Wrapper around a native offline (non-streaming) recognizer.
 */
class OfflineRecognizer {
  /**
   * @param {Object} configObj Recognizer config; it is kept as
   *     `this.config` and passed to initSherpaOnnxOfflineRecognizerConfig.
   * @param {Object} Module Emscripten module.
   */
  constructor(configObj, Module) {
    this.config = configObj;
    const config = initSherpaOnnxOfflineRecognizerConfig(configObj, Module);
    try {
      this.handle = Module._SherpaOnnxCreateOfflineRecognizer(config.ptr);
    } finally {
      // The packed config is only needed while the recognizer is created;
      // release it even if creation throws.
      freeConfig(config, Module);
    }
    this.Module = Module;
  }

  /** Destroy the native recognizer. Calling it again is a no-op. */
  free() {
    if (this.handle) {
      this.Module._SherpaOnnxDestroyOfflineRecognizer(this.handle);
      this.handle = 0;
    }
  }

  /**
   * @returns {OfflineStream} A new stream bound to this recognizer.
   */
  createStream() {
    const handle = this.Module._SherpaOnnxCreateOfflineStream(this.handle);
    return new OfflineStream(handle, this.Module);
  }

  /**
   * Run recognition on the given stream.
   * @param {OfflineStream} stream
   */
  decode(stream) {
    this.Module._SherpaOnnxDecodeOfflineStream(this.handle, stream.handle);
  }

  /**
   * Fetch the recognition result of a stream.
   *
   * @param {OfflineStream} stream
   * @returns {Object} The result, parsed from the JSON string produced by
   *     the native side.
   */
  getResult(stream) {
    const r =
        this.Module._SherpaOnnxGetOfflineStreamResultAsJson(stream.handle);
    try {
      return JSON.parse(this.Module.UTF8ToString(r));
    } finally {
      // Release the native string even if decoding/parsing throws.
      this.Module._SherpaOnnxDestroyOfflineStreamResultJson(r);
    }
  }
}
/**
 * Wrapper around a native online (streaming) stream handle. It keeps one
 * heap buffer for samples and reuses it across acceptWaveform() calls.
 */
class OnlineStream {
  /**
   * @param {number} handle Native stream handle.
   * @param {Object} Module Emscripten module.
   */
  constructor(handle, Module) {
    this.handle = handle;
    this.pointer = null;  // buffer
    this.n = 0;           // buffer size, in samples
    this.Module = Module;
  }

  /**
   * Destroy the native stream and release the sample buffer.
   * Calling it again is a no-op.
   */
  free() {
    if (!this.handle) {
      return;
    }
    this.Module._SherpaOnnxDestroyOnlineStream(this.handle);
    this.handle = null;
    this.Module._free(this.pointer);
    this.pointer = null;
    this.n = 0;
  }

  /**
   * Copy the samples to the heap and pass them to the native stream.
   *
   * @param {Number} sampleRate
   * @param {Float32Array} samples Containing samples in the range [-1, 1]
   */
  acceptWaveform(sampleRate, samples) {
    // Grow the buffer only when the chunk does not fit the current one.
    if (this.n < samples.length) {
      this.Module._free(this.pointer);
      this.pointer =
          this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
      this.n = samples.length;
    }

    // HEAPF32 is indexed in elements, `this.pointer` is a byte address.
    this.Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT);
    this.Module._SherpaOnnxOnlineStreamAcceptWaveform(
        this.handle, sampleRate, this.pointer, samples.length);
  }

  /** Notify the native stream that the input has finished. */
  inputFinished() {
    this.Module._SherpaOnnxOnlineStreamInputFinished(this.handle);
  }
}
/**
 * Wrapper around a native online (streaming) recognizer.
 */
class OnlineRecognizer {
  /**
   * @param {Object} configObj Recognizer config; it is kept as
   *     `this.config` and passed to initSherpaOnnxOnlineRecognizerConfig.
   * @param {Object} Module Emscripten module.
   */
  constructor(configObj, Module) {
    this.config = configObj;
    const config = initSherpaOnnxOnlineRecognizerConfig(configObj, Module);
    try {
      this.handle = Module._SherpaOnnxCreateOnlineRecognizer(config.ptr);
    } finally {
      // The packed config is only needed while the recognizer is created;
      // release it even if creation throws.
      freeConfig(config, Module);
    }
    this.Module = Module;
  }

  /** Destroy the native recognizer. Calling it again is a no-op. */
  free() {
    if (this.handle) {
      this.Module._SherpaOnnxDestroyOnlineRecognizer(this.handle);
      this.handle = 0;
    }
  }

  /**
   * @returns {OnlineStream} A new stream bound to this recognizer.
   */
  createStream() {
    const handle = this.Module._SherpaOnnxCreateOnlineStream(this.handle);
    return new OnlineStream(handle, this.Module);
  }

  /**
   * @param {OnlineStream} stream
   * @returns {boolean} true when the native readiness check returns 1.
   */
  isReady(stream) {
    return this.Module._SherpaOnnxIsOnlineStreamReady(
               this.handle, stream.handle) === 1;
  }

  /**
   * Run one decoding call on the given stream.
   * @param {OnlineStream} stream
   */
  decode(stream) {
    this.Module._SherpaOnnxDecodeOnlineStream(this.handle, stream.handle);
  }

  /**
   * @param {OnlineStream} stream
   * @returns {boolean} true when the native endpoint check returns 1.
   */
  isEndpoint(stream) {
    return this.Module._SherpaOnnxOnlineStreamIsEndpoint(
               this.handle, stream.handle) === 1;
  }

  /**
   * Reset the given stream.
   * @param {OnlineStream} stream
   */
  reset(stream) {
    this.Module._SherpaOnnxOnlineStreamReset(this.handle, stream.handle);
  }

  /**
   * Fetch the current recognition result of a stream.
   *
   * @param {OnlineStream} stream
   * @returns {Object} The result, parsed from the JSON string produced by
   *     the native side.
   */
  getResult(stream) {
    const r = this.Module._SherpaOnnxGetOnlineStreamResultAsJson(
        this.handle, stream.handle);
    try {
      return JSON.parse(this.Module.UTF8ToString(r));
    } finally {
      // Release the native string even if decoding/parsing throws.
      this.Module._SherpaOnnxDestroyOnlineStreamResultJson(r);
    }
  }
}
if (typeof process == 'object' && typeof process.versions == 'object' && | |
typeof process.versions.node == 'string') { | |
module.exports = { | |
createOnlineRecognizer, | |
OfflineRecognizer, | |
}; | |
} | |