File size: 1,447 Bytes
022e8a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
/**
 * Number of audio samples per second expected on input.
 * Coincidentally, both models (Moonshine and Silero VAD) use this same rate.
 */
export const INPUT_SAMPLE_RATE = 16000;

/** Number of input samples contained in one millisecond of audio. */
const INPUT_SAMPLE_RATE_MS = INPUT_SAMPLE_RATE / 1000;

/** Converts a duration given in milliseconds into a count of input samples. */
const msToSamples = (durationMs: number): number =>
  durationMs * INPUT_SAMPLE_RATE_MS;

/**
 * Speech-detection threshold: a probability ABOVE this value is treated as SPEECH.
 */
export const SPEECH_THRESHOLD = 0.3;

/**
 * Exit threshold: while the current state is SPEECH, a next-state probability
 * below this value is treated as NON-SPEECH.
 */
export const EXIT_THRESHOLD = 0.1;

/**
 * Minimum amount of silence (in milliseconds) that must follow a speech chunk
 * before the next chunk is considered a new speech chunk.
 */
export const MIN_SILENCE_DURATION_MS = 400;

/** {@link MIN_SILENCE_DURATION_MS} expressed in input samples. */
export const MIN_SILENCE_DURATION_SAMPLES = msToSamples(MIN_SILENCE_DURATION_MS);

/**
 * Amount of padding (in milliseconds) added to each side of a speech chunk.
 */
export const SPEECH_PAD_MS = 80;

/** {@link SPEECH_PAD_MS} expressed in input samples. */
export const SPEECH_PAD_SAMPLES = msToSamples(SPEECH_PAD_MS);

/**
 * Final speech chunks shorter than this many samples are discarded.
 */
export const MIN_SPEECH_DURATION_SAMPLES = msToSamples(250); // 250 ms

/**
 * Longest duration of audio that Moonshine can handle.
 * NOTE(review): the unit is not stated here — presumably seconds; confirm
 * against the code that consumes this value.
 */
export const MAX_BUFFER_DURATION = 30;

/**
 * Size of each incoming buffer.
 */
export const NEW_BUFFER_SIZE = 512;

/**
 * How many previous buffers to retain so that the audio can be padded correctly:
 * the smallest whole number of buffers that covers the speech padding.
 */
export const MAX_NUM_PREV_BUFFERS = Math.ceil(SPEECH_PAD_SAMPLES / NEW_BUFFER_SIZE);
|