pocketsphinx-20.04

37a92a9 about 3 years ago

7.95 kB

	/* -- c-basic-offset:4; indent-tabs-mode: nil -- */
	/* ====================================================================
	* Copyright (c) 2022 David Huggins-Daines. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	*
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	*
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in
	* the documentation and/or other materials provided with the
	* distribution.
	*
	* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
	* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
	* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
	* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
	* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
	* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
	* OF THE POSSIBILITY OF SUCH DAMAGE.
	* ====================================================================
	*/
	/**
	* @file endpointer.h
	* @brief VAD-based endpointer for PocketSphinx
	*
	* Because doxygen is Bad Software, the actual documentation can only
	* exist in \ref ps_endpointer_t. Sorry about that.
	*/

	#ifndef __PS_ENDPOINTER_H__
	#define __PS_ENDPOINTER_H__


	#include <pocketsphinx/prim_type.h>
	#include <pocketsphinx/export.h>

	#ifdef __cplusplus
	extern "C" {
	#endif
	#if 0
	}
	#endif

	#include <pocketsphinx/prim_type.h>
	#include <pocketsphinx/export.h>
	#include <pocketsphinx/vad.h>

	/**
	* @struct ps_endpointer_t pocketsphinx/endpointer.h
	* @brief Simple voice activity detection based endpointing
	*/
	typedef struct ps_endpointer_s ps_endpointer_t;

	/**
	* Default window in seconds of audio to use for speech start/end decision.
	*/
	#define PS_ENDPOINTER_DEFAULT_WINDOW 0.3
	/**
	* Default ratio of frames in window to trigger start/end decision.
	*/
	#define PS_ENDPOINTER_DEFAULT_RATIO 0.9

	/**
	* Initialize endpointing.
	*
	* @memberof ps_endpointer_t
	* @param window Seconds of audio to use in speech start/end decision,
	* or 0 to use the default (PS_ENDPOINTER_DEFAULT_WINDOW).
	* @param ratio Ratio of frames needed to trigger start/end decision,
	* or 0 for the default (PS_ENDPOINTER_DEFAULT_RATIO).
	* @param mode "Aggressiveness" of voice activity detection. Stricter
	* values (see ps_vad_mode_t) are less likely to
	* misclassify non-speech as speech.
	* @param sample_rate Sampling rate of input, or 0 for default (which can
	* be obtained with ps_vad_sample_rate()). Only 8000,
	* 16000, 32000, 48000 are directly supported, others
	* will use the closest supported rate (within reason).
	* Note that this means that the actual frame length
	* may not be exactly the one requested, so you must
	* always use the one returned by
	* ps_endpointer_frame_size()
	* (in samples) or ps_endpointer_frame_length() (in
	* seconds).
	* @param frame_length Requested frame length in seconds, or 0.0 for the
	* default. Only 0.01, 0.02, 0.03 currently supported.
	* **Actual frame length may be different, you must
	* always use ps_endpointer_frame_length() to obtain it.**
	* @return Endpointer object or NULL on failure (invalid parameter for
	* instance).
	*/
	POCKETSPHINX_EXPORT
	ps_endpointer_t *ps_endpointer_init(double window,
	double ratio,
	ps_vad_mode_t mode,
	int sample_rate, double frame_length);

	/**
	* Retain a pointer to endpointer
	*
	* @memberof ps_endpointer_t
	* @param ep Endpointer.
	* @return Endpointer with incremented reference count.
	*/
	POCKETSPHINX_EXPORT
	ps_endpointer_t ps_endpointer_retain(ps_endpointer_t ep);

	/**
	* Release a pointer to endpointer.
	*
	* @memberof ps_endpointer_t
	* @param ep Endpointer
	* @return New reference count (0 if freed).
	*/
	POCKETSPHINX_EXPORT
	int ps_endpointer_free(ps_endpointer_t *ep);

	/**
	* Get the voice activity detector used by the endpointer.
	*
	* @memberof ps_endpointer_t
	* @return VAD object. The endpointer retains ownership of this
	* object, so you must use ps_vad_retain() if you wish to use it
	* outside of the lifetime of the endpointer.
	*/
	POCKETSPHINX_EXPORT
	ps_vad_t ps_endpointer_vad(ps_endpointer_t ep);

	/**
	* Get the frame size (in samples) consumed by the endpointer.
	*
	* Multiply this by 2 to get the size of the frame buffer required.
	*/
	#define ps_endpointer_frame_size(ep) ps_vad_frame_size(ps_endpointer_vad(ep))

	/**
	* Get the frame length (in seconds) consumed by the endpointer.
	*/
	#define ps_endpointer_frame_length(ep) ps_vad_frame_length(ps_endpointer_vad(ep))

	/**
	* Get the sample rate required by the endpointer.
	*/
	#define ps_endpointer_sample_rate(ep) ps_vad_sample_rate(ps_endpointer_vad(ep))

	/**
	* Process a frame of audio, returning a frame if in a speech region.
	*
	* Note that the endpointer is not thread-safe. You must call all
	* endpointer functions from the same thread.
	*
	* @memberof ps_endpointer_t
	* @param ep Endpointer.
	* @param frame Frame of data, must contain ps_endpointer_frame_size()
	* samples.
	* @return NULL if no speech available, or pointer to a frame of
	* ps_endpointer_frame_size() samples (no more and no less).
	*/
	POCKETSPHINX_EXPORT
	const int16 ps_endpointer_process(ps_endpointer_t ep,
	const int16 *frame);

	/**
	* Process remaining samples at end of stream.
	*
	* Note that the endpointer is not thread-safe. You must call all
	* endpointer functions from the same thread.
	*
	* @memberof ps_endpointer_t
	* @param ep Endpointer.
	* @param frame Frame of data, must contain ps_endpointer_frame_size()
	* samples or less.
	* @param nsamp: Number of samples in frame.
	* @param out_nsamp: Output, number of samples available.
	* @return Pointer to available samples, or NULL if none available.
	*/
	POCKETSPHINX_EXPORT
	const int16 ps_endpointer_end_stream(ps_endpointer_t ep,
	const int16 *frame,
	size_t nsamp,
	size_t *out_nsamp);

	/**
	* Get the current state (speech/not-speech) of the endpointer.
	*
	* This function can be used to detect speech/non-speech transitions.
	* If it returns 0, and a subsequent call to ps_endpointer_process()
	* returns non-NULL, this indicates a transition to speech.
	* Conversely, if ps_endpointer_process() returns non-NULL and a
	* subsequent call to this function returns 0, this indicates a
	* transition to non-speech.
	*
	* @memberof ps_endpointer_t
	* @param ep Endpointer.
	* @return non-zero if in a speech segment after processing the last
	* frame of data.
	*/
	POCKETSPHINX_EXPORT
	int ps_endpointer_in_speech(ps_endpointer_t *ep);

	/**
	* Get the start time of the last speech segment.
	* @memberof ps_endpointer_t
	*/
	POCKETSPHINX_EXPORT
	double ps_endpointer_speech_start(ps_endpointer_t *ep);

	/**
	* Get the end time of the last speech segment
	* @memberof ps_endpointer_t
	*/
	POCKETSPHINX_EXPORT
	double ps_endpointer_speech_end(ps_endpointer_t *ep);

	#ifdef __cplusplus
	}
	#endif

	#endif /* __PS_ENDPOINTER_H__ */