| from .base import SequentialDataPipe | |
| from .common_pipes import LoadAudio, SetOutputKeys | |
| from .extract_feat_pipes import ExtractNpcFeat | |
| from .masked_reconstruction_pipes import PrepareTargetFeat | |
| from .valid_label_mask_pipes import LabelMaskFromLen | |
| class PretrainNpcPipe(SequentialDataPipe): | |
| """ | |
| each item in the input dataset should have: | |
| wav_path: str | |
| """ | |
| def __init__( | |
| self, | |
| output_keys: dict = None, | |
| feat_type: str = "fbank", | |
| feat_dim: int = 80, | |
| frame_length: int = 25, | |
| frame_shift: int = 10, | |
| decode_wav: bool = False, | |
| cmvn: bool = True, | |
| audio_sample_rate: int = 16000, | |
| audio_channel_reduction: str = "first", | |
| n_jobs: int = 6, | |
| ): | |
| """ | |
| Args: | |
| output_keys (dict): args for the output handle | |
| feat_type (str): feature type | |
| feat_dim (int): feature dimension | |
| frame_length (int): window size in ms | |
| frame_shift (int): hop size in ms | |
| decode_wav (bool): whether to decode wav | |
| cmvn (bool): whether to apply uttr.-wised CMVN on feature | |
| audio_sample_rate (int): audio sample rate | |
| audio_channel_reduction (str): "first" channel | |
| n_jobs (int): number of workers | |
| """ | |
| output_keys = output_keys or dict( | |
| x="source_feat", | |
| label="target_feat", | |
| label_mask="label_mask", | |
| unique_name="id", | |
| ) | |
| super().__init__( | |
| LoadAudio( | |
| n_jobs=n_jobs, | |
| audio_sample_rate=audio_sample_rate, | |
| audio_channel_reduction=audio_channel_reduction, | |
| ), | |
| ExtractNpcFeat( | |
| feat_type=feat_type, | |
| feat_dim=feat_dim, | |
| frame_length=frame_length, | |
| frame_shift=frame_shift, | |
| decode_wav=decode_wav, | |
| cmvn=cmvn, | |
| feat_name="source_feat", | |
| ), | |
| LabelMaskFromLen( | |
| target_feat_name="target_feat", label_mask_name="label_mask" | |
| ), | |
| PrepareTargetFeat( | |
| use_copy=True, | |
| source_feat_name="source_feat", | |
| target_feat_name="target_feat", | |
| ), | |
| SetOutputKeys(output_keys=output_keys), | |
| ) | |