File size: 4,304 Bytes
8437114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""isort:skip_file"""

from .dictionary import Dictionary, TruncatedDictionary

from .fairseq_dataset import FairseqDataset, FairseqIterableDataset

from .base_wrapper_dataset import BaseWrapperDataset

from .add_target_dataset import AddTargetDataset
from .append_token_dataset import AppendTokenDataset
from .audio.raw_audio_dataset import BinarizedAudioDataset, FileAudioDataset
from .audio.hubert_dataset import HubertDataset
from .backtranslation_dataset import BacktranslationDataset
from .bucket_pad_length_dataset import BucketPadLengthDataset
from .colorize_dataset import ColorizeDataset
from .concat_dataset import ConcatDataset
from .concat_sentences_dataset import ConcatSentencesDataset
from .denoising_dataset import DenoisingDataset
from .id_dataset import IdDataset
from .indexed_dataset import (
    IndexedCachedDataset,
    IndexedDataset,
    IndexedRawTextDataset,
    MMapIndexedDataset,
)
from .language_pair_dataset import LanguagePairDataset
from .list_dataset import ListDataset
from .lm_context_window_dataset import LMContextWindowDataset
from .lru_cache_dataset import LRUCacheDataset
from .mask_tokens_dataset import MaskTokensDataset
from .monolingual_dataset import MonolingualDataset
from .multi_corpus_sampled_dataset import MultiCorpusSampledDataset
from .nested_dictionary_dataset import NestedDictionaryDataset
from .noising import NoisingDataset
from .numel_dataset import NumelDataset
from .num_samples_dataset import NumSamplesDataset
from .offset_tokens_dataset import OffsetTokensDataset
from .pad_dataset import LeftPadDataset, PadDataset, RightPadDataset
from .prepend_dataset import PrependDataset
from .prepend_token_dataset import PrependTokenDataset
from .raw_label_dataset import RawLabelDataset
from .replace_dataset import ReplaceDataset
from .resampling_dataset import ResamplingDataset
from .roll_dataset import RollDataset
from .round_robin_zip_datasets import RoundRobinZipDatasets
from .sort_dataset import SortDataset
from .strip_token_dataset import StripTokenDataset
from .subsample_dataset import SubsampleDataset
from .token_block_dataset import TokenBlockDataset
from .transform_eos_dataset import TransformEosDataset
from .transform_eos_lang_pair_dataset import TransformEosLangPairDataset
from .shorten_dataset import TruncateDataset, RandomCropDataset
from .multilingual.sampled_multi_dataset import SampledMultiDataset
from .multilingual.sampled_multi_epoch_dataset import SampledMultiEpochDataset
from .fasta_dataset import FastaDataset, EncodedFastaDataset

from .iterators import (
    CountingIterator,
    EpochBatchIterator,
    GroupedIterator,
    ShardedIterator,
)

# Public API of this package: the names bound by `from <package> import *`.
# Each entry corresponds to a class imported earlier in this module, so a
# new export needs both an import statement and an entry in this list.
# NOTE(review): entries follow case-insensitive alphabetical order, except
# that "ResamplingDataset" currently precedes "ReplaceDataset".
__all__ = [
    "AddTargetDataset",
    "AppendTokenDataset",
    "BacktranslationDataset",
    "BaseWrapperDataset",
    "BinarizedAudioDataset",
    "BucketPadLengthDataset",
    "ColorizeDataset",
    "ConcatDataset",
    "ConcatSentencesDataset",
    "CountingIterator",
    "DenoisingDataset",
    "Dictionary",
    "EncodedFastaDataset",
    "EpochBatchIterator",
    "FairseqDataset",
    "FairseqIterableDataset",
    "FastaDataset",
    "FileAudioDataset",
    "GroupedIterator",
    "HubertDataset",
    "IdDataset",
    "IndexedCachedDataset",
    "IndexedDataset",
    "IndexedRawTextDataset",
    "LanguagePairDataset",
    "LeftPadDataset",
    "ListDataset",
    "LMContextWindowDataset",
    "LRUCacheDataset",
    "MaskTokensDataset",
    "MMapIndexedDataset",
    "MonolingualDataset",
    "MultiCorpusSampledDataset",
    "NestedDictionaryDataset",
    "NoisingDataset",
    "NumelDataset",
    "NumSamplesDataset",
    "OffsetTokensDataset",
    "PadDataset",
    "PrependDataset",
    "PrependTokenDataset",
    "RandomCropDataset",
    "RawLabelDataset",
    "ResamplingDataset",
    "ReplaceDataset",
    "RightPadDataset",
    "RollDataset",
    "RoundRobinZipDatasets",
    "SampledMultiDataset",
    "SampledMultiEpochDataset",
    "ShardedIterator",
    "SortDataset",
    "StripTokenDataset",
    "SubsampleDataset",
    "TokenBlockDataset",
    "TransformEosDataset",
    "TransformEosLangPairDataset",
    "TruncateDataset",
    "TruncatedDictionary",
]