felixonmars committed
Commit: d1236f2
Parent(s): 895f0a0

Correct typos in datasets.py (#639)

Changed files: src/axolotl/datasets.py (+2 -2)
src/axolotl/datasets.py CHANGED
@@ -22,7 +22,7 @@ class TokenizedPromptDataset(Dataset):
     """
     Dataset that returns tokenized prompts from a stream of text files.
     Args:
-        prompt_tokenizer (PromptTokenizingStrategy): The prompt tokenizing method for
+        prompt_tokenizer (PromptTokenizingStrategy): The prompt tokenizing method for processing the data.
         dataset (dataset.Dataset): Dataset with text files.
     """

@@ -55,7 +55,7 @@ class ConstantLengthDataset(IterableDataset):
     """
     Iterable dataset that returns constant length chunks of tokens from stream of text files.
     Args:
-        tokenizer (Tokenizer): The processor used for
+        tokenizer (Tokenizer): The processor used for processing the data.
         dataset (dataset.Dataset): Dataset with text files.
         seq_length (int): Length of token sequences to return.
     """
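For context, a minimal sketch of how the two documented constructors might fit together, using only the class names and parameters named in the docstrings above. The base tokenizer, the data file, the hypothetical build_prompt_tokenizing_strategy helper, and the choice to feed the tokenized dataset into the packing step are assumptions for illustration, not verbatim axolotl usage.

# Sketch only: wires together the two classes documented in the diff above,
# using just the parameters their docstrings describe.
from datasets import load_dataset
from transformers import AutoTokenizer

from axolotl.datasets import ConstantLengthDataset, TokenizedPromptDataset

# Assumed inputs: any HF tokenizer and any dataset of text records.
tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")
raw_dataset = load_dataset("json", data_files="data.jsonl", split="train")

# prompt_tokenizer must be a PromptTokenizingStrategy implementation; its
# construction depends on the prompt format, so a hypothetical helper stands
# in for it here.
prompt_tokenizer = build_prompt_tokenizing_strategy(tokenizer)  # hypothetical helper

# Tokenize each prompt record...
tokenized = TokenizedPromptDataset(prompt_tokenizer, raw_dataset)

# ...then pack the tokens into constant-length chunks (length per the
# seq_length arg documented above).
packed = ConstantLengthDataset(tokenizer, tokenized, seq_length=2048)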