Upload stream.py with huggingface_hub
Browse files
stream.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
from typing import Dict, Iterable
|
2 |
|
3 |
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
|
@@ -86,6 +87,10 @@ def is_stream(obj):
|
|
86 |
return isinstance(obj, IterableDataset) or isinstance(obj, Stream) or isinstance(obj, Dataset)
|
87 |
|
88 |
|
|
|
|
|
|
|
|
|
89 |
class MultiStream(dict):
|
90 |
"""A class for handling multiple streams of data in a dictionary-like format.
|
91 |
|
@@ -179,7 +184,7 @@ class MultiStream(dict):
|
|
179 |
|
180 |
return cls(
|
181 |
{
|
182 |
-
key: Stream(
|
183 |
for key, iterable in iterables.items()
|
184 |
}
|
185 |
)
|
|
|
1 |
+
from copy import deepcopy
|
2 |
from typing import Dict, Iterable
|
3 |
|
4 |
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
|
|
|
87 |
return isinstance(obj, IterableDataset) or isinstance(obj, Stream) or isinstance(obj, Dataset)
|
88 |
|
89 |
|
90 |
+
def iterable_starter(iterable):
    """Return a new iterator over a deep copy of ``iterable``.

    The source object is deep-copied on every call, so the returned
    iterator walks an independent snapshot: consuming it, or mutating the
    items it yields, leaves the caller's ``iterable`` untouched, and a
    later call starts again from the beginning.

    Args:
        iterable: The object to iterate over. It must be supported by
            ``copy.deepcopy``; any error raised while copying propagates
            to the caller.

    Returns:
        An iterator over the deep-copied object.
    """
    # Copy first, then iterate the copy, so the original is never consumed.
    return iter(deepcopy(iterable))
|
92 |
+
|
93 |
+
|
94 |
class MultiStream(dict):
|
95 |
"""A class for handling multiple streams of data in a dictionary-like format.
|
96 |
|
|
|
184 |
|
185 |
return cls(
|
186 |
{
|
187 |
+
key: Stream(iterable_starter, gen_kwargs={"iterable": iterable}, streaming=streaming, caching=caching)
|
188 |
for key, iterable in iterables.items()
|
189 |
}
|
190 |
)
|