Elron committed on
Commit
59ca01e
1 Parent(s): 6ffc9c6

Upload stream.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. stream.py +6 -1
stream.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from typing import Dict, Iterable
2
 
3
  from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
@@ -86,6 +87,10 @@ def is_stream(obj):
86
  return isinstance(obj, IterableDataset) or isinstance(obj, Stream) or isinstance(obj, Dataset)
87
 
88
 
 
 
 
 
89
  class MultiStream(dict):
90
  """A class for handling multiple streams of data in a dictionary-like format.
91
 
@@ -179,7 +184,7 @@ class MultiStream(dict):
179
 
180
  return cls(
181
  {
182
- key: Stream(iterable.__iter__, gen_kwargs={}, streaming=streaming, caching=caching)
183
  for key, iterable in iterables.items()
184
  }
185
  )
 
1
+ from copy import deepcopy
2
  from typing import Dict, Iterable
3
 
4
  from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
 
87
  return isinstance(obj, IterableDataset) or isinstance(obj, Stream) or isinstance(obj, Dataset)
88
 
89
 
90
+ def iterable_starter(iterable):
91
+ return iter(deepcopy(iterable))
92
+
93
+
94
  class MultiStream(dict):
95
  """A class for handling multiple streams of data in a dictionary-like format.
96
 
 
184
 
185
  return cls(
186
  {
187
+ key: Stream(iterable_starter, gen_kwargs={"iterable": iterable}, streaming=streaming, caching=caching)
188
  for key, iterable in iterables.items()
189
  }
190
  )