Spaces:
Running
Running
Fix count lines
Browse files
- dataset.py +1 -9
dataset.py
CHANGED
@@ -1,17 +1,9 @@
|
|
1 |
from torch.utils.data import IterableDataset
|
2 |
|
3 |
|
4 |
-
def blocks(files, size=65536):
|
5 |
-
while True:
|
6 |
-
b = files.read(size)
|
7 |
-
if not b:
|
8 |
-
break
|
9 |
-
yield b
|
10 |
-
|
11 |
-
|
12 |
def count_lines(input_path: str) -> int:
|
13 |
with open(input_path, "r", encoding="utf8") as f:
|
14 |
-
return sum(
|
15 |
|
16 |
|
17 |
class DatasetReader(IterableDataset):
|
|
|
1 |
from torch.utils.data import IterableDataset
|
2 |
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
def count_lines(input_path: str) -> int:
|
5 |
with open(input_path, "r", encoding="utf8") as f:
|
6 |
+
return sum(1 for _ in f)
|
7 |
|
8 |
|
9 |
class DatasetReader(IterableDataset):
|