Spaces:
Running
Running
Add progress bar to serialize
Browse files- app/utils.py +9 -2
app/utils.py
CHANGED
@@ -11,16 +11,23 @@ if TYPE_CHECKING:
|
|
11 |
__all__ = ["serialize", "deserialize"]
|
12 |
|
13 |
|
14 |
-
def serialize(data: Sequence[str], path: Path, max_size: int = 100000) -> None:
|
15 |
"""Serialize data to a file
|
16 |
|
17 |
Args:
|
18 |
data: The data to serialize
|
19 |
path: The path to save the serialized data
|
20 |
max_size: The maximum size a chunk can be (in elements)
|
|
|
21 |
"""
|
22 |
# first file is path, next chunks have ".1", ".2", etc. appended
|
23 |
-
for i, chunk in enumerate(
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
fd = path.with_suffix(f".{i}.pkl" if i else ".pkl")
|
25 |
with fd.open("wb") as f:
|
26 |
joblib.dump(chunk, f, compress=3)
|
|
|
11 |
__all__ = ["serialize", "deserialize"]
|
12 |
|
13 |
|
14 |
+
def serialize(data: Sequence[str], path: Path, max_size: int = 100000, show_progress: bool = False) -> None:
    """Serialize data to one or more compressed files.

    The data is split into consecutive chunks of at most ``max_size`` elements
    and each chunk is dumped with joblib. The first chunk is written to
    ``path`` with its suffix replaced by ``.pkl``; every following chunk ``i``
    (starting at 1) is written to ``path`` with its suffix replaced by
    ``.{i}.pkl``. Empty ``data`` produces no files.

    Args:
        data: The data to serialize
        path: The path to save the serialized data
        max_size: The maximum size a chunk can be (in elements)
        show_progress: Whether to show a progress bar

    Raises:
        ValueError: If ``max_size`` is not positive
    """
    if max_size <= 0:
        # A negative step would make the range below empty and silently write
        # nothing; a zero step fails with an unrelated-looking range() error.
        raise ValueError(f"max_size must be a positive integer, got {max_size}")

    # Slice lazily so only one chunk copy is alive at a time. tqdm cannot call
    # len() on a generator, so the chunk count is passed explicitly.
    num_chunks = (len(data) + max_size - 1) // max_size
    chunks = (data[start : start + max_size] for start in range(0, len(data), max_size))

    for i, chunk in enumerate(
        tqdm(
            chunks,
            total=num_chunks,
            unit="chunk",
            disable=not show_progress,
        ),
    ):
        # chunk 0 -> "<stem>.pkl", chunk 1 -> "<stem>.1.pkl", chunk 2 -> "<stem>.2.pkl", ...
        fd = path.with_suffix(f".{i}.pkl" if i else ".pkl")
        with fd.open("wb") as f:
            joblib.dump(chunk, f, compress=3)
|