Spaces:
Sleeping
Sleeping
"""Utilities for unit tests.""" | |
import os | |
import pathlib | |
import uuid | |
from typing import Union | |
import pyarrow.parquet as pq | |
from .schema import ROWID, DataType, Field, Item, Schema, schema_to_arrow_schema | |
def read_items(data_dir: Union[str, pathlib.Path], filepaths: list[str], | |
schema: Schema) -> list[Item]: | |
"""Read the source items from a dataset output directory.""" | |
items: list[Item] = [] | |
schema.fields[ROWID] = Field(dtype=DataType.STRING) | |
for filepath in filepaths: | |
items.extend( | |
pq.read_table(os.path.join(data_dir, filepath), | |
schema=schema_to_arrow_schema(schema)).to_pylist()) | |
return items | |
def fake_uuid(id: bytes) -> uuid.UUID: | |
"""Create a test UUID.""" | |
return uuid.UUID((id * 16).hex()) | |