File size: 763 Bytes
bfc0ec6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
"""Utilities for unit tests."""

import os
import pathlib
import uuid
from typing import Union

import pyarrow.parquet as pq

from .schema import ROWID, DataType, Field, Item, Schema, schema_to_arrow_schema


def read_items(data_dir: Union[str, pathlib.Path], filepaths: list[str],
               schema: Schema) -> list[Item]:
  """Read the source items from a dataset output directory.

  Args:
    data_dir: Directory that each entry of `filepaths` is joined onto.
    filepaths: Paths of the parquet files to read, relative to `data_dir`.
    schema: Schema used to read the files. NOTE: it is modified in place; a
      string-typed `ROWID` field is set on `schema.fields` before reading.

  Returns:
    The rows of every file, concatenated in the order of `filepaths`.
  """
  # The row id column is stored alongside the source fields, so it has to be
  # part of the schema that is handed to the parquet reader.
  schema.fields[ROWID] = Field(dtype=DataType.STRING)
  # The arrow schema does not depend on the file; build it once up front
  # instead of once per file.
  arrow_schema = schema_to_arrow_schema(schema)

  items: list[Item] = []
  for filepath in filepaths:
    table = pq.read_table(os.path.join(data_dir, filepath), schema=arrow_schema)
    items.extend(table.to_pylist())
  return items


def fake_uuid(id: bytes) -> uuid.UUID:
  """Build a deterministic UUID for tests by repeating `id` 16 times.

  The repeated bytes are hex-encoded and parsed as a UUID, so a single-byte
  `id` yields a UUID whose 16 bytes are all equal to that byte.
  """
  repeated = id * 16
  return uuid.UUID(hex=repeated.hex())