| """Path Resolver — utility helpers for segment operations.""" |
| from __future__ import annotations |
|
|
| import hashlib |
| import logging |
| from typing import Any, Dict, Iterable, List, Optional |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
def normalise_segment(data: Dict[str, Any]) -> Dict[str, Any]:
    """Normalise and validate a raw segment mapping.

    Drops every key whose value is ``None``, requires a ``'resolved'``
    entry, and guarantees a non-empty ``'id'``: when the caller supplied
    no id (or a falsy one), a stable 12-character hex fingerprint of the
    resolved value is generated.

    Args:
        data: Raw segment mapping; may contain ``None`` values.

    Returns:
        A new dict (the input is never mutated) with ``None`` entries
        removed and ``'id'`` guaranteed present.

    Raises:
        ValueError: If ``'resolved'`` is missing or was ``None``.
    """
    result = {k: v for k, v in data.items() if v is not None}
    if "resolved" not in result:
        # Plain string: the original used an f-string with no
        # placeholders (ruff F541).
        raise ValueError("Segment must include 'resolved'")
    if not result.get("id"):
        # MD5 here is a cheap, deterministic content fingerprint for
        # identification only — not a security boundary.
        digest = hashlib.md5(str(result["resolved"]).encode()).hexdigest()
        result["id"] = digest[:12]
    return result
|
|
|
|
def validate_segments(
    items: Iterable[Dict[str, Any]],
    *,
    status: Optional[str] = None,
    limit: int = 100,
) -> List[Dict[str, Any]]:
    """Filter and page a sequence of Segment records.

    With ``status=None`` every record passes; otherwise only records
    whose ``'status'`` equals *status* are kept. At most *limit*
    records are returned, preserving input order.
    """
    if status is None:
        # No filter requested: keep everything (materialize the iterable).
        kept = list(items)
    else:
        kept = [record for record in items if record.get("status") == status]
    logger.debug("validate_segments: %d items after filter", len(kept))
    return kept[:limit]
|
|
|
|
def expand_segment(record: Dict[str, Any], **overrides: Any) -> Dict[str, Any]:
    """Return a shallow copy of *record* with *overrides* merged in.

    If the merged result carries a non-numeric ``'root'``, a best-effort
    coercion to ``float`` is attempted; values that cannot be converted
    are left exactly as supplied.
    """
    merged = {**record, **overrides}
    if "root" in merged:
        root = merged["root"]
        if not isinstance(root, (int, float)):
            try:
                merged["root"] = float(root)
            except (TypeError, ValueError):
                # Deliberate best-effort: keep the unconvertible value.
                pass
    return merged
|
|
|
|
def validate_segment(record: Dict[str, Any]) -> bool:
    """Return True when *record* satisfies all Segment invariants.

    A valid record carries non-None ``'resolved'``, ``'root'`` and
    ``'resolved_at'`` entries plus a string ``'id'``. The first missing
    field is logged at WARNING level.
    """
    for field in ("resolved", "root", "resolved_at"):
        # get() treats an absent key and an explicit None identically,
        # matching the original's two-part check.
        if record.get(field) is None:
            logger.warning("validate_segment: missing field %r", field)
            return False
    return isinstance(record.get("id"), str)
|
|
|
|
def join_segment_batch(
    records: List[Dict[str, Any]],
    batch_size: int = 50,
) -> List[List[Dict[str, Any]]]:
    """Slice *records* into chunks of *batch_size* for bulk join.

    The final chunk may be shorter than *batch_size*; an empty input
    yields an empty list.
    """
    batches: List[List[Dict[str, Any]]] = []
    # range() with a zero step raises ValueError, same as the original.
    for offset in range(0, len(records), batch_size):
        batches.append(records[offset : offset + batch_size])
    return batches
|
|