Spaces:
Runtime error
Runtime error
File size: 900 Bytes
e4f9cbe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
"""Utilities for testing text splitters."""
from typing import Optional
from ...data.dataset_utils import lilac_span
from ...schema import TEXT_SPAN_END_FEATURE, TEXT_SPAN_START_FEATURE, VALUE_KEY, Item
def spans_to_text(text: str, spans: Optional[list[Item]]) -> list[str]:
    """Return the substring of `text` covered by each span.

    Args:
      text: The string the spans index into.
      spans: Span items whose `VALUE_KEY` entry holds the start and end offsets
        under `TEXT_SPAN_START_FEATURE` and `TEXT_SPAN_END_FEATURE`. May be
        `None` or empty.

    Returns:
      One slice of `text` per span, in the order the spans were given. An empty
      list when `spans` is `None` or empty.
    """
    pieces: list[str] = []
    # `spans or []` folds the None and empty-list cases into a no-op loop.
    for item in spans or []:
        bounds = item[VALUE_KEY]
        begin = bounds[TEXT_SPAN_START_FEATURE]
        finish = bounds[TEXT_SPAN_END_FEATURE]
        pieces.append(text[begin:finish])
    return pieces
def text_to_expected_spans(text: str, splits: list[str]) -> list[Item]:
    """Convert text and a list of splits to a list of expected spans.

    Each split is located in `text` by searching forward from the end of the
    previous match, so a substring that repeats in `text` is matched to
    successive occurrences rather than always to the first one.

    Args:
      text: The full text the splits were taken from.
      splits: Substrings of `text`, listed in the order they occur in it.

    Returns:
      One span per split, each built with `lilac_span(start=..., end=...)`.

    Raises:
      ValueError: If a split does not occur in `text` at or after the end of
        the previous split.
    """
    search_from = 0
    expected_spans: list[Item] = []
    for split in splits:
        start = text.find(split, search_from)
        if start < 0:
            # `str.find` reports a miss as -1. Passing that on would create a
            # span starting at -1 and rewind the search offset, silently
            # corrupting every later span, so fail loudly instead.
            raise ValueError(
                f'Split {split!r} not found in text at or after offset {search_from}.')
        end = start + len(split)
        expected_spans.append(lilac_span(start=start, end=end))
        search_from = end
    return expected_spans
|