Spaces:
Runtime error
Runtime error
"""Utilities for testing text splitters.""" | |
from typing import Optional | |
from ...data.dataset_utils import lilac_span | |
from ...schema import TEXT_SPAN_END_FEATURE, TEXT_SPAN_START_FEATURE, VALUE_KEY, Item | |
def spans_to_text(text: str, spans: Optional[list[Item]]) -> list[str]: | |
"""Convert text and a list of spans to a list of strings.""" | |
if not spans: | |
return [] | |
return [ | |
text[span[VALUE_KEY][TEXT_SPAN_START_FEATURE]:span[VALUE_KEY][TEXT_SPAN_END_FEATURE]] | |
for span in spans | |
] | |
def text_to_expected_spans(text: str, splits: list[str]) -> list[Item]: | |
"""Convert text and a list of splits to a list of expected spans.""" | |
start_offset = 0 | |
expected_spans: list[Item] = [] | |
for split in splits: | |
start = text.find(split, start_offset) | |
end = start + len(split) | |
expected_spans.append(lilac_span(start=start, end=end)) | |
start_offset = end | |
return expected_spans | |