Spaces:
Build error
Build error
from typing import List, Tuple | |
def replace_refspans(
    spans_to_replace: List[Tuple[int, int, str, str]],
    full_string: str,
    pre_padding: str = "",
    post_padding: str = "",
    btwn_padding: str = ", ",
) -> str:
    """
    Replace each given span of ``full_string`` with its new text.

    Spans are handled in order of their start index:

    * a span whose end index is <= 0 is ignored;
    * a span that starts before the end of the previously kept span overlaps
      it and is dropped (no replacement is made for it);
    * every kept replacement is emitted as
      ``pre_padding + new_substring + post_padding``;
    * when a kept span starts exactly where the previously kept span ended,
      ``btwn_padding`` is inserted in front of its replacement.

    The input list is not modified.

    Spans are assumed to be well-formed, i.e.
    ``0 <= start_ind <= end_ind <= len(full_string)``; this is not verified.

    :param spans_to_replace: list of tuples of form
        (start_ind, end_ind, span_text, new_substring)
    :param full_string: text the span indices refer to
    :param pre_padding: text placed directly before every replacement
    :param post_padding: text placed directly after every replacement
    :param btwn_padding: separator inserted between two abutting replacements
    :return: ``full_string`` with all kept spans replaced
    :raises AssertionError: if a ``span_text`` differs from
        ``full_string[start_ind:end_ind]`` or two spans share a start index
    """
    # every span must match the text it claims to cover
    assert all(
        full_string[start:end] == span
        for start, end, span, _ in spans_to_replace
    ), "span text does not match full_string at the given indices"

    # no two spans may start at the same index
    start_inds = [rep[0] for rep in spans_to_replace]
    assert len(set(start_inds)) == len(start_inds), "duplicate span start index"

    # work on a sorted copy so the caller's list is left untouched
    ordered = sorted(spans_to_replace, key=lambda x: x[0])

    pieces = []
    cursor = 0       # index in full_string up to which output has been emitted
    prev_end = None  # end index of the most recently kept span

    for start, end, _span, new_string in ordered:
        # skip empties
        if end <= 0:
            continue
        # overlap with the previously kept span: drop this replacement
        if prev_end is not None and start < prev_end:
            continue

        # pad exactly once, regardless of how many spans precede this one
        replacement = pre_padding + new_string + post_padding
        # abutting spans get the separator between them
        if prev_end is not None and start == prev_end:
            replacement = btwn_padding + replacement

        pieces.append(full_string[cursor:start])
        pieces.append(replacement)
        cursor = end
        prev_end = end

    # untouched tail after the last kept span
    pieces.append(full_string[cursor:])
    return "".join(pieces)
def sub_spans_and_update_indices(
    spans_to_replace: List[Tuple[int, int, str, str]],
    full_string: str
) -> Tuple[str, List]:
    """
    Replace all spans and recompute their indices in the resulting text.

    Each span ``(start, end, token, surface)`` has ``full_string[start:end]``
    (which must equal ``token``) replaced by ``surface``. The returned spans
    are ordered by start index and carry the start/end positions that
    ``surface`` occupies after all preceding replacements have been applied.

    The input list is not modified.

    NOTE: the new text is produced by ``replace_refspans``, while the indices
    computed here assume that every span is applied. Overlapping spans are not
    checked for (see TODOs below), so for overlapping input the returned
    indices may not line up with the returned text.

    :param spans_to_replace: list of tuples of form
        (start_ind, end_ind, token, surface)
    :param full_string: text the span indices refer to
    :return: tuple of (new text, list of (new_start, new_end, token, surface))
    :raises AssertionError: if a ``token`` differs from
        ``full_string[start_ind:end_ind]`` or two spans share a start index
    """
    # TODO: check no spans overlapping
    # TODO: check all spans well-formed

    # every span must match the text it claims to cover
    assert all(
        full_string[start:end] == token
        for start, end, token, _ in spans_to_replace
    ), "span text does not match full_string at the given indices"

    # no two spans may start at the same index
    start_inds = [rep[0] for rep in spans_to_replace]
    assert len(set(start_inds)) == len(start_inds), "duplicate span start index"

    # sorted copy: leaves the caller's list untouched
    ordered = sorted(spans_to_replace, key=lambda x: x[0])

    # walk left to right, carrying the cumulative length change of all
    # replacements seen so far
    new_spans = []
    shift = 0
    for start, end, token, surface in ordered:
        new_start = start + shift
        new_spans.append((new_start, new_start + len(surface), token, surface))
        shift += len(surface) - (end - start)

    # generate new text; abutting replacements are joined without a separator
    new_text = replace_refspans(ordered, full_string, btwn_padding="")
    return new_text, new_spans