Spaces:
Build error
Build error
File size: 4,120 Bytes
02ae0bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
from typing import List, Tuple
def replace_refspans(
        spans_to_replace: List[Tuple[int, int, str, str]],
        full_string: str,
        pre_padding: str = "",
        post_padding: str = "",
        btwn_padding: str = ", "
) -> str:
    """
    Replace each given span of the full string with its new text.

    Spans are processed in order of their start index. A span that starts
    before the end of the previously replaced span overlaps it and is dropped
    (the earlier span wins). A span that starts exactly where the previously
    replaced span ended gets ``btwn_padding`` inserted in front of its
    replacement. Spans whose end index is <= 0 are ignored.

    The input list is not modified.

    :param spans_to_replace: list of tuples of form (start_ind, end_ind, span_text, new_substring);
        span_text must equal full_string[start_ind:end_ind] and spans are
        expected to be well-formed (start_ind <= end_ind)
    :param full_string: the text in which the spans are replaced
    :param pre_padding: text inserted directly before every replacement
    :param post_padding: text inserted directly after every replacement
    :param btwn_padding: separator inserted between two replacements whose
        original spans abut each other
    :return: the full string with all non-overlapping spans replaced
    :raises AssertionError: if a span text does not match the full string, or
        if two spans share the same start index
    """
    # NOTE: validation deliberately stays as `assert` so callers keep seeing
    # AssertionError; be aware these checks vanish under `python -O`.
    assert all(
        full_string[start:end] == span for start, end, span, _ in spans_to_replace
    ), "every span text must equal full_string[start:end]"

    # no two spans may start at the same index
    start_inds = [rep[0] for rep in spans_to_replace]
    assert len(set(start_inds)) == len(start_inds), "span start indices must be unique"

    # work on a sorted copy so the caller's list is left untouched
    ordered = sorted(spans_to_replace, key=lambda rep: rep[0])

    pieces = []
    cursor = 0        # index in full_string up to which text was already emitted
    prev_end = None   # end index of the last span that was actually replaced
    for start, end, _, new_string in ordered:
        # skip empties
        if end <= 0:
            continue
        # overlap with the previously replaced span: drop this replacement
        if prev_end is not None and start < prev_end:
            continue

        replacement = pre_padding + new_string + post_padding
        # span abuts the previous one: separate the two replacements
        if prev_end is not None and start == prev_end:
            replacement = btwn_padding + replacement

        # untouched text between the previous span and this one, then the new text
        pieces.append(full_string[cursor:start])
        pieces.append(replacement)
        cursor = end
        prev_end = end

    # remainder of the string after the last replaced span
    pieces.append(full_string[cursor:])
    return "".join(pieces)
def sub_spans_and_update_indices(
        spans_to_replace: List[Tuple[int, int, str, str]],
        full_string: str
) -> Tuple[str, List]:
    """
    Replace all spans and recompute their indices in the resulting text.

    Each input tuple is (start_ind, end_ind, token, surface): ``token`` is the
    text currently at full_string[start_ind:end_ind] and ``surface`` is the text
    that replaces it. The returned spans are ordered by start index and carry
    the start/end of ``surface`` in the new text.

    The input list is not modified.

    :param spans_to_replace: list of tuples of form (start_ind, end_ind, token, surface)
    :param full_string: the text in which the spans are replaced
    :return: tuple of (new text, list of (new_start, new_end, token, surface))
    :raises AssertionError: if a token does not match the full string, or if
        two spans share the same start index
    """
    # TODO: check no spans overlapping -- the new indices below are computed as
    #       if every span were replaced, so they are only valid for
    #       non-overlapping input
    # TODO: check all spans well-formed

    # NOTE: validation deliberately stays as `assert` so callers keep seeing
    # AssertionError; be aware these checks vanish under `python -O`.
    assert all(
        full_string[start:end] == token for start, end, token, _ in spans_to_replace
    ), "every token must equal full_string[start:end]"

    # no two spans may start at the same index
    start_inds = [rep[0] for rep in spans_to_replace]
    assert len(set(start_inds)) == len(start_inds), "span start indices must be unique"

    # sorted copy: the caller's list keeps its order and contents
    ordered = sorted(spans_to_replace, key=lambda rep: rep[0])

    # walk the spans left to right, carrying the accumulated length change
    # caused by all replacements before the current one
    new_spans = []
    running_offset = 0
    for start, end, token, surface in ordered:
        new_start = start + running_offset
        new_spans.append((new_start, new_start + len(surface), token, surface))
        running_offset += len(surface) - (end - start)

    # generate new text; hand over a private copy so the helper is free to
    # rework the list it receives without affecting `ordered`
    new_text = replace_refspans(list(ordered), full_string, btwn_padding="")
    return new_text, new_spans
|