Spaces:
Running
Running
Update overlap.py
Browse files- overlap.py +39 -0
overlap.py
CHANGED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Utilities to get overlap between strings
|
| 2 |
+
|
| 3 |
+
def get_overlap_length(left: str, right: str):
    """Find the longest suffix of ``left`` that is also a prefix of ``right``.

    Args:
        left: The string whose ending is inspected.
        right: The string whose beginning is inspected.

    Returns:
        A tuple ``(overlap_length, overlap)``. When the strings do not
        overlap, the result is ``(0, "")``.
    """
    max_length = min(len(left), len(right))
    # Candidate lengths run from max_length down to 1. Length 0 is skipped
    # because ``left[-0:]`` is the whole string rather than an empty suffix,
    # and max_length itself is included so a complete overlap (one string
    # fully contained at the junction) is detected.
    for length in range(max_length, 0, -1):
        candidate = left[-length:]
        if candidate == right[:length]:
            # Longest candidates are tried first, so the first hit wins.
            return length, candidate
    return 0, ""
|
| 10 |
+
|
| 11 |
+
def get_overlap_list(strings):
    """
    Compute the overlap between each string and the one that follows it.

    Returns a list of (overlap_length, overlap) tuples, one per pair of
    adjacent strings, so the result has one element fewer than the input
    (and is empty for inputs of length 0 or 1).
    """
    return [
        get_overlap_length(current, following)
        for current, following in zip(strings, strings[1:])
    ]
|
| 19 |
+
|
| 20 |
+
def unoverlap_list(strings):
    """
    Split a sequence of overlapping strings into alternating segments.

    Returns a list of (content, is_overlap) tuples. Each input string
    contributes one (content, False) entry holding the string with the
    overlaps shared with its neighbours trimmed off both ends; a non-empty
    overlap with the previous string is emitted just before it as
    (overlap, True).
    """
    pair_overlaps = get_overlap_list(strings)
    last_position = len(strings) - 1
    segments = []

    for position, text in enumerate(strings):
        # Overlap shared with the previous string; the first string has none.
        if position > 0:
            trim_left, shared = pair_overlaps[position - 1]
        else:
            trim_left, shared = 0, ""
        if len(shared) > 0:
            segments.append((shared, True))

        # Overlap shared with the next string; the last string has none.
        trim_right = pair_overlaps[position][0] if position < last_position else 0

        # Keep only the part of the string not covered by either overlap.
        segments.append((text[trim_left:len(text) - trim_right], False))

    return segments
|