# bayan-api/tests/test_diff.py
# Web-viewer header (commit 3c20b28): "ui: Clean up editor placeholder text and alignment (top-right)"
import difflib
import re
def get_word_positions(text: str) -> list:
    """Locate every whitespace-delimited word in *text*.

    Args:
        text: The string to scan.

    Returns:
        A list of ``(word, start, end)`` tuples in order of appearance, where
        ``start``/``end`` are character offsets such that
        ``text[start:end] == word``. Empty or all-whitespace input yields ``[]``.
    """
    return [(m.group(), m.start(), m.end()) for m in re.finditer(r'\S+', text)]
def get_word_diffs(original: str, corrected: str) -> list:
    """Compute word-level edit suggestions that turn *original* into *corrected*.

    Both strings are split into whitespace-delimited words and aligned with
    ``difflib.SequenceMatcher``. Every non-equal opcode becomes one suggestion.

    Args:
        original: The text as written by the user.
        corrected: The corrected version of the same text.

    Returns:
        A list of dicts, in document order, each with the keys:

        * ``'start'`` / ``'end'``: character offsets into *original* covering
          the affected words (``start == end`` for a pure insertion).
        * ``'original'``: ``original[start:end]`` (``''`` for an insertion).
        * ``'correction'``: the replacement words joined by single spaces
          (``''`` for a deletion).
        * ``'type'``: always ``'generic'``.
    """
    orig_words = get_word_positions(original)
    corr_words = get_word_positions(corrected)

    # autojunk=False: with the default heuristic, once the second sequence has
    # 200+ items any word occurring in more than ~1% of positions is ignored as
    # a match anchor, which can turn a one-word change in a long text into a
    # single "replace everything" suggestion.
    matcher = difflib.SequenceMatcher(
        None,
        [word for word, _, _ in orig_words],
        [word for word, _, _ in corr_words],
        autojunk=False,
    )

    suggestions = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == 'equal':
            continue

        if i1 < i2:
            # 'replace' / 'delete': span from the first affected original word
            # to the end of the last one. Opcode indices are always in range.
            start = orig_words[i1][1]
            end = orig_words[i2 - 1][2]
        else:
            # 'insert': zero-width span anchored at the next original word, or
            # at the very end of the text when inserting after the last word.
            start = end = orig_words[i1][1] if i1 < len(orig_words) else len(original)

        suggestions.append({
            'start': start,
            'end': end,
            'original': original[start:end],
            # For 'delete', j1 == j2 and the join yields ''.
            'correction': " ".join(word for word, _, _ in corr_words[j1:j2]),
            'type': 'generic',
        })
    return suggestions
def test():
    """Check that a single corrupted trailing word yields exactly one suggestion.

    The two sentences differ only in their last whitespace-delimited word, so
    the diff must consist of one replacement covering that word up to the end
    of the original text. Each suggestion is also printed for manual inspection.
    """
    original = "قال محمد علي أننا حققنا نجاحا كبيرا في المشروع رغم الصعوباالصعوبات...."
    corrected = "قال محمد علي أننا حققنا نجاحا كبيرا في المشروع رغم الصعوبات..."
    diffs = get_word_diffs(original, corrected)
    for d in diffs:
        print(d)

    # Derive the expectations from the inputs so the offsets stay correct if
    # the sample sentences are ever edited.
    bad_word = original.split()[-1]
    assert len(diffs) == 1
    (diff,) = diffs
    assert diff['original'] == bad_word
    assert diff['correction'] == corrected.split()[-1]
    assert diff['start'] == len(original) - len(bad_word)
    assert diff['end'] == len(original)
    assert diff['type'] == 'generic'
# Allow running this check directly as a script, outside a test runner.
if __name__ == "__main__":
    test()