File size: 2,691 Bytes
e49dd9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""A script from https://github.com/zhopto3/morpho-baseline defining
the edit tree class learned from the pledari grond dictionaries"""

from difflib import SequenceMatcher


class EditTreeNode(object):
    """A node of an edit tree that rewrites one string into another.

    ``val`` is a 2-tuple whose meaning depends on the node type:

    * leaf ("replace") node: ``(source, target)`` strings. ``apply``
      returns ``target`` only when its input equals ``source`` exactly.
    * inner ("split") node: ``(prefix_len, suffix_len)`` ints. The first
      ``prefix_len`` characters of the input are handed to ``left``, the
      last ``suffix_len`` characters to ``right``, and whatever lies in
      between is copied through unchanged.
    """

    def __init__(self, val):
        self.left = None
        self.right = None
        self.val = val

    def apply(self, word):
        """Apply the learned edit rules to ``word``.

        Per the original author's note, this variant was changed from the
        upstream code so that it lemmatizes rather than completes a paradigm.

        Returns the rewritten string, or the sentinel ``-1`` when the tree
        does not match ``word``.
        """
        assert isinstance(word, str)
        if isinstance(self.val[0], str):  # replace
            if word == self.val[0]:
                return self.val[1]
            return -1

        if isinstance(self.val[0], int):  # split
            assert isinstance(self.left, EditTreeNode)
            assert isinstance(self.right, EditTreeNode)
            prefix_len = self.val[0]
            suffix_len = self.val[1]

            # A word shorter than prefix + suffix would make the two slices
            # overlap, so the same characters would be rewritten by both
            # subtrees. Such a word simply does not match this tree.
            if len(word) < prefix_len + suffix_len:
                return -1

            suffix_start = len(word) - suffix_len
            word_left = self.left.apply(word[:prefix_len])
            word_right = self.right.apply(word[suffix_start:])
            if word_left == -1 or word_right == -1:
                return -1

            # The middle part is the shared substring and is kept verbatim.
            return word_left + word[prefix_len:suffix_start] + word_right

        # ``val[0]`` is neither str nor int: no rule type applies.
        return None

    def __str__(self):
        """Render the tree as indented text, one node per line."""
        if self.left is None:  # leaf
            return str(self.val)

        lines = [str(self.val)]
        # Children are indented by two spaces per nesting level.
        for child in (self.left, self.right):
            lines.extend("  " + line for line in str(child).split("\n"))
        return "\n".join(lines).strip()

    def __hash__(self):
        # Structurally equal trees render identically, so hashing the
        # rendering keeps ``__hash__`` consistent with ``__eq__``.
        return hash(self.__str__())

    def __eq__(self, other):
        """Two nodes are equal when their values and both subtrees are equal."""
        if not isinstance(other, EditTreeNode):
            return False
        return bool(
            self.left == other.left
            and self.right == other.right
            and self.val == other.val
        )


def longestSubstring(str1, str2):
    """Find the longest substring shared by ``str1`` and ``str2``.

    Returns a tuple ``(start1, start2, size)``: the start offset of the match
    in ``str1``, its start offset in ``str2``, and its length. ``size`` is 0
    when the strings have nothing in common.
    """
    # autojunk must be off: the default heuristic drops frequent characters
    # from the index once the second sequence has 200+ items, which can hide
    # the true longest match on long inputs.
    seqMatch = SequenceMatcher(None, str1, str2, autojunk=False)
    match = seqMatch.find_longest_match(0, len(str1), 0, len(str2))
    return (match.a, match.b, match.size)


def editTree(str1, str2):
    """Build the edit tree that turns ``str1`` into ``str2``.

    Returns ``None`` if either argument is ``None``. When the two strings
    share no substring the result is a leaf holding ``(str1, str2)``.
    Otherwise the result is a split node holding the lengths of the parts of
    ``str1`` before and after the longest shared substring, with subtrees
    built recursively from the text on each side of it.
    """
    if str1 is None:
        return None
    if str2 is None:
        return None

    start1, start2, common_len = longestSubstring(str1, str2)
    if not common_len:
        # Nothing in common: the whole of str1 is replaced by str2.
        return EditTreeNode((str1, str2))

    end1 = start1 + common_len
    end2 = start2 + common_len

    # Store prefix/suffix lengths measured on str1, the string to be edited.
    root = EditTreeNode((start1, len(str1) - end1))
    root.left = editTree(str1[:start1], str2[:start2])
    root.right = editTree(str1[end1:], str2[end2:])
    return root