File size: 5,776 Bytes
04b0dd9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
"Soros interpreter (see http://numbertext.org)"
from __future__ import unicode_literals
from __future__ import print_function
import re
import sys


def run(program, data, lang):
    """Compile the Soros source *program* for *lang* and apply it to *data*.

    Returns whatever the compiled interpreter's ``run`` method returns for
    *data*.
    """
    # NOTE: ``compile`` here is this module's function, not the builtin.
    interpreter = compile(program, lang)
    return interpreter.run(data)


def compile(program, lang):
    """Build a ``_Soros`` interpreter from the source *program* and *lang*.

    The name shadows the builtin ``compile`` inside this module.
    """
    interpreter = _Soros(program, lang)
    return interpreter

# conversion function


def _tr(text, chars, chars2, delim):
    """Replace every ``delim + chars[i]`` in *text* with ``chars2[i]``.

    The substitutions are applied one after another in the order of *chars*,
    so a later replacement operates on the output of the earlier ones.
    """
    for position, source_char in enumerate(chars):
        needle = delim + source_char
        text = text.replace(needle, chars2[position])
    return text


# string literals for metacharacter encoding
# _m lists the metacharacters in the order: \ " ; # $ ( ) | [ ]
_m = "\\\";#$()|[]"
# Unicode private area
# _c holds ten placeholder code points (\uE000..\uE009); slices of _m and _c
# are paired through _tr() to hide metacharacters and to restore them again.
# Which metacharacter a placeholder stands for depends on the slices used at
# that processing stage.
_c = u"\uE000\uE001\uE002\uE003\uE004\uE005\uE006\uE007\uE008\uE009"
# placeholder that "|" is mapped to in compiled replacement strings
# (equal to _c[3], the partner of _m[7] in the _m[4:8] -> _c[:4] mapping)
_pipe = u"\uE003"
# separator prefix = \uE00A

# pattern to recognize function calls in the replacement string
# The pattern is written with \$, \(, \), \| for readability; _tr() swaps
# those for the placeholders before compiling, so it matches the encoded
# form of "$(...)" calls. Group 1 is the innermost call including optional
# surrounding pipes, group 2 is the argument text of that call.
_func = re.compile(_tr(r"""(?:\|?(?:\$\()+)?  # optional nested calls
                (\|?\$\(([^\(\)]*)\)\|?)      # inner call (2 subgroups)
                (?:\)+\|?)?""",               # optional nested calls
                       _m[4:8], _c[:4], "\\"), re.X)  # \$, \(, \), \| -> \uE000..\uE003


class _Soros:
    """Compiled Soros program: an ordered list of regex rewrite rules.

    Each entry of ``lines`` is a four-item list: the compiled pattern
    (anchored so that it must match the whole input), the replacement
    template in ``re`` expansion syntax, a flag that is true when the
    source pattern started with "^", and a flag that is true when it
    ended with "$".
    """

    def __init__(self, prg, lang):
        """Parse the Soros source *prg* into rewrite rules.

        Args:
            prg: Program text. Rules are separated by newlines or ";",
                and "#" starts a comment that runs to the end of the line.
            lang: Language code. Rules whose comment carries a
                ``[:code:]`` marker are switched off unless the marker
                matches *lang* (with "_" replaced by "-").

        Raises:
            re.error: if a rule pattern is not a valid regular expression;
                the offending pattern is printed to stderr first.
        """
        self.lines = []
        if "__numbertext__" not in prg:
            prg = "__numbertext__;" + prg
        # default left zero deletion
        # and separator function (no separation, if subcall returns with empty string)
        prg = prg.replace("__numbertext__", u"""0+(0|[1-9]\\d*) $1
\"([a-z][-a-z]* )0+(0|[1-9]\\d*)\" $(\\1\\2)
\"\uE00A(.*)\uE00A(.+)\uE00A(.*)\" \\1\\2\\3
\"\uE00A.*\uE00A\uE00A.*\"
""")
        prg = _tr(prg, _m[:4], _c[:4],
                  "\\")  # \\, \", \;, \# -> \uE000..\uE003
        # switch off all country-dependent lines, and switch on the requested ones
        prg = re.sub(
            r"(^|[\n;])([^\n;#]*#[^\n]*[\[]:[^\n:\]]*:][^\n]*)", r"\1#\2", prg)
        prg = re.sub(r"(^|[\n;])#([^\n;#]*#[^\n]*[\[]:" +
                     lang.replace("_", "-") + r":][^\n]*)", r"\1\2", prg)
        # group 1: the pattern (optionally double-quoted), group 2: the
        # replacement. Raw string, so that "\s" reaches the regex engine
        # without being an invalid string escape.
        matchline = re.compile(r'^\s*("[^"]*"|[^\s]*)\s*(.*[^\s])?\s*$')
        prefix = ""
        # strip comments and split the program into single rules
        for s in re.sub("(#[^\n]*)?(\n|$)", ";", prg).split(";"):
            # "== name ==" sets the prefix added to the patterns that follow
            macro = re.match("== *(.*[^ ]?) ==", s)
            if macro is not None:
                prefix = macro.group(1)
                continue
            m = matchline.match(s)
            if prefix != "" and s != "" and m is not None:
                # rebuild the rule as: "[^]prefix pattern" replacement
                s = m.group(1).strip("\"")
                space = " " if s != "" else ""
                caret = ""
                if s[0:1] == "^":
                    s = s[1:]
                    caret = "^"
                s2 = m.group(2) if m.group(2) is not None else ""
                s = "\"" + caret + prefix + space + s + "\" " + s2
                m = matchline.match(s)
            if m is not None:
                s = _tr(m.group(1).strip("\""), _c[1:4], _m[1:4], "") \
                    .replace(_c[_m.find("\\")], "\\\\")  # -> \\, ", ;, #
                if m.group(2) is not None:
                    s2 = m.group(2).strip("\"")
                else:
                    s2 = ""
                # \$, \(, \), \|, \[, \] -> \uE004..\uE009
                s2 = _tr(s2, _m[4:], _c[4:], "\\")
                # call inner separator: [ ... $1 ... ] -> $(\uE00A ... \uE00A$1\uE00A ... )
                s2 = re.sub(r"[\[]\$(\d\d?|\([^\)]+\))",
                            u"$(\uE00A\uE00A|$\\1\uE00A", s2)
                s2 = re.sub(r"[\[]([^\$[\\]*)\$(\d\d?|\([^\)]+\))",
                            u"$(\uE00A\\1\uE00A$\\2\uE00A", s2)
                # add "|" in terminating position
                s2 = re.sub(r"\uE00A]$", "|\uE00A)", s2)
                s2 = re.sub(r"]", ")", s2)
                s2 = re.sub(r"(\$\d|\))\|\$", r"\1||$",
                            s2)  # $()|$() -> $()||$()
                # \uE000..\uE003-> \, ", ;, #
                s2 = _tr(s2, _c[:4], _m[:4], "")
                # $, (, ), | -> \uE000..\uE003
                s2 = _tr(s2, _m[4:8], _c[:4], "")
                # \uE004..\uE009 -> $, (, ), |, [, ]
                s2 = _tr(s2, _c[4:], _m[4:], "")
                # $N -> encoded $(\g<N>), then \N -> \g<N> for Match.expand()
                s2 = re.sub(r"\\(\d)", r"\\g<\1>",
                            re.sub(r"\uE000(\d)", "\uE000\uE001\\\\g<\\1>\uE002", s2))
                try:
                    compiled = re.compile(
                        "^" + s.lstrip("^").rstrip("$") + "$")
                except Exception:
                    print("Error in following regex line: " + s, file=sys.stderr)
                    raise
                # append in place instead of copying the whole list per rule
                self.lines.append(
                    [compiled, s2, s[:1] == "^", s[-1:] == "$"])

    def run(self, data):
        """Apply the program to *data*, treating it as a complete input.

        Both the begin and the end boundary flags are set, so rules whose
        pattern was marked with "^" or "$" are eligible.
        """
        return self._run(data, True, True)

    def _run(self, data, begin, end):
        """Rewrite *data* with the first rule that matches it.

        Args:
            data: Text to convert.
            begin: False disables the rules whose pattern started with "^".
            end: False disables the rules whose pattern ended with "$".

        Returns:
            The expanded replacement of the first matching rule, with every
            embedded function call resolved by a recursive ``_run``; the
            empty string when no rule matches.
        """
        for pattern, template, only_begin, only_end in self.lines:
            # skip rules tied to a boundary that is not active for this call
            if (not begin and only_begin) or (not end and only_end):
                continue
            m = pattern.match(data)
            if not m:
                continue
            try:
                s = m.expand(template)
            except Exception:
                print("Error for the following input: " +
                      data, file=sys.stderr)
                raise
            n = _func.search(s)
            while n:
                # A pipe next to the call forces the boundary flag on;
                # otherwise the caller's flag is inherited only when the
                # call touches that edge of the string.
                b = False
                e = False
                if n.group(1)[0:1] == _pipe or n.group()[0:1] == _pipe:
                    b = True
                elif n.start() == 0:
                    b = begin
                if n.group(1)[-1:] == _pipe or n.group()[-1:] == _pipe:
                    e = True
                elif n.end() == len(s):
                    e = end
                # replace the innermost call (group 1) by its result
                s = s[:n.start(1)] + self._run(n.group(2),
                                               b, e) + s[n.end(1):]
                n = _func.search(s)
            return s
        return ""