"Soros interpreter (see http://numbertext.org)" from __future__ import unicode_literals from __future__ import print_function import re import sys def run(program, data, lang): return compile(program, lang).run(data) def compile(program, lang): return _Soros(program, lang) # conversion function def _tr(text, chars, chars2, delim): for i in range(0, len(chars)): text = text.replace(delim + chars[i], chars2[i]) return text # string literals for metacharacter encoding _m = "\\\";#$()|[]" # Unicode private area _c = u"\uE000\uE001\uE002\uE003\uE004\uE005\uE006\uE007\uE008\uE009" _pipe = u"\uE003" # separator prefix = \uE00A # pattern to recognize function calls in the replacement string _func = re.compile(_tr(r"""(?:\|?(?:\$\()+)? # optional nested calls (\|?\$\(([^\(\)]*)\)\|?) # inner call (2 subgroups) (?:\)+\|?)?""", # optional nested calls _m[4:8], _c[:4], "\\"), re.X) # \$, \(, \), \| -> \uE000..\uE003 class _Soros: def __init__(self, prg, lang): self.lines = [] if prg.find("__numbertext__") == -1: prg = "__numbertext__;" + prg # default left zero deletion # and separator function (no separation, if subcall returns with empty string) prg = prg.replace("__numbertext__", u"""0+(0|[1-9]\\d*) $1 \"([a-z][-a-z]* )0+(0|[1-9]\\d*)\" $(\\1\\2) \"\uE00A(.*)\uE00A(.+)\uE00A(.*)\" \\1\\2\\3 \"\uE00A.*\uE00A\uE00A.*\" """) prg = _tr(prg, _m[:4], _c[:4], "\\") # \\, \", \;, \# -> \uE000..\uE003 # switch off all country-dependent lines, and switch on the requested ones prg = re.sub( r"(^|[\n;])([^\n;#]*#[^\n]*[\[]:[^\n:\]]*:][^\n]*)", r"\1#\2", prg) prg = re.sub(r"(^|[\n;])#([^\n;#]*#[^\n]*[\[]:" + lang.replace("_", "-") + r":][^\n]*)", r"\1\2", prg) matchline = re.compile("^\s*(\"[^\"]*\"|[^\s]*)\s*(.*[^\s])?\s*$") prefix = "" for s in re.sub("(#[^\n]*)?(\n|$)", ";", prg).split(";"): macro = re.match("== *(.*[^ ]?) ==", s) if macro != None: prefix = macro.group(1) continue m = matchline.match(s) if prefix != "" and s != "" and m != None: s = m.group(1).strip("\"") space = " " if s != "" else "" caret = "" if s[0:1] == "^": s = s[1:] caret = "^" s2 = m.group(2) if m.group(2) != None else "" s = "\"" + caret + prefix + space + s + "\" " + s2 m = matchline.match(s) if m != None: s = _tr(m.group(1).strip("\""), _c[1:4], _m[1:4], "") \ .replace(_c[_m.find("\\")], "\\\\") # -> \\, ", ;, # if m.group(2) != None: s2 = m.group(2).strip("\"") else: s2 = "" # \$, \(, \), \|, \[, \] -> \uE004..\uE009 s2 = _tr(s2, _m[4:], _c[4:], "\\") # call inner separator: [ ... $1 ... ] -> $(\uE00A ... \uE00A$1\uE00A ... ) s2 = re.sub(r"[\[]\$(\d\d?|\([^\)]+\))", u"$(\uE00A\uE00A|$\\1\uE00A", s2) s2 = re.sub(r"[\[]([^\$[\\]*)\$(\d\d?|\([^\)]+\))", u"$(\uE00A\\1\uE00A$\\2\uE00A", s2) # add "|" in terminating position s2 = re.sub(r"\uE00A]$", "|\uE00A)", s2) s2 = re.sub(r"]", ")", s2) s2 = re.sub(r"(\$\d|\))\|\$", r"\1||$", s2) # $()|$() -> $()||$() # \uE000..\uE003-> \, ", ;, # s2 = _tr(s2, _c[:4], _m[:4], "") # $, (, ), | -> \uE000..\uE003 s2 = _tr(s2, _m[4:8], _c[:4], "") # \uE004..\uE009 -> $, (, ), |, [, ] s2 = _tr(s2, _c[4:], _m[4:], "") s2 = re.sub(r"\\(\d)", r"\\g<\1>", re.sub(r"\uE000(\d)", "\uE000\uE001\\\\g<\\1>\uE002", s2)) try: self.lines = self.lines + [[ re.compile("^" + s.lstrip("^").rstrip("$") + "$"), s2, s[:1] == "^", s[-1:] == "$"]] except: print("Error in following regex line: " + s, file=sys.stderr) raise def run(self, data): return self._run(data, True, True) def _run(self, data, begin, end): for i in self.lines: if not ((begin == False and i[2]) or (end == False and i[3])): m = i[0].match(data) if m: try: s = m.expand(i[1]) except: print("Error for the following input: " + data, file=sys.stderr) raise n = _func.search(s) while n: b = False e = False if n.group(1)[0:1] == _pipe or n.group()[0:1] == _pipe: b = True elif n.start() == 0: b = begin if n.group(1)[-1:] == _pipe or n.group()[-1:] == _pipe: e = True elif n.end() == len(s): e = end s = s[:n.start(1)] + self._run(n.group(2), b, e) + s[n.end(1):] n = _func.search(s) return s return ""