from fontTools.misc.textTools import bytechr, byteord, bytesjoin, tobytes, tostr
from fontTools.misc import eexec
from .psOperators import (
    PSOperators,
    ps_StandardEncoding,
    ps_array,
    ps_boolean,
    ps_dict,
    ps_integer,
    ps_literal,
    ps_mark,
    ps_name,
    ps_operator,
    ps_procedure,
    ps_procmark,
    ps_real,
    ps_string,
)
import re
from collections.abc import Callable
from string import whitespace
import logging


log = logging.getLogger(__name__)

ps_special = b"()<>[]{}%"  # / is one too, but we take care of that one differently

# Zero or more whitespace characters.
skipwhiteRE = re.compile(bytesjoin([b"[", whitespace, b"]*"]))
# A run of characters that are neither delimiters nor whitespace.
endofthingPat = bytesjoin([b"[^][(){}<>/%", whitespace, b"]*"])
endofthingRE = re.compile(endofthingPat)
# A comment: '%' up to (not including) the end of the line.
commentRE = re.compile(b"%[^\n\r]*")

# XXX This is not entirely correct as it doesn't allow *nested* embedded parens.
# The pattern is written free-form and all whitespace is stripped below, so
# every token must survive that stripping: the first alternative is a literal
# backslash (written as an escaped backslash) followed by a parenthesis.
stringPat = rb"""
    \(
        (
            (
                [^()]*   \\   [()]
            )
            |
            (
                [^()]*  \(   [^()]*  \)
            )
        )*
        [^()]*
    \)
"""
stringPat = b"".join(stringPat.split())
stringRE = re.compile(stringPat)

# '<' followed by hex digits and/or whitespace, closed by '>'.
hexstringRE = re.compile(bytesjoin([b"<[", whitespace, b"0-9A-Fa-f]*>"]))


class PSTokenError(Exception):
    """Raised when the input cannot be split into a valid token."""


class PSError(Exception):
    """Raised by the interpreter (name lookup, stack underflow, typecheck)."""


class PSTokenizer(object):
    """Split a bytes buffer into PostScript tokens.

    The object also offers a small file-like interface (``read``/``close``)
    over the same buffer and position.
    """

    def __init__(self, buf=b"", encoding="ascii"):
        """Wrap *buf*; tokens are decoded to ``str`` using *encoding*."""
        # Force self.buf to be a byte string
        buf = tobytes(buf)
        self.buf = buf
        self.len = len(buf)
        self.pos = 0
        self.closed = False
        self.encoding = encoding

    def read(self, n=-1):
        """Read at most 'n' bytes from the buffer, or less if the read
        hits EOF before obtaining 'n' bytes.
        If 'n' is negative or omitted, read all data until EOF is reached.

        Raises ValueError if the tokenizer has been closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos + n, self.len)
        r = self.buf[self.pos : newpos]
        self.pos = newpos
        return r

    def close(self):
        """Mark the tokenizer closed and drop its buffer and position.

        Calling this more than once is harmless.
        """
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def getnexttoken(
        self,
        # localize some stuff, for performance
        len=len,
        ps_special=ps_special,
        stringmatch=stringRE.match,
        hexstringmatch=hexstringRE.match,
        commentmatch=commentRE.match,
        endmatch=endofthingRE.match,
    ):
        """Return the next ``(tokentype, token)`` pair and advance past it.

        *tokentype* is the name of the handler the interpreter dispatches to
        ("do_special", "do_comment", "do_string", "do_hexstring",
        "do_literal"), or "" for a plain token.  *token* is the token text
        decoded with ``self.encoding``.  Returns ``(None, None)`` when only
        whitespace (or nothing) remains in the buffer.

        Raises PSTokenError when the input at the current position does not
        form a valid token.
        """
        self.skipwhite()
        if self.pos >= self.len:
            return None, None
        pos = self.pos
        buf = self.buf
        # Indexing bytes yields an int; turn it back into a 1-byte string.
        char = bytechr(byteord(buf[pos]))
        if char in ps_special:
            if char in b"{}[]":
                tokentype = "do_special"
                token = char
            elif char == b"%":
                tokentype = "do_comment"
                # Always matches: the character at pos is '%'.
                _, nextpos = commentmatch(buf, pos).span()
                token = buf[pos:nextpos]
            elif char == b"(":
                tokentype = "do_string"
                m = stringmatch(buf, pos)
                if m is None:
                    raise PSTokenError("bad string at character %d" % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            elif char == b"<":
                tokentype = "do_hexstring"
                m = hexstringmatch(buf, pos)
                if m is None:
                    raise PSTokenError("bad hexstring at character %d" % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            else:
                # A stray ')' or '>' with no opener.
                raise PSTokenError("bad token at character %d" % pos)
        else:
            if char == b"/":
                tokentype = "do_literal"
                # Match the name after the slash; the slash stays in the token.
                m = endmatch(buf, pos + 1)
            else:
                tokentype = ""
                m = endmatch(buf, pos)
            if m is None:
                raise PSTokenError("bad token at character %d" % pos)
            _, nextpos = m.span()
            token = buf[pos:nextpos]
        self.pos = pos + len(token)
        token = tostr(token, encoding=self.encoding)
        return tokentype, token

    def skipwhite(self, whitematch=skipwhiteRE.match):
        """Advance the position past any whitespace."""
        _, nextpos = whitematch(self.buf, self.pos).span()
        self.pos = nextpos

    def starteexec(self):
        """Switch to tokenizing the eexec-decrypted remainder of the buffer.

        The still-encrypted remainder is kept in ``self.dirtybuf`` so that
        ``stopeexec`` can restore it.
        """
        # Step over one byte before the encrypted data (presumably the single
        # whitespace character that follows the 'eexec' token).
        self.pos = self.pos + 1
        self.dirtybuf = self.buf[self.pos :]
        # 55665 is the eexec key of the Type 1 font format.
        self.buf, R = eexec.decrypt(self.dirtybuf, 55665)
        self.len = len(self.buf)
        # Skip the first four decrypted bytes (the Type 1 eexec scheme
        # prefixes the plaintext with four random bytes).
        self.pos = 4

    def stopeexec(self):
        """Restore the buffer saved by ``starteexec``; no-op if not active."""
        if not hasattr(self, "dirtybuf"):
            return
        self.buf = self.dirtybuf
        del self.dirtybuf


class PSInterpreter(PSOperators):
    """A minimal PostScript interpreter built on the PSOperators methods."""

    def __init__(self, encoding="ascii"):
        """Create the dictionary stack, operand stack and system dictionary.

        *encoding* is passed to the tokenizer to decode tokens.
        """
        systemdict = {}
        userdict = {}
        self.encoding = encoding
        self.dictstack = [systemdict, userdict]
        self.stack = []
        self.proclevel = 0
        self.procmark = ps_procmark()
        self.fillsystemdict()

    def fillsystemdict(self):
        """Populate the system dictionary with built-ins and operators."""
        systemdict = self.dictstack[0]
        systemdict["["] = systemdict["mark"] = self.mark = ps_mark()
        systemdict["]"] = ps_operator("]", self.do_makearray)
        systemdict["true"] = ps_boolean(1)
        systemdict["false"] = ps_boolean(0)
        systemdict["StandardEncoding"] = ps_array(ps_StandardEncoding)
        systemdict["FontDirectory"] = ps_dict({})
        self.suckoperators(systemdict, self.__class__)

    def suckoperators(self, systemdict, klass):
        """Register every callable ``ps_*`` attribute as an operator.

        The operator name is the attribute name without the ``ps_`` prefix.
        Base classes of *klass* are visited recursively.
        """
        for name in dir(klass):
            attr = getattr(self, name)
            if isinstance(attr, Callable) and name[:3] == "ps_":
                name = name[3:]
                systemdict[name] = ps_operator(name, attr)
        for baseclass in klass.__bases__:
            self.suckoperators(systemdict, baseclass)

    def interpret(self, data, getattr=getattr):
        """Tokenize and execute the PostScript program in *data*.

        On success the tokenizer is closed and ``self.tokenizer`` is reset to
        None.  If anything raises, the bytes around the current position are
        logged at debug level and the exception is re-raised unchanged.
        """
        tokenizer = self.tokenizer = PSTokenizer(data, self.encoding)
        getnexttoken = tokenizer.getnexttoken
        do_token = self.do_token
        handle_object = self.handle_object
        try:
            while 1:
                tokentype, token = getnexttoken()
                if not token:
                    break
                if tokentype:
                    handler = getattr(self, tokentype)
                    obj = handler(token)
                else:
                    obj = do_token(token)
                if obj is not None:
                    handle_object(obj)
            tokenizer.close()
            self.tokenizer = None
        except BaseException:
            if self.tokenizer is not None:
                buf = self.tokenizer.buf
                pos = self.tokenizer.pos
                # Clamp the start: a negative index would wrap around to the
                # end of the buffer and show the wrong context.
                log.debug(
                    "ps error:\n"
                    "- - - - - - -\n"
                    "%s\n"
                    ">>>\n"
                    "%s\n"
                    "- - - - - - -",
                    buf[max(pos - 50, 0) : pos],
                    buf[pos : pos + 50],
                )
            raise

    def handle_object(self, object):
        """Execute *object*, or push it onto the operand stack.

        Objects are pushed as-is while a procedure is being collected
        (``proclevel`` non-zero), or when they are literal or procedures.
        Otherwise non-operators are first resolved by name through the
        dictionary stack; the result is pushed if literal, run if it is a
        procedure, and called otherwise.
        """
        if not (self.proclevel or object.literal or object.type == "proceduretype"):
            if object.type != "operatortype":
                object = self.resolve_name(object.value)
            if object.literal:
                self.push(object)
            else:
                if object.type == "proceduretype":
                    self.call_procedure(object)
                else:
                    object.function()
        else:
            self.push(object)

    def call_procedure(self, proc):
        """Handle each item of the procedure *proc* in order."""
        handle_object = self.handle_object
        for item in proc.value:
            handle_object(item)

    def resolve_name(self, name):
        """Look *name* up in the dictionary stack, topmost dictionary first.

        Raises PSError if no dictionary on the stack defines it.
        """
        for d in reversed(self.dictstack):
            if name in d:
                return d[name]
        raise PSError("name error: " + str(name))

    def do_token(
        self,
        token,
        int=int,
        float=float,
        ps_name=ps_name,
        ps_integer=ps_integer,
        ps_real=ps_real,
    ):
        """Convert a plain token to an integer, real or name object.

        Tried in order: decimal integer, float, radix number written as
        ``base#digits``; anything else becomes a name.
        """
        try:
            num = int(token)
        except (ValueError, OverflowError):
            pass
        else:
            return ps_integer(num)

        try:
            num = float(token)
        except (ValueError, OverflowError):
            pass
        else:
            return ps_real(num)

        if "#" in token:
            # Radix notation: split on the first '#'.
            base, _, digits = token.partition("#")
            try:
                num = int(digits, int(base))
            except (ValueError, OverflowError):
                pass
            else:
                return ps_integer(num)
        return ps_name(token)

    def do_comment(self, token):
        """Comments produce no object."""
        pass

    def do_literal(self, token):
        """Return a literal name object for *token* minus its leading '/'."""
        return ps_literal(token[1:])

    def do_string(self, token):
        """Return a string object for *token* minus its enclosing parens."""
        return ps_string(token[1:-1])

    def do_hexstring(self, token):
        """Decode a ``<...>`` hex string token into a string object.

        Whitespace inside the brackets is ignored, and an odd number of
        digits is padded with a trailing "0".
        """
        hexStr = "".join(token[1:-1].split())
        if len(hexStr) % 2:
            hexStr = hexStr + "0"
        cleanstr = "".join(
            chr(int(hexStr[i : i + 2], 16)) for i in range(0, len(hexStr), 2)
        )
        return ps_string(cleanstr)

    def do_special(self, token):
        """Handle the structural tokens ``{``, ``}``, ``[`` and ``]``.

        ``{`` opens a procedure and returns the procedure mark; ``}`` pops
        back to that mark and returns the collected procedure.  ``[`` returns
        the array mark and ``]`` returns the name "]".

        Raises PSTokenError for any other token.
        """
        if token == "{":
            self.proclevel = self.proclevel + 1
            return self.procmark
        elif token == "}":
            proc = []
            while 1:
                topobject = self.pop()
                if topobject == self.procmark:
                    break
                proc.append(topobject)
            self.proclevel = self.proclevel - 1
            # Items were popped last-to-first.
            proc.reverse()
            return ps_procedure(proc)
        elif token == "[":
            return self.mark
        elif token == "]":
            return ps_name("]")
        else:
            raise PSTokenError("huh?")

    def push(self, object):
        """Push *object* onto the operand stack."""
        self.stack.append(object)

    def pop(self, *types):
        """Pop and return the top of the operand stack.

        If *types* are given, the top object's ``type`` must be one of them.

        Raises PSError on an empty stack or on a type mismatch; in the
        latter case the object stays on the stack.
        """
        stack = self.stack
        if not stack:
            raise PSError("stack underflow")
        top = stack[-1]
        if types:
            if top.type not in types:
                raise PSError(
                    "typecheck, expected %s, found %s" % (repr(types), top.type)
                )
        del stack[-1]
        return top

    def do_makearray(self):
        """Pop objects down to the array mark and push them as one array."""
        array = []
        while 1:
            topobject = self.pop()
            if topobject == self.mark:
                break
            array.append(topobject)
        # Items were popped last-to-first.
        array.reverse()
        self.push(ps_array(array))

    def close(self):
        """Remove circular references."""
        del self.stack
        del self.dictstack


def unpack_item(item):
    """Recursively convert a PostScript object into plain Python values.

    Dict values become dicts and list values become lists (tuples when the
    object is a procedure), with every element unpacked in turn.  Any other
    object is replaced by its ``value``.
    """
    tp = type(item.value)
    if tp == dict:
        newitem = {key: unpack_item(value) for key, value in item.value.items()}
    elif tp == list:
        newitem = [unpack_item(value) for value in item.value]
        if item.type == "proceduretype":
            newitem = tuple(newitem)
    else:
        newitem = item.value
    return newitem


def suckfont(data, encoding="ascii"):
    """Interpret the font program *data* and return the font as plain values.

    The font is looked up in FontDirectory under the name found in a
    ``/FontName /<name> def`` statement.  If that name is absent, the
    alphabetically first defined font is used instead, ignoring the
    placeholder "Helvetica" whenever another font exists.

    *encoding* is used to decode both the tokens and the sniffed font name,
    so that the name matches the FontDirectory keys.
    """
    m = re.search(rb"/FontName\s+/([^ \t\n\r]+)\s+def", data)
    if m:
        # Decode like the tokenizer does, so the lookup key agrees.
        fontName = tostr(m.group(1), encoding=encoding)
    else:
        fontName = None
    interpreter = PSInterpreter(encoding=encoding)
    try:
        # Pre-define a Helvetica font (presumably because font programs may
        # reference it -- TODO confirm).
        interpreter.interpret(
            b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop"
        )
        interpreter.interpret(data)
        fontdir = interpreter.dictstack[0]["FontDirectory"].value
        if fontName in fontdir:
            rawfont = fontdir[fontName]
        else:
            # fall back, in case fontName wasn't found
            fontNames = list(fontdir.keys())
            if len(fontNames) > 1:
                fontNames.remove("Helvetica")
            fontNames.sort()
            rawfont = fontdir[fontNames[0]]
    finally:
        # Break the interpreter's reference cycles even when parsing fails.
        interpreter.close()
    return unpack_item(rawfont)