|
"""Extend the Python codecs module with a few encodings that are used in OpenType (name table) |
|
but missing from Python. See https://github.com/fonttools/fonttools/issues/236 for details.""" |
|
|
|
import codecs |
|
import encodings |
|
|
|
|
|
class ExtendCodec(codecs.Codec):
    """A codec that extends another encoding with extra byte<->str mappings.

    Encoding/decoding is delegated to *base_encoding*; whenever the base
    codec fails on a span, the extra *mapping* (bytes -> str) is consulted
    before the caller-requested error handling kicks in.

    Instantiating the codec registers an error handler under *name* (via
    ``codecs.register_error``) — that handler is how the extra mappings are
    injected into the base codec's machinery.
    """

    def __init__(self, name, base_encoding, mapping):
        self.name = name
        self.base_encoding = base_encoding
        self.mapping = mapping  # bytes -> str additions to the base encoding
        self.reverse = {v: k for k, v in mapping.items()}  # str -> bytes
        # Longest mapped string; bounds the lookahead in error() when encoding.
        self.max_len = max(len(v) for v in mapping.values())
        self.info = codecs.CodecInfo(
            name=self.name, encode=self.encode, decode=self.decode
        )
        # Error handler name == codec name; see _map() and error().
        codecs.register_error(name, self.error)

    def _map(self, mapper, output_type, exc_type, input, errors):
        """Shared driver for encode() and decode().

        Runs *mapper* (``codecs.encode`` or ``codecs.decode``) over *input*
        using our own error handler first, so the extra mappings are applied;
        anything still unmappable is delegated to the caller-requested
        *errors* handler. Returns ``(output, length_consumed)`` per the
        stateless codec contract.
        """
        base_error_handler = codecs.lookup_error(errors)
        length = len(input)
        out = output_type()
        while input:
            # First try with our own error handler, which consumes everything
            # covered by base_encoding plus self.mapping.
            try:
                part = mapper(input, self.base_encoding, errors=self.name)
                out += part
                break  # all converted
            except exc_type as e:
                # Convert the clean prefix, then let the caller's handler
                # decide what to do with the offending span.
                out += mapper(input[: e.start], self.base_encoding, self.name)
                replacement, pos = base_error_handler(e)
                if not isinstance(replacement, output_type):
                    # Bug fix: standard *encode* error handlers such as
                    # "replace" and "backslashreplace" return a str
                    # replacement, which cannot be concatenated onto a bytes
                    # output; run it through the base codec first.
                    replacement = mapper(replacement, self.base_encoding, self.name)
                out += replacement
                input = input[pos:]
        return out, length

    def encode(self, input, errors="strict"):
        """Encode *input* (str); returns ``(bytes, length consumed)``."""
        return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors)

    def decode(self, input, errors="strict"):
        """Decode *input* (bytes); returns ``(str, length consumed)``."""
        return self._map(codecs.decode, str, UnicodeDecodeError, input, errors)

    def error(self, e):
        """Error handler registered under ``self.name``.

        Resolves a base-codec failure by looking the offending span up in the
        extra mappings; re-raises the exception (re-tagged with this codec's
        name) when no mapping applies.
        """
        if isinstance(e, UnicodeDecodeError):
            for end in range(e.start + 1, e.end + 1):
                s = e.object[e.start : end]
                if s in self.mapping:
                    return self.mapping[s], end
        elif isinstance(e, UnicodeEncodeError):
            for end in range(e.start + 1, e.start + self.max_len + 1):
                s = e.object[e.start : end]
                if s in self.reverse:
                    return self.reverse[s], end
        e.encoding = self.name
        raise e
|
|
|
|
|
# name -> (fallback base encoding, extra bytes->str mappings).
# Each "*_ttx" encoding is the named East Asian base encoding extended with a
# few extra single-byte values — presumably Apple's Mac OS additions for the
# OpenType name table (see the module docstring); TODO confirm against
# Apple's mapping tables.
_extended_encodings = {
    "x_mac_japanese_ttx": (
        "shift_jis",
        {
            b"\xFC": chr(0x007C),  # VERTICAL LINE
            b"\x7E": chr(0x007E),  # TILDE
            b"\x80": chr(0x005C),  # REVERSE SOLIDUS (backslash)
            b"\xA0": chr(0x00A0),  # NO-BREAK SPACE
            b"\xFD": chr(0x00A9),  # COPYRIGHT SIGN
            b"\xFE": chr(0x2122),  # TRADE MARK SIGN
            b"\xFF": chr(0x2026),  # HORIZONTAL ELLIPSIS
        },
    ),
    "x_mac_trad_chinese_ttx": (
        "big5",
        {
            b"\x80": chr(0x005C),  # REVERSE SOLIDUS (backslash)
            b"\xA0": chr(0x00A0),  # NO-BREAK SPACE
            b"\xFD": chr(0x00A9),  # COPYRIGHT SIGN
            b"\xFE": chr(0x2122),  # TRADE MARK SIGN
            b"\xFF": chr(0x2026),  # HORIZONTAL ELLIPSIS
        },
    ),
    "x_mac_korean_ttx": (
        "euc_kr",
        {
            b"\x80": chr(0x00A0),  # NO-BREAK SPACE
            b"\x81": chr(0x20A9),  # WON SIGN
            b"\x82": chr(0x2014),  # EM DASH
            b"\x83": chr(0x00A9),  # COPYRIGHT SIGN
            b"\xFE": chr(0x2122),  # TRADE MARK SIGN
            b"\xFF": chr(0x2026),  # HORIZONTAL ELLIPSIS
        },
    ),
    "x_mac_simp_chinese_ttx": (
        "gb2312",
        {
            b"\x80": chr(0x00FC),  # LATIN SMALL LETTER U WITH DIAERESIS
            b"\xA0": chr(0x00A0),  # NO-BREAK SPACE
            b"\xFD": chr(0x00A9),  # COPYRIGHT SIGN
            b"\xFE": chr(0x2122),  # TRADE MARK SIGN
            b"\xFF": chr(0x2026),  # HORIZONTAL ELLIPSIS
        },
    ),
}

# Lazily-built ExtendCodec instances keyed by normalized encoding name;
# populated by search_function() on first lookup.
_cache = {}
|
|
|
|
|
def search_function(name):
    """Codec search function for the extended "*_ttx" encodings.

    Builds (and caches) an ExtendCodec on first use and returns its
    ``CodecInfo``; returns None for any unrecognized name so other
    registered search functions get a chance, per the codecs-registry
    contract.
    """
    name = encodings.normalize_encoding(name)
    if name not in _extended_encodings:
        return None
    if name not in _cache:
        fallback, mapping = _extended_encodings[name]
        assert name[-4:] == "_ttx"
        # Prefer a Python-native codec with the un-suffixed name (e.g.
        # "x_mac_japanese") when this Python provides one; otherwise fall
        # back to the configured base encoding (e.g. "shift_jis").
        for candidate in (name[:-4], fallback):
            try:
                codecs.lookup(candidate)
            except LookupError:
                continue
            _cache[name] = ExtendCodec(name, candidate, mapping)
            break
    return _cache[name].info
|
|
|
|
|
# Module import side effect: make the "*_ttx" encodings available to
# str.encode()/bytes.decode() process-wide.
codecs.register(search_function)
|
|