Spaces:
Runtime error
Runtime error
from fontTools.misc import sstruct | |
from fontTools.misc.roundTools import otRound | |
from fontTools.misc.textTools import safeEval, num2binary, binary2num | |
from fontTools.ttLib.tables import DefaultTable | |
import bisect | |
import logging | |
log = logging.getLogger(__name__) | |
# panose classification
# sstruct-style format: one "name: code" line per field, ten 'B' fields in all
# (the enclosing OS/2 record stores them as a single 10-byte string).
panoseFormat = """
bFamilyType: B
bSerifStyle: B
bWeight: B
bProportion: B
bContrast: B
bStrokeVariation: B
bArmStyle: B
bLetterForm: B
bMidline: B
bXHeight: B
"""
class Panose(object):
    """Container for the fields listed in ``panoseFormat``.

    Each field name from ``panoseFormat`` becomes an instance attribute.
    """

    def __init__(self, **kwargs):
        """Set every panose field from *kwargs*, defaulting to 0.

        Raises:
            TypeError: if a keyword does not name a field of ``panoseFormat``.
        """
        _, names, _ = sstruct.getformat(panoseFormat)
        for name in names:
            setattr(self, name, kwargs.pop(name, 0))
        # Anything left over was not a known field; report the first one.
        for k in kwargs:
            raise TypeError(f"Panose() got an unexpected keyword argument {k!r}")

    def toXML(self, writer, ttFont):
        """Write one simple tag per panose field, in format order."""
        _, names, _ = sstruct.getformat(panoseFormat)
        for name in names:
            writer.simpletag(name, value=getattr(self, name))
            writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Set attribute *name* from the evaluated ``attrs["value"]``."""
        setattr(self, name, safeEval(attrs["value"]))
# 'sfnt' OS/2 and Windows Metrics table - 'OS/2'

# Fields shared by every version of the table (version 0 consists of these only).
OS2_format_0 = """
> # big endian
version: H # version
xAvgCharWidth: h # average character width
usWeightClass: H # degree of thickness of strokes
usWidthClass: H # aspect ratio
fsType: H # type flags
ySubscriptXSize: h # subscript horizontal font size
ySubscriptYSize: h # subscript vertical font size
ySubscriptXOffset: h # subscript x offset
ySubscriptYOffset: h # subscript y offset
ySuperscriptXSize: h # superscript horizontal font size
ySuperscriptYSize: h # superscript vertical font size
ySuperscriptXOffset: h # superscript x offset
ySuperscriptYOffset: h # superscript y offset
yStrikeoutSize: h # strikeout size
yStrikeoutPosition: h # strikeout position
sFamilyClass: h # font family class and subclass
panose: 10s # panose classification number
ulUnicodeRange1: L # character range
ulUnicodeRange2: L # character range
ulUnicodeRange3: L # character range
ulUnicodeRange4: L # character range
achVendID: 4s # font vendor identification
fsSelection: H # font selection flags
usFirstCharIndex: H # first unicode character index
usLastCharIndex: H # last unicode character index
sTypoAscender: h # typographic ascender
sTypoDescender: h # typographic descender
sTypoLineGap: h # typographic line gap
usWinAscent: H # Windows ascender
usWinDescent: H # Windows descender
"""

# Fields appended by version 1.
OS2_format_1_addition = """
ulCodePageRange1: L
ulCodePageRange2: L
"""

# Fields appended by versions 2, 3 and 4 (on top of the version 1 fields).
OS2_format_2_addition = (
    OS2_format_1_addition
    + """
sxHeight: h
sCapHeight: h
usDefaultChar: H
usBreakChar: H
usMaxContext: H
"""
)

# Fields appended by version 5 (on top of the version 2 fields).
OS2_format_5_addition = (
    OS2_format_2_addition
    + """
usLowerOpticalPointSize: H
usUpperOpticalPointSize: H
"""
)

bigendian = " > # big endian\n"

# Complete formats are built BEFORE the additions get their own byte-order
# line, so each complete format carries exactly one '>' marker.
OS2_format_1 = OS2_format_0 + OS2_format_1_addition
OS2_format_2 = OS2_format_0 + OS2_format_2_addition
OS2_format_5 = OS2_format_0 + OS2_format_5_addition

# The additions are also unpacked on their own (after OS2_format_0), so they
# need a byte-order marker of their own.
OS2_format_1_addition = bigendian + OS2_format_1_addition
OS2_format_2_addition = bigendian + OS2_format_2_addition
OS2_format_5_addition = bigendian + OS2_format_5_addition
class table_O_S_2f_2(DefaultTable.DefaultTable):
    """the OS/2 table"""

    dependencies = ["head"]

    def decompile(self, data, ttFont):
        """Unpack binary 'OS/2' table *data* into attributes of this object.

        The common version-0 fields are read first, then the version-specific
        addition. For version 5 the two optical point sizes are divided by 20
        (``compile`` multiplies them back). ``self.panose`` ends up as a
        ``Panose`` instance.

        Raises:
            ttLib.TTLibError: if ``self.version`` is not one of 0-5.
        """
        dummy, data = sstruct.unpack2(OS2_format_0, data, self)

        if self.version == 1:
            dummy, data = sstruct.unpack2(OS2_format_1_addition, data, self)
        elif self.version in (2, 3, 4):
            dummy, data = sstruct.unpack2(OS2_format_2_addition, data, self)
        elif self.version == 5:
            dummy, data = sstruct.unpack2(OS2_format_5_addition, data, self)
            self.usLowerOpticalPointSize /= 20
            self.usUpperOpticalPointSize /= 20
        elif self.version != 0:
            from fontTools import ttLib

            raise ttLib.TTLibError(
                "unknown format for OS/2 table: version %s" % self.version
            )
        if len(data):
            log.warning("too much 'OS/2' table data")

        self.panose = sstruct.unpack(panoseFormat, self.panose, Panose())

    def compile(self, ttFont):
        """Return the binary form of this table.

        Refreshes usFirstCharIndex/usLastCharIndex from the font's cmap, logs
        warnings for inconsistent ``fsSelection`` flags, and packs the fields
        with the format matching ``self.version``.

        Raises:
            ttLib.TTLibError: if ``self.version`` is not one of 0-5.
        """
        self.updateFirstAndLastCharIndex(ttFont)
        panose = self.panose
        head = ttFont["head"]
        if (self.fsSelection & 1) and not (head.macStyle & (1 << 1)):
            log.warning(
                "fsSelection bit 0 (italic) and "
                "head table macStyle bit 1 (italic) should match"
            )
        if (self.fsSelection & (1 << 5)) and not (head.macStyle & 1):
            log.warning(
                "fsSelection bit 5 (bold) and "
                "head table macStyle bit 0 (bold) should match"
            )
        if (self.fsSelection & (1 << 6)) and (self.fsSelection & (1 + (1 << 5))):
            log.warning(
                "fsSelection bit 6 (regular) is set, "
                "bits 0 (italic) and 5 (bold) must be clear"
            )
        if self.version < 4 and self.fsSelection & 0b1110000000:
            log.warning(
                "fsSelection bits 7, 8 and 9 are only defined in "
                "OS/2 table version 4 and up: version %s",
                self.version,
            )
        # sstruct packs the 'panose' field as raw bytes, so temporarily swap
        # the Panose object for its packed form.
        self.panose = sstruct.pack(panoseFormat, self.panose)
        try:
            if self.version == 0:
                data = sstruct.pack(OS2_format_0, self)
            elif self.version == 1:
                data = sstruct.pack(OS2_format_1, self)
            elif self.version in (2, 3, 4):
                data = sstruct.pack(OS2_format_2, self)
            elif self.version == 5:
                # Pack from a copy so the scaled point sizes never leak back
                # onto the table object.
                d = self.__dict__.copy()
                d["usLowerOpticalPointSize"] = round(self.usLowerOpticalPointSize * 20)
                d["usUpperOpticalPointSize"] = round(self.usUpperOpticalPointSize * 20)
                data = sstruct.pack(OS2_format_5, d)
            else:
                from fontTools import ttLib

                raise ttLib.TTLibError(
                    "unknown format for OS/2 table: version %s" % self.version
                )
        finally:
            # Always restore the Panose object, even when packing fails;
            # otherwise the table would be left holding packed bytes.
            self.panose = panose
        return data

    def toXML(self, writer, ttFont):
        """Write every field of the format matching ``self.version`` as XML.

        Bit fields are written in binary notation, 'panose' as a nested
        element, and 'achVendID' as its repr without the surrounding quotes.
        """
        writer.comment(
            "The fields 'usFirstCharIndex' and 'usLastCharIndex'\n"
            "will be recalculated by the compiler"
        )
        writer.newline()
        if self.version == 1:
            fmt = OS2_format_1
        elif self.version in (2, 3, 4):
            fmt = OS2_format_2
        elif self.version == 5:
            fmt = OS2_format_5
        else:
            fmt = OS2_format_0
        _, names, _ = sstruct.getformat(fmt)
        for name in names:
            value = getattr(self, name)
            if name == "panose":
                writer.begintag("panose")
                writer.newline()
                value.toXML(writer, ttFont)
                writer.endtag("panose")
            elif name in (
                "ulUnicodeRange1",
                "ulUnicodeRange2",
                "ulUnicodeRange3",
                "ulUnicodeRange4",
                "ulCodePageRange1",
                "ulCodePageRange2",
            ):
                writer.simpletag(name, value=num2binary(value))
            elif name in ("fsType", "fsSelection"):
                writer.simpletag(name, value=num2binary(value, 16))
            elif name == "achVendID":
                writer.simpletag(name, value=repr(value)[1:-1])
            else:
                writer.simpletag(name, value=value)
            writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Set the attribute *name* from an XML element (inverse of toXML)."""
        if name == "panose":
            self.panose = panose = Panose()
            for element in content:
                # Only tuples are child elements; anything else is skipped.
                if isinstance(element, tuple):
                    childName, childAttrs, childContent = element
                    panose.fromXML(childName, childAttrs, childContent, ttFont)
        elif name in (
            "ulUnicodeRange1",
            "ulUnicodeRange2",
            "ulUnicodeRange3",
            "ulUnicodeRange4",
            "ulCodePageRange1",
            "ulCodePageRange2",
            "fsType",
            "fsSelection",
        ):
            setattr(self, name, binary2num(attrs["value"]))
        elif name == "achVendID":
            setattr(self, name, safeEval("'''" + attrs["value"] + "'''"))
        else:
            setattr(self, name, safeEval(attrs["value"]))

    def updateFirstAndLastCharIndex(self, ttFont):
        """Set usFirstCharIndex/usLastCharIndex from the Unicode cmap subtables.

        Does nothing when the font has no 'cmap' table or no Unicode
        codepoints are mapped.
        """
        if "cmap" not in ttFont:
            return
        codes = set()
        for table in getattr(ttFont["cmap"], "tables", []):
            if table.isUnicode():
                codes.update(table.cmap.keys())
        if codes:
            minCode = min(codes)
            maxCode = max(codes)
            # USHORT cannot hold codepoints greater than 0xFFFF
            self.usFirstCharIndex = min(0xFFFF, minCode)
            self.usLastCharIndex = min(0xFFFF, maxCode)

    # misspelled attributes kept for legacy reasons

    @property
    def usMaxContex(self):
        """Legacy alias of ``usMaxContext``."""
        return self.usMaxContext

    @usMaxContex.setter
    def usMaxContex(self, value):
        self.usMaxContext = value

    @property
    def fsFirstCharIndex(self):
        """Legacy alias of ``usFirstCharIndex``."""
        return self.usFirstCharIndex

    @fsFirstCharIndex.setter
    def fsFirstCharIndex(self, value):
        self.usFirstCharIndex = value

    @property
    def fsLastCharIndex(self):
        """Legacy alias of ``usLastCharIndex``."""
        return self.usLastCharIndex

    @fsLastCharIndex.setter
    def fsLastCharIndex(self, value):
        self.usLastCharIndex = value

    def getUnicodeRanges(self):
        """Return the set of 'ulUnicodeRange*' bits currently enabled."""
        bits = set()
        ul1, ul2 = self.ulUnicodeRange1, self.ulUnicodeRange2
        ul3, ul4 = self.ulUnicodeRange3, self.ulUnicodeRange4
        for i in range(32):
            if ul1 & (1 << i):
                bits.add(i)
            if ul2 & (1 << i):
                bits.add(i + 32)
            if ul3 & (1 << i):
                bits.add(i + 64)
            if ul4 & (1 << i):
                bits.add(i + 96)
        return bits

    def setUnicodeRanges(self, bits):
        """Set the 'ulUnicodeRange*' fields to the specified 'bits'.

        Raises:
            ValueError: if a bit is outside 0..122.
        """
        ul1, ul2, ul3, ul4 = 0, 0, 0, 0
        for bit in bits:
            if 0 <= bit < 32:
                ul1 |= 1 << bit
            elif 32 <= bit < 64:
                ul2 |= 1 << (bit - 32)
            elif 64 <= bit < 96:
                ul3 |= 1 << (bit - 64)
            elif 96 <= bit < 123:
                ul4 |= 1 << (bit - 96)
            else:
                raise ValueError("expected 0 <= int <= 122, found: %r" % bit)
        self.ulUnicodeRange1, self.ulUnicodeRange2 = ul1, ul2
        self.ulUnicodeRange3, self.ulUnicodeRange4 = ul3, ul4

    def recalcUnicodeRanges(self, ttFont, pruneOnly=False):
        """Intersect the codepoints in the font's Unicode cmap subtables with
        the Unicode block ranges defined in the OpenType specification (v1.7),
        and set the respective 'ulUnicodeRange*' bits if there is at least ONE
        intersection.
        If 'pruneOnly' is True, only clear unused bits with NO intersection.

        Returns the resulting set of bits.
        """
        unicodes = set()
        for table in ttFont["cmap"].tables:
            if table.isUnicode():
                unicodes.update(table.cmap.keys())
        if pruneOnly:
            empty = intersectUnicodeRanges(unicodes, inverse=True)
            bits = self.getUnicodeRanges() - empty
        else:
            bits = intersectUnicodeRanges(unicodes)
        self.setUnicodeRanges(bits)
        return bits

    def getCodePageRanges(self):
        """Return the set of 'ulCodePageRange*' bits currently enabled.

        The set is empty for version 0 tables.
        """
        bits = set()
        if self.version < 1:
            return bits
        ul1, ul2 = self.ulCodePageRange1, self.ulCodePageRange2
        for i in range(32):
            if ul1 & (1 << i):
                bits.add(i)
            if ul2 & (1 << i):
                bits.add(i + 32)
        return bits

    def setCodePageRanges(self, bits):
        """Set the 'ulCodePageRange*' fields to the specified 'bits'.

        A version 0 table is upgraded to version 1, the first version that
        has these fields.

        Raises:
            ValueError: if a bit is outside 0..63.
        """
        ul1, ul2 = 0, 0
        for bit in bits:
            if 0 <= bit < 32:
                ul1 |= 1 << bit
            elif 32 <= bit < 64:
                ul2 |= 1 << (bit - 32)
            else:
                # '!r' is the repr conversion; ':r' is not a valid format spec
                # and would replace this message with a formatting error.
                raise ValueError(f"expected 0 <= int <= 63, found: {bit!r}")
        if self.version < 1:
            self.version = 1
        self.ulCodePageRange1, self.ulCodePageRange2 = ul1, ul2

    def recalcCodePageRanges(self, ttFont, pruneOnly=False):
        """Recompute the 'ulCodePageRange*' bits from the Unicode cmap subtables.

        If 'pruneOnly' is True, only bits that are already set can remain set.
        Returns the resulting set of bits.
        """
        unicodes = set()
        for table in ttFont["cmap"].tables:
            if table.isUnicode():
                unicodes.update(table.cmap.keys())
        bits = calcCodePageRanges(unicodes)
        if pruneOnly:
            bits &= self.getCodePageRanges()
        # when no codepage ranges can be enabled, fall back to enabling bit 0
        # (Latin 1) so that the font works in MS Word:
        # https://github.com/googlei18n/fontmake/issues/468
        if not bits:
            bits = {0}
        self.setCodePageRanges(bits)
        return bits

    def recalcAvgCharWidth(self, ttFont):
        """Recalculate xAvgCharWidth using metrics from ttFont's 'hmtx' table.

        Only widths greater than zero are averaged.
        Set it to 0 if the unlikely event 'hmtx' table is not found.
        """
        avg_width = 0
        hmtx = ttFont.get("hmtx")
        if hmtx is not None:
            widths = [width for width, _ in hmtx.metrics.values() if width > 0]
            if widths:
                avg_width = otRound(sum(widths) / len(widths))
        self.xAvgCharWidth = avg_width
        return avg_width
# Unicode ranges data from the OpenType OS/2 table specification v1.7
# The tuple index is the 'ulUnicodeRange' bit number; each entry lists the
# (block name, (first codepoint, last codepoint)) pairs covered by that bit.
OS2_UNICODE_RANGES = (
    (("Basic Latin", (0x0000, 0x007F)),),
    (("Latin-1 Supplement", (0x0080, 0x00FF)),),
    (("Latin Extended-A", (0x0100, 0x017F)),),
    (("Latin Extended-B", (0x0180, 0x024F)),),
    (
        ("IPA Extensions", (0x0250, 0x02AF)),
        ("Phonetic Extensions", (0x1D00, 0x1D7F)),
        ("Phonetic Extensions Supplement", (0x1D80, 0x1DBF)),
    ),
    (
        ("Spacing Modifier Letters", (0x02B0, 0x02FF)),
        ("Modifier Tone Letters", (0xA700, 0xA71F)),
    ),
    (
        ("Combining Diacritical Marks", (0x0300, 0x036F)),
        ("Combining Diacritical Marks Supplement", (0x1DC0, 0x1DFF)),
    ),
    (("Greek and Coptic", (0x0370, 0x03FF)),),
    (("Coptic", (0x2C80, 0x2CFF)),),
    (
        ("Cyrillic", (0x0400, 0x04FF)),
        ("Cyrillic Supplement", (0x0500, 0x052F)),
        ("Cyrillic Extended-A", (0x2DE0, 0x2DFF)),
        ("Cyrillic Extended-B", (0xA640, 0xA69F)),
    ),
    (("Armenian", (0x0530, 0x058F)),),
    (("Hebrew", (0x0590, 0x05FF)),),
    (("Vai", (0xA500, 0xA63F)),),
    (("Arabic", (0x0600, 0x06FF)), ("Arabic Supplement", (0x0750, 0x077F))),
    (("NKo", (0x07C0, 0x07FF)),),
    (("Devanagari", (0x0900, 0x097F)),),
    (("Bengali", (0x0980, 0x09FF)),),
    (("Gurmukhi", (0x0A00, 0x0A7F)),),
    (("Gujarati", (0x0A80, 0x0AFF)),),
    (("Oriya", (0x0B00, 0x0B7F)),),
    (("Tamil", (0x0B80, 0x0BFF)),),
    (("Telugu", (0x0C00, 0x0C7F)),),
    (("Kannada", (0x0C80, 0x0CFF)),),
    (("Malayalam", (0x0D00, 0x0D7F)),),
    (("Thai", (0x0E00, 0x0E7F)),),
    (("Lao", (0x0E80, 0x0EFF)),),
    (("Georgian", (0x10A0, 0x10FF)), ("Georgian Supplement", (0x2D00, 0x2D2F))),
    (("Balinese", (0x1B00, 0x1B7F)),),
    (("Hangul Jamo", (0x1100, 0x11FF)),),
    (
        ("Latin Extended Additional", (0x1E00, 0x1EFF)),
        ("Latin Extended-C", (0x2C60, 0x2C7F)),
        ("Latin Extended-D", (0xA720, 0xA7FF)),
    ),
    (("Greek Extended", (0x1F00, 0x1FFF)),),
    (
        ("General Punctuation", (0x2000, 0x206F)),
        ("Supplemental Punctuation", (0x2E00, 0x2E7F)),
    ),
    (("Superscripts And Subscripts", (0x2070, 0x209F)),),
    (("Currency Symbols", (0x20A0, 0x20CF)),),
    (("Combining Diacritical Marks For Symbols", (0x20D0, 0x20FF)),),
    (("Letterlike Symbols", (0x2100, 0x214F)),),
    (("Number Forms", (0x2150, 0x218F)),),
    (
        ("Arrows", (0x2190, 0x21FF)),
        ("Supplemental Arrows-A", (0x27F0, 0x27FF)),
        ("Supplemental Arrows-B", (0x2900, 0x297F)),
        ("Miscellaneous Symbols and Arrows", (0x2B00, 0x2BFF)),
    ),
    (
        ("Mathematical Operators", (0x2200, 0x22FF)),
        ("Supplemental Mathematical Operators", (0x2A00, 0x2AFF)),
        ("Miscellaneous Mathematical Symbols-A", (0x27C0, 0x27EF)),
        ("Miscellaneous Mathematical Symbols-B", (0x2980, 0x29FF)),
    ),
    (("Miscellaneous Technical", (0x2300, 0x23FF)),),
    (("Control Pictures", (0x2400, 0x243F)),),
    (("Optical Character Recognition", (0x2440, 0x245F)),),
    (("Enclosed Alphanumerics", (0x2460, 0x24FF)),),
    (("Box Drawing", (0x2500, 0x257F)),),
    (("Block Elements", (0x2580, 0x259F)),),
    (("Geometric Shapes", (0x25A0, 0x25FF)),),
    (("Miscellaneous Symbols", (0x2600, 0x26FF)),),
    (("Dingbats", (0x2700, 0x27BF)),),
    (("CJK Symbols And Punctuation", (0x3000, 0x303F)),),
    (("Hiragana", (0x3040, 0x309F)),),
    (
        ("Katakana", (0x30A0, 0x30FF)),
        ("Katakana Phonetic Extensions", (0x31F0, 0x31FF)),
    ),
    (("Bopomofo", (0x3100, 0x312F)), ("Bopomofo Extended", (0x31A0, 0x31BF))),
    (("Hangul Compatibility Jamo", (0x3130, 0x318F)),),
    (("Phags-pa", (0xA840, 0xA87F)),),
    (("Enclosed CJK Letters And Months", (0x3200, 0x32FF)),),
    (("CJK Compatibility", (0x3300, 0x33FF)),),
    (("Hangul Syllables", (0xAC00, 0xD7AF)),),
    (("Non-Plane 0 *", (0xD800, 0xDFFF)),),
    (("Phoenician", (0x10900, 0x1091F)),),
    (
        ("CJK Unified Ideographs", (0x4E00, 0x9FFF)),
        ("CJK Radicals Supplement", (0x2E80, 0x2EFF)),
        ("Kangxi Radicals", (0x2F00, 0x2FDF)),
        ("Ideographic Description Characters", (0x2FF0, 0x2FFF)),
        ("CJK Unified Ideographs Extension A", (0x3400, 0x4DBF)),
        ("CJK Unified Ideographs Extension B", (0x20000, 0x2A6DF)),
        ("Kanbun", (0x3190, 0x319F)),
    ),
    (("Private Use Area (plane 0)", (0xE000, 0xF8FF)),),
    (
        ("CJK Strokes", (0x31C0, 0x31EF)),
        ("CJK Compatibility Ideographs", (0xF900, 0xFAFF)),
        ("CJK Compatibility Ideographs Supplement", (0x2F800, 0x2FA1F)),
    ),
    (("Alphabetic Presentation Forms", (0xFB00, 0xFB4F)),),
    (("Arabic Presentation Forms-A", (0xFB50, 0xFDFF)),),
    (("Combining Half Marks", (0xFE20, 0xFE2F)),),
    (
        ("Vertical Forms", (0xFE10, 0xFE1F)),
        ("CJK Compatibility Forms", (0xFE30, 0xFE4F)),
    ),
    (("Small Form Variants", (0xFE50, 0xFE6F)),),
    (("Arabic Presentation Forms-B", (0xFE70, 0xFEFF)),),
    (("Halfwidth And Fullwidth Forms", (0xFF00, 0xFFEF)),),
    (("Specials", (0xFFF0, 0xFFFF)),),
    (("Tibetan", (0x0F00, 0x0FFF)),),
    (("Syriac", (0x0700, 0x074F)),),
    (("Thaana", (0x0780, 0x07BF)),),
    (("Sinhala", (0x0D80, 0x0DFF)),),
    (("Myanmar", (0x1000, 0x109F)),),
    (
        ("Ethiopic", (0x1200, 0x137F)),
        ("Ethiopic Supplement", (0x1380, 0x139F)),
        ("Ethiopic Extended", (0x2D80, 0x2DDF)),
    ),
    (("Cherokee", (0x13A0, 0x13FF)),),
    (("Unified Canadian Aboriginal Syllabics", (0x1400, 0x167F)),),
    (("Ogham", (0x1680, 0x169F)),),
    (("Runic", (0x16A0, 0x16FF)),),
    (("Khmer", (0x1780, 0x17FF)), ("Khmer Symbols", (0x19E0, 0x19FF))),
    (("Mongolian", (0x1800, 0x18AF)),),
    (("Braille Patterns", (0x2800, 0x28FF)),),
    (("Yi Syllables", (0xA000, 0xA48F)), ("Yi Radicals", (0xA490, 0xA4CF))),
    (
        ("Tagalog", (0x1700, 0x171F)),
        ("Hanunoo", (0x1720, 0x173F)),
        ("Buhid", (0x1740, 0x175F)),
        ("Tagbanwa", (0x1760, 0x177F)),
    ),
    (("Old Italic", (0x10300, 0x1032F)),),
    (("Gothic", (0x10330, 0x1034F)),),
    (("Deseret", (0x10400, 0x1044F)),),
    (
        ("Byzantine Musical Symbols", (0x1D000, 0x1D0FF)),
        ("Musical Symbols", (0x1D100, 0x1D1FF)),
        ("Ancient Greek Musical Notation", (0x1D200, 0x1D24F)),
    ),
    (("Mathematical Alphanumeric Symbols", (0x1D400, 0x1D7FF)),),
    (
        ("Private Use (plane 15)", (0xF0000, 0xFFFFD)),
        ("Private Use (plane 16)", (0x100000, 0x10FFFD)),
    ),
    (
        ("Variation Selectors", (0xFE00, 0xFE0F)),
        ("Variation Selectors Supplement", (0xE0100, 0xE01EF)),
    ),
    (("Tags", (0xE0000, 0xE007F)),),
    (("Limbu", (0x1900, 0x194F)),),
    (("Tai Le", (0x1950, 0x197F)),),
    (("New Tai Lue", (0x1980, 0x19DF)),),
    (("Buginese", (0x1A00, 0x1A1F)),),
    (("Glagolitic", (0x2C00, 0x2C5F)),),
    (("Tifinagh", (0x2D30, 0x2D7F)),),
    (("Yijing Hexagram Symbols", (0x4DC0, 0x4DFF)),),
    (("Syloti Nagri", (0xA800, 0xA82F)),),
    (
        ("Linear B Syllabary", (0x10000, 0x1007F)),
        ("Linear B Ideograms", (0x10080, 0x100FF)),
        ("Aegean Numbers", (0x10100, 0x1013F)),
    ),
    (("Ancient Greek Numbers", (0x10140, 0x1018F)),),
    (("Ugaritic", (0x10380, 0x1039F)),),
    (("Old Persian", (0x103A0, 0x103DF)),),
    (("Shavian", (0x10450, 0x1047F)),),
    (("Osmanya", (0x10480, 0x104AF)),),
    (("Cypriot Syllabary", (0x10800, 0x1083F)),),
    (("Kharoshthi", (0x10A00, 0x10A5F)),),
    (("Tai Xuan Jing Symbols", (0x1D300, 0x1D35F)),),
    (
        ("Cuneiform", (0x12000, 0x123FF)),
        ("Cuneiform Numbers and Punctuation", (0x12400, 0x1247F)),
    ),
    (("Counting Rod Numerals", (0x1D360, 0x1D37F)),),
    (("Sundanese", (0x1B80, 0x1BBF)),),
    (("Lepcha", (0x1C00, 0x1C4F)),),
    (("Ol Chiki", (0x1C50, 0x1C7F)),),
    (("Saurashtra", (0xA880, 0xA8DF)),),
    (("Kayah Li", (0xA900, 0xA92F)),),
    (("Rejang", (0xA930, 0xA95F)),),
    (("Cham", (0xAA00, 0xAA5F)),),
    (("Ancient Symbols", (0x10190, 0x101CF)),),
    (("Phaistos Disc", (0x101D0, 0x101FF)),),
    (
        ("Carian", (0x102A0, 0x102DF)),
        ("Lycian", (0x10280, 0x1029F)),
        ("Lydian", (0x10920, 0x1093F)),
    ),
    (("Domino Tiles", (0x1F030, 0x1F09F)), ("Mahjong Tiles", (0x1F000, 0x1F02F))),
)
# Module-level cache filled on first use by _getUnicodeRanges().
# _unicodeValues starts with a None placeholder, so it is always one item
# longer than _unicodeStarts: the (stop, bit) pair for _unicodeStarts[i] lives
# at _unicodeValues[i + 1].
_unicodeStarts = []
_unicodeValues = [None]


def _getUnicodeRanges():
    """Return the cached ``(starts, values)`` lookup lists.

    ``starts`` holds the first codepoint of every block in
    ``OS2_UNICODE_RANGES`` in ascending order; ``values`` holds the matching
    ``(stop, bit)`` pairs, shifted by one position by the leading ``None``.
    """
    # build the ranges of codepoints for each unicode range bit, and cache result
    if not _unicodeStarts:
        unicodeRanges = [
            (start, (stop, bit))
            for bit, blocks in enumerate(OS2_UNICODE_RANGES)
            for _, (start, stop) in blocks
        ]
        for start, (stop, bit) in sorted(unicodeRanges):
            _unicodeStarts.append(start)
            _unicodeValues.append((stop, bit))
    return _unicodeStarts, _unicodeValues
def intersectUnicodeRanges(unicodes, inverse=False):
    """Intersect a sequence of (int) Unicode codepoints with the Unicode block
    ranges defined in the OpenType specification v1.7, and return the set of
    'ulUnicodeRanges' bits for which there is at least ONE intersection.
    If 'inverse' is True, return the bits for which there is NO intersection.

    >>> intersectUnicodeRanges([0x0410]) == {9}
    True
    >>> intersectUnicodeRanges([0x0410, 0x1F000]) == {9, 57, 122}
    True
    >>> intersectUnicodeRanges([0x0410, 0x1F000], inverse=True) == (
    ...     set(range(len(OS2_UNICODE_RANGES))) - {9, 57, 122})
    True
    """
    unicodes = set(unicodes)
    unicodestarts, unicodevalues = _getUnicodeRanges()
    bits = set()
    for code in unicodes:
        # bisect() gives the number of block starts <= code; because the values
        # list has a leading placeholder, that count indexes the block with the
        # greatest start that is <= code.
        stop, bit = unicodevalues[bisect.bisect(unicodestarts, code)]
        if code <= stop:
            bits.add(bit)
    # The spec says that bit 57 ("Non Plane 0") implies that there's
    # at least one codepoint beyond the BMP; so I also include all
    # the non-BMP codepoints here
    if any(0x10000 <= code < 0x110000 for code in unicodes):
        bits.add(57)
    return set(range(len(OS2_UNICODE_RANGES))) - bits if inverse else bits
def calcCodePageRanges(unicodes):
    """Given a set of Unicode codepoints (integers), calculate the
    corresponding OS/2 CodePage range bits.
    This is a direct translation of FontForge implementation:
    https://github.com/fontforge/fontforge/blob/7b2c074/fontforge/tottf.c#L3158

    Any iterable of codepoints is accepted; it is copied into a set first.
    Returns a new set of bit numbers (possibly empty).
    """
    # Normalise once: the code below both iterates 'unicodes' and runs many
    # membership tests on it, which would exhaust a one-shot iterator and be
    # needlessly slow on a list.
    unicodes = set(unicodes)
    bits = set()
    hasAscii = set(range(0x20, 0x7E)).issubset(unicodes)
    hasLineart = ord("┤") in unicodes

    for uni in unicodes:
        if uni == ord("Þ") and hasAscii:
            bits.add(0)  # Latin 1
        elif uni == ord("Ľ") and hasAscii:
            bits.add(1)  # Latin 2: Eastern Europe
            if hasLineart:
                bits.add(58)  # Latin 2
        elif uni == ord("Б"):
            bits.add(2)  # Cyrillic
            if ord("Ѕ") in unicodes and hasLineart:
                bits.add(57)  # IBM Cyrillic
            if ord("╜") in unicodes and hasLineart:
                bits.add(49)  # MS-DOS Russian
        elif uni == ord("Ά"):
            bits.add(3)  # Greek
            if hasLineart and ord("½") in unicodes:
                bits.add(48)  # IBM Greek
            if hasLineart and ord("√") in unicodes:
                bits.add(60)  # Greek, former 437 G
        elif uni == ord("İ") and hasAscii:
            bits.add(4)  # Turkish
            if hasLineart:
                bits.add(56)  # IBM turkish
        elif uni == ord("א"):
            bits.add(5)  # Hebrew
            if hasLineart and ord("√") in unicodes:
                bits.add(53)  # Hebrew
        elif uni == ord("ر"):
            bits.add(6)  # Arabic
            if ord("√") in unicodes:
                bits.add(51)  # Arabic
            if hasLineart:
                bits.add(61)  # Arabic; ASMO 708
        elif uni == ord("ŗ") and hasAscii:
            bits.add(7)  # Windows Baltic
            if hasLineart:
                bits.add(59)  # MS-DOS Baltic
        elif uni == ord("₫") and hasAscii:
            bits.add(8)  # Vietnamese
        elif uni == ord("ๅ"):
            bits.add(16)  # Thai
        elif uni == ord("エ"):
            bits.add(17)  # JIS/Japan
        elif uni == ord("ㄅ"):
            bits.add(18)  # Chinese: Simplified
        elif uni == ord("ㄱ"):
            bits.add(19)  # Korean wansung
        elif uni == ord("央"):
            bits.add(20)  # Chinese: Traditional
        elif uni == ord("곴"):
            bits.add(21)  # Korean Johab
        elif uni == ord("♥") and hasAscii:
            bits.add(30)  # OEM Character Set
        # TODO: Symbol bit has a special meaning (check the spec), we need
        # to confirm if this is wanted by default.
        # elif chr(0xF000) <= char <= chr(0xF0FF):
        #    codepageRanges.add(31)          # Symbol Character Set
        elif uni == ord("þ") and hasAscii and hasLineart:
            bits.add(54)  # MS-DOS Icelandic
        elif uni == ord("╚") and hasAscii:
            bits.add(62)  # WE/Latin 1
            bits.add(63)  # US
        elif hasAscii and hasLineart and ord("√") in unicodes:
            if uni == ord("Å"):
                bits.add(50)  # MS-DOS Nordic
            elif uni == ord("é"):
                bits.add(52)  # MS-DOS Canadian French
            elif uni == ord("õ"):
                bits.add(55)  # MS-DOS Portuguese

    if hasAscii and ord("‰") in unicodes and ord("∑") in unicodes:
        bits.add(29)  # Macintosh Character Set (US Roman)

    return bits
if __name__ == "__main__":
    # Run this module's doctests; the exit status is the number of failures.
    import doctest
    import sys

    sys.exit(doctest.testmod().failed)