Spaces:

lsottani
/

RAG_file_preprocessing

Sleeping

App Files Files Community

RAG_file_preprocessing / venv /lib /python3.9 /site-packages /fontTools /cffLib /width.py

lsottani

Upload folder using huggingface_hub

d9f69e5 verified 2 months ago

raw

history blame contribute delete

6.07 kB

	# -- coding: utf-8 --

	"""T2CharString glyph width optimizer.

	CFF glyphs whose width equals the CFF Private dictionary's ``defaultWidthX``
	value do not need to specify their width in their charstring, saving bytes.
	This module determines the optimum ``defaultWidthX`` and ``nominalWidthX``
	values for a font, when provided with a list of glyph widths."""

	from fontTools.ttLib import TTFont
	from collections import defaultdict
	from operator import add
	from functools import reduce


	__all__ = ["optimizeWidths", "main"]


	class missingdict(dict):
	def __init__(self, missing_func):
	self.missing_func = missing_func

	def __missing__(self, v):
	return self.missing_func(v)


	def cumSum(f, op=add, start=0, decreasing=False):
	keys = sorted(f.keys())
	minx, maxx = keys[0], keys[-1]

	total = reduce(op, f.values(), start)

	if decreasing:
	missing = lambda x: start if x > maxx else total
	domain = range(maxx, minx - 1, -1)
	else:
	missing = lambda x: start if x < minx else total
	domain = range(minx, maxx + 1)

	out = missingdict(missing)

	v = start
	for x in domain:
	v = op(v, f[x])
	out[x] = v

	return out


	def byteCost(widths, default, nominal):
	if not hasattr(widths, "items"):
	d = defaultdict(int)
	for w in widths:
	d[w] += 1
	widths = d

	cost = 0
	for w, freq in widths.items():
	if w == default:
	continue
	diff = abs(w - nominal)
	if diff <= 107:
	cost += freq
	elif diff <= 1131:
	cost += freq * 2
	else:
	cost += freq * 5
	return cost


	def optimizeWidthsBruteforce(widths):
	"""Bruteforce version. Veeeeeeeeeeeeeeeeery slow. Only works for smallests of fonts."""

	d = defaultdict(int)
	for w in widths:
	d[w] += 1

	# Maximum number of bytes using default can possibly save
	maxDefaultAdvantage = 5 * max(d.values())

	minw, maxw = min(widths), max(widths)
	domain = list(range(minw, maxw + 1))

	bestCostWithoutDefault = min(byteCost(widths, None, nominal) for nominal in domain)

	bestCost = len(widths) * 5 + 1
	for nominal in domain:
	if byteCost(widths, None, nominal) > bestCost + maxDefaultAdvantage:
	continue
	for default in domain:
	cost = byteCost(widths, default, nominal)
	if cost < bestCost:
	bestCost = cost
	bestDefault = default
	bestNominal = nominal

	return bestDefault, bestNominal


	def optimizeWidths(widths):
	"""Given a list of glyph widths, or dictionary mapping glyph width to number of
	glyphs having that, returns a tuple of best CFF default and nominal glyph widths.

	This algorithm is linear in UPEM+numGlyphs."""

	if not hasattr(widths, "items"):
	d = defaultdict(int)
	for w in widths:
	d[w] += 1
	widths = d

	keys = sorted(widths.keys())
	minw, maxw = keys[0], keys[-1]
	domain = list(range(minw, maxw + 1))

	# Cumulative sum/max forward/backward.
	cumFrqU = cumSum(widths, op=add)
	cumMaxU = cumSum(widths, op=max)
	cumFrqD = cumSum(widths, op=add, decreasing=True)
	cumMaxD = cumSum(widths, op=max, decreasing=True)

	# Cost per nominal choice, without default consideration.
	nomnCostU = missingdict(
	lambda x: cumFrqU[x] + cumFrqU[x - 108] + cumFrqU[x - 1132] * 3
	)
	nomnCostD = missingdict(
	lambda x: cumFrqD[x] + cumFrqD[x + 108] + cumFrqD[x + 1132] * 3
	)
	nomnCost = missingdict(lambda x: nomnCostU[x] + nomnCostD[x] - widths[x])

	# Cost-saving per nominal choice, by best default choice.
	dfltCostU = missingdict(
	lambda x: max(cumMaxU[x], cumMaxU[x - 108] * 2, cumMaxU[x - 1132] * 5)
	)
	dfltCostD = missingdict(
	lambda x: max(cumMaxD[x], cumMaxD[x + 108] * 2, cumMaxD[x + 1132] * 5)
	)
	dfltCost = missingdict(lambda x: max(dfltCostU[x], dfltCostD[x]))

	# Combined cost per nominal choice.
	bestCost = missingdict(lambda x: nomnCost[x] - dfltCost[x])

	# Best nominal.
	nominal = min(domain, key=lambda x: bestCost[x])

	# Work back the best default.
	bestC = bestCost[nominal]
	dfltC = nomnCost[nominal] - bestCost[nominal]
	ends = []
	if dfltC == dfltCostU[nominal]:
	starts = [nominal, nominal - 108, nominal - 1132]
	for start in starts:
	while cumMaxU[start] and cumMaxU[start] == cumMaxU[start - 1]:
	start -= 1
	ends.append(start)
	else:
	starts = [nominal, nominal + 108, nominal + 1132]
	for start in starts:
	while cumMaxD[start] and cumMaxD[start] == cumMaxD[start + 1]:
	start += 1
	ends.append(start)
	default = min(ends, key=lambda default: byteCost(widths, default, nominal))

	return default, nominal


	def main(args=None):
	"""Calculate optimum defaultWidthX/nominalWidthX values"""

	import argparse

	parser = argparse.ArgumentParser(
	"fonttools cffLib.width",
	description=main.__doc__,
	)
	parser.add_argument(
	"inputs", metavar="FILE", type=str, nargs="+", help="Input TTF files"
	)
	parser.add_argument(
	"-b",
	"--brute-force",
	dest="brute",
	action="store_true",
	help="Use brute-force approach (VERY slow)",
	)

	args = parser.parse_args(args)

	for fontfile in args.inputs:
	font = TTFont(fontfile)
	hmtx = font["hmtx"]
	widths = [m[0] for m in hmtx.metrics.values()]
	if args.brute:
	default, nominal = optimizeWidthsBruteforce(widths)
	else:
	default, nominal = optimizeWidths(widths)
	print(
	"glyphs=%d default=%d nominal=%d byteCost=%d"
	% (len(widths), default, nominal, byteCost(widths, default, nominal))
	)


	if __name__ == "__main__":
	import sys

	if len(sys.argv) == 1:
	import doctest

	sys.exit(doctest.testmod().failed)
	main()