/*
* Levenshtein.c
* @(#) $Id: Levenshtein.c,v 1.41 2005/01/13 20:05:36 yeti Exp $
* Python extension computing Levenshtein distances, string similarities,
* median strings and other goodies.
*
* Copyright (C) 2002-2003 David Necas (Yeti) <yeti@physics.muni.cz>.
*
* The Taus113 random generator:
* Copyright (C) 2002 Atakan Gurkan
* Copyright (C) 1996, 1997, 1998, 1999, 2000 James Theiler, Brian Gough
* (see below for more)
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
**/
/**
* TODO:
*
* - Implement weighted string averaging, see:
H. Bunke et al.: On the Weighted Mean of a Pair of Strings,
Pattern Analysis and Applications 2002, 5(1): 23-30.
X. Jiang et al.: Dynamic Computations of Generalized Median Strings,
* Pattern Analysis and Applications 2002, ???.
* The latter also contains an interesting median-search algorithm.
*
* - Deal with stray symbols in greedy median() and median_improve().
* There are two possibilities:
* (i) Remember which strings contain which symbols. This allows certain
* small optimizations when processing them.
(ii) Use some overall heuristics to find symbols which aren't worth
* trying. This is very appealing, but hard to do properly
* (requires some inequality strong enough to allow practical exclusion
* of certain symbols -- at certain positions)
*
* - Editops should be an object that only *looks* like a list (which means
* it is a list in duck typing) to avoid never-ending conversions from
* Python lists to LevEditOp arrays and back
*
- Optimize munkers_blackman(); it's pretty dumb (no memory of visited
* columns/rows)
*
* - Make it really usable as a C library (needs some wrappers, headers, ...,
* and maybe even documentation ;-)
*
* - Add interface to various interesting auxiliary results, namely
* set and sequence distance (only ratio is exported), the map from
* munkers_blackman() itself, ...
*
* - Generalizations:
* - character weight matrix/function
* - arbitrary edit operation costs, decomposable edit operations
*
* - Create a test suite
*
* - Add more interesting algorithms ;-)
*
* Postponed TODO (investigated, and a big `but' was found):
*
* - A linear approximate set median algorithm:
* P. Indyk: Sublinear time algorithms for metric space problems,
* STOC 1999, http://citeseer.nj.nec.com/indyk00sublinear.html.
* BUT: The algorithm seems to be advantageous only in the case of very
* large sets -- if my estimates are correct (the article itself is quite
`asymptotic'), say 10^5 at least. On smaller sets one would either get
only an extremely rough median estimate, or the number of distance
computations would in fact be higher than in the dumb O(n^2) algorithm.
*
* - Improve setmedian() speed with triangular inequality, see:
* Juan, A., E. Vidal: An Algorithm for Fast Median Search,
* 1997, http://citeseer.nj.nec.com/article/juan97algorithm.html
* BUT: It doesn't seem to help much in spaces of high dimension (see the
discussion and graphs in the article itself), a few percent at most,
* and strings behave like a space with a very high dimension (locally), so
* who knows, it probably wouldn't help much.
*
**/
#ifdef NO_PYTHON
#define _GNU_SOURCE
#include <string.h>
#include <math.h>
/* for debugging */
#include <stdio.h>
#else /* NO_PYTHON */
#define _LEV_STATIC_PY static
#define lev_wchar Py_UNICODE
#include <Python.h>
#endif /* NO_PYTHON */
#if PY_MAJOR_VERSION >= 3
#define LEV_PYTHON3
#define PyString_Type PyBytes_Type
#define PyString_GET_SIZE PyBytes_GET_SIZE
#define PyString_AS_STRING PyBytes_AS_STRING
#define PyString_Check PyBytes_Check
#define PyString_FromStringAndSize PyBytes_FromStringAndSize
#define PyString_InternFromString PyUnicode_InternFromString
#define PyInt_AS_LONG PyLong_AsLong
#define PyInt_FromLong PyLong_FromLong
#define PyInt_Check PyLong_Check
#define PY_INIT_MOD(module, name, doc, methods) \
static struct PyModuleDef moduledef = { \
PyModuleDef_HEAD_INIT, name, doc, -1, methods, }; \
module = PyModule_Create(&moduledef);
#define PY_MOD_INIT_FUNC_DEF(name) PyObject* PyInit_##name(void)
#else
#define PY_INIT_MOD(module, name, doc, methods) \
Py_InitModule3(name, methods, doc);
#define PY_MOD_INIT_FUNC_DEF(name) void init##name(void)
#endif /* PY_MAJOR_VERSION */
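/* These two macros let the module initialization at the bottom of this file
 * (PY_MOD_INIT_FUNC_DEF(_levenshtein)) be written once for both Python
 * versions: under Python 3 they expand to a PyObject* PyInit__levenshtein()
 * returning the module created with PyModule_Create(), under Python 2 to a
 * void init_levenshtein() that calls Py_InitModule3(). */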
#include <assert.h>
#include "_levenshtein.h"
/* FIXME: inline availability should be solved in setup.py, somehow, or
* even better in Python.h, like const is...
* this should inline at least with gcc and msvc */
#ifndef __GNUC__
# ifdef _MSC_VER
# define inline __inline
# else
# define inline /* */
# endif
# define __attribute__(x) /* */
#endif
#define LEV_EPSILON 1e-14
#define LEV_INFINITY 1e100
/* Me thinks the second argument of PyArg_UnpackTuple() should be const.
* Anyway I habitually pass a constant string.
* A cast to placate the compiler. */
#define PYARGCFIX(x) (char*)(x)
/* local functions */
static size_t*
munkers_blackman(size_t n1,
size_t n2,
double *dists);
#define TAUS113_LCG(n) ((69069UL * n) & 0xffffffffUL)
#define TAUS113_MASK 0xffffffffUL
typedef struct {
unsigned long int z1, z2, z3, z4;
} taus113_state_t;
static inline unsigned long int
taus113_get(taus113_state_t *state);
static void
taus113_set(taus113_state_t *state,
unsigned long int s);
#ifndef NO_PYTHON
/* python interface and wrappers */
/* declarations and docstrings {{{ */
static PyObject* distance_py(PyObject *self, PyObject *args);
static PyObject* ratio_py(PyObject *self, PyObject *args);
static PyObject* hamming_py(PyObject *self, PyObject *args);
static PyObject* jaro_py(PyObject *self, PyObject *args);
static PyObject* jaro_winkler_py(PyObject *self, PyObject *args);
static PyObject* median_py(PyObject *self, PyObject *args);
static PyObject* median_improve_py(PyObject *self, PyObject *args);
static PyObject* quickmedian_py(PyObject *self, PyObject *args);
static PyObject* setmedian_py(PyObject *self, PyObject *args);
static PyObject* seqratio_py(PyObject *self, PyObject *args);
static PyObject* setratio_py(PyObject *self, PyObject *args);
static PyObject* editops_py(PyObject *self, PyObject *args);
static PyObject* opcodes_py(PyObject *self, PyObject *args);
static PyObject* inverse_py(PyObject *self, PyObject *args);
static PyObject* apply_edit_py(PyObject *self, PyObject *args);
static PyObject* matching_blocks_py(PyObject *self, PyObject *args);
static PyObject* subtract_edit_py(PyObject *self, PyObject *args);
#define Levenshtein_DESC \
"A C extension module for fast computation of:\n" \
"- Levenshtein (edit) distance and edit sequence manipulation\n" \
"- string similarity\n" \
"- approximate median strings, and generally string averaging\n" \
"- string sequence and set similarity\n" \
"\n" \
"Levenshtein has a some overlap with difflib (SequenceMatcher). It\n" \
"supports only strings, not arbitrary sequence types, but on the\n" \
"other hand it's much faster.\n" \
"\n" \
"It supports both normal and Unicode strings, but can't mix them, all\n" \
"arguments to a function (method) have to be of the same type (or its\n" \
"subclasses).\n"
#define distance_DESC \
"Compute absolute Levenshtein distance of two strings.\n" \
"\n" \
"distance(string1, string2)\n" \
"\n" \
"Examples (it's hard to spell Levenshtein correctly):\n" \
"\n" \
">>> distance('Levenshtein', 'Lenvinsten')\n" \
"4\n" \
">>> distance('Levenshtein', 'Levensthein')\n" \
"2\n" \
">>> distance('Levenshtein', 'Levenshten')\n" \
"1\n" \
">>> distance('Levenshtein', 'Levenshtein')\n" \
"0\n" \
"\n" \
"Yeah, we've managed it at last.\n"
#define ratio_DESC \
"Compute similarity of two strings.\n" \
"\n" \
"ratio(string1, string2)\n" \
"\n" \
"The similarity is a number between 0 and 1, it's usually equal or\n" \
"somewhat higher than difflib.SequenceMatcher.ratio(), because it's\n" \
"based on real minimal edit distance.\n" \
"\n" \
"Examples:\n" \
"\n" \
">>> ratio('Hello world!', 'Holly grail!') # doctest: +ELLIPSIS\n" \
"0.583333...\n" \
">>> ratio('Brian', 'Jesus')\n" \
"0.0\n" \
"\n" \
"Really? I thought there was some similarity.\n"
#define hamming_DESC \
"Compute Hamming distance of two strings.\n" \
"\n" \
"hamming(string1, string2)\n" \
"\n" \
"The Hamming distance is simply the number of differing characters.\n" \
"That means the length of the strings must be the same.\n" \
"\n" \
"Examples:\n" \
">>> hamming('Hello world!', 'Holly grail!')\n" \
"7\n" \
">>> hamming('Brian', 'Jesus')\n" \
"5\n"
#define jaro_DESC \
"Compute Jaro string similarity metric of two strings.\n" \
"\n" \
"jaro(string1, string2)\n" \
"\n" \
"The Jaro string similarity metric is intended for short strings like\n" \
"personal last names. It is 0 for completely different strings and\n" \
"1 for identical strings.\n" \
"\n" \
"Examples:\n" \
">>> jaro('Brian', 'Jesus')\n" \
"0.0\n" \
">>> jaro('Thorkel', 'Thorgier') # doctest: +ELLIPSIS\n" \
"0.779761...\n" \
">>> jaro('Dinsdale', 'D') # doctest: +ELLIPSIS\n" \
"0.708333...\n"
#define jaro_winkler_DESC \
"Compute Jaro string similarity metric of two strings.\n" \
"\n" \
"jaro_winkler(string1, string2[, prefix_weight])\n" \
"\n" \
"The Jaro-Winkler string similarity metric is a modification of Jaro\n" \
"metric giving more weight to common prefix, as spelling mistakes are\n" \
"more likely to occur near ends of words.\n" \
"\n" \
"The prefix weight is inverse value of common prefix length sufficient\n" \
"to consider the strings *identical*. If no prefix weight is\n" \
"specified, 1/10 is used.\n" \
"\n" \
"Examples:\n" \
"\n" \
">>> jaro_winkler('Brian', 'Jesus')\n" \
"0.0\n" \
">>> jaro_winkler('Thorkel', 'Thorgier') # doctest: +ELLIPSIS\n" \
"0.867857...\n" \
">>> jaro_winkler('Dinsdale', 'D') # doctest: +ELLIPSIS\n" \
"0.7375...\n" \
">>> jaro_winkler('Thorkel', 'Thorgier', 0.25)\n" \
"1.0\n"
#define median_DESC \
"Find an approximate generalized median string using greedy algorithm.\n" \
"\n" \
"median(string_sequence[, weight_sequence])\n" \
"\n" \
"You can optionally pass a weight for each string as the second\n" \
"argument. The weights are interpreted as item multiplicities,\n" \
"although any non-negative real numbers are accepted. Use them to\n" \
"improve computation speed when strings often appear multiple times\n" \
"in the sequence.\n" \
"\n" \
"Examples:\n" \
"\n" \
">>> median(['SpSm', 'mpamm', 'Spam', 'Spa', 'Sua', 'hSam'])\n" \
"'Spam'\n" \
">>> fixme = ['Levnhtein', 'Leveshein', 'Leenshten',\n" \
"... 'Leveshtei', 'Lenshtein', 'Lvenstein',\n" \
"... 'Levenhtin', 'evenshtei']\n" \
">>> median(fixme)\n" \
"'Levenshtein'\n" \
"\n" \
"Hm. Even a computer program can spell Levenshtein better than me.\n"
#define median_improve_DESC \
"Improve an approximate generalized median string by perturbations.\n" \
"\n" \
"median_improve(string, string_sequence[, weight_sequence])\n" \
"\n" \
"The first argument is the estimated generalized median string you\n" \
"want to improve, the others are the same as in median(). It returns\n" \
"a string with total distance less or equal to that of the given string.\n" \
"\n" \
"Note this is much slower than median(). Also note it performs only\n" \
"one improvement step, calling median_improve() again on the result\n" \
"may improve it further, though this is unlikely to happen unless the\n" \
"given string was not very similar to the actual generalized median.\n" \
"\n" \
"Examples:\n" \
"\n" \
">>> fixme = ['Levnhtein', 'Leveshein', 'Leenshten',\n" \
"... 'Leveshtei', 'Lenshtein', 'Lvenstein',\n" \
"... 'Levenhtin', 'evenshtei']\n" \
">>> median_improve('spam', fixme)\n" \
"'enhtein'\n" \
">>> median_improve(median_improve('spam', fixme), fixme)\n" \
"'Levenshtein'\n" \
"\n" \
"It takes some work to change spam to Levenshtein.\n"
#define quickmedian_DESC \
"Find a very approximate generalized median string, but fast.\n" \
"\n" \
"quickmedian(string[, weight_sequence])\n" \
"\n" \
"See median() for argument description.\n" \
"\n" \
"This method is somewhere between setmedian() and picking a random\n" \
"string from the set; both speedwise and quality-wise.\n" \
"\n" \
"Examples:\n" \
"\n" \
">>> fixme = ['Levnhtein', 'Leveshein', 'Leenshten',\n" \
"... 'Leveshtei', 'Lenshtein', 'Lvenstein',\n" \
"... 'Levenhtin', 'evenshtei']\n" \
">>> quickmedian(fixme)\n" \
"'Levnshein'\n"
#define setmedian_DESC \
"Find set median of a string set (passed as a sequence).\n" \
"\n" \
"setmedian(string[, weight_sequence])\n" \
"\n" \
"See median() for argument description.\n" \
"\n" \
"The returned string is always one of the strings in the sequence.\n" \
"\n" \
"Examples:\n" \
"\n" \
">>> setmedian(['ehee', 'cceaes', 'chees', 'chreesc',\n" \
"... 'chees', 'cheesee', 'cseese', 'chetese'])\n" \
"'chees'\n" \
"\n" \
"You haven't asked me about Limburger, sir.\n"
#define seqratio_DESC \
"Compute similarity ratio of two sequences of strings.\n" \
"\n" \
"seqratio(string_sequence1, string_sequence2)\n" \
"\n" \
"This is like ratio(), but for string sequences. A kind of ratio()\n" \
"is used to to measure the cost of item change operation for the\n" \
"strings.\n" \
"\n" \
"Examples:\n" \
"\n" \
">>> seqratio(['newspaper', 'litter bin', 'tinny', 'antelope'],\n" \
"... ['caribou', 'sausage', 'gorn', 'woody'])\n" \
"0.21517857142857144\n"
#define setratio_DESC \
"Compute similarity ratio of two strings sets (passed as sequences).\n" \
"\n" \
"setratio(string_sequence1, string_sequence2)\n" \
"\n" \
"The best match between any strings in the first set and the second\n" \
"set (passed as sequences) is attempted. I.e., the order doesn't\n" \
"matter here.\n" \
"\n" \
"Examples:\n" \
"\n" \
">>> setratio(['newspaper', 'litter bin', 'tinny', 'antelope'],\n" \
"... ['caribou', 'sausage', 'gorn', 'woody']) # doctest: +ELLIPSIS\n" \
"0.281845...\n" \
"\n" \
"No, even reordering doesn't help the tinny words to match the\n" \
"woody ones.\n"
#define editops_DESC \
"Find sequence of edit operations transforming one string to another.\n" \
"\n" \
"editops(source_string, destination_string)\n" \
"editops(edit_operations, source_length, destination_length)\n" \
"\n" \
"The result is a list of triples (operation, spos, dpos), where\n" \
"operation is one of 'equal', 'replace', 'insert', or 'delete'; spos\n" \
"and dpos are position of characters in the first (source) and the\n" \
"second (destination) strings. These are operations on signle\n" \
"characters. In fact the returned list doesn't contain the 'equal',\n" \
"but all the related functions accept both lists with and without\n" \
"'equal's.\n" \
"\n" \
"Examples:\n" \
"\n" \
">>> editops('spam', 'park')\n" \
"[('delete', 0, 0), ('insert', 3, 2), ('replace', 3, 3)]\n" \
"\n" \
"The alternate form editops(opcodes, source_string, destination_string)\n" \
"can be used for conversion from opcodes (5-tuples) to editops (you can\n" \
"pass strings or their lengths, it doesn't matter).\n"
#define opcodes_DESC \
"Find sequence of edit operations transforming one string to another.\n" \
"\n" \
"opcodes(source_string, destination_string)\n" \
"opcodes(edit_operations, source_length, destination_length)\n" \
"\n" \
"The result is a list of 5-tuples with the same meaning as in\n" \
"SequenceMatcher's get_opcodes() output. But since the algorithms\n" \
"differ, the actual sequences from Levenshtein and SequenceMatcher\n" \
"may differ too.\n" \
"\n" \
"Examples:\n" \
"\n" \
">>> for x in opcodes('spam', 'park'):\n" \
"... print(x)\n" \
"...\n" \
"('delete', 0, 1, 0, 0)\n" \
"('equal', 1, 3, 0, 2)\n" \
"('insert', 3, 3, 2, 3)\n" \
"('replace', 3, 4, 3, 4)\n" \
"\n" \
"The alternate form opcodes(editops, source_string, destination_string)\n" \
"can be used for conversion from editops (triples) to opcodes (you can\n" \
"pass strings or their lengths, it doesn't matter).\n"
#define inverse_DESC \
"Invert the sense of an edit operation sequence.\n" \
"\n" \
"inverse(edit_operations)\n" \
"\n" \
"In other words, it returns a list of edit operations transforming the\n" \
"second (destination) string to the first (source). It can be used\n" \
"with both editops and opcodes.\n" \
"\n" \
"Examples:\n" \
"\n" \
">>> inverse(editops('spam', 'park'))\n" \
"[('insert', 0, 0), ('delete', 2, 3), ('replace', 3, 3)]\n" \
">>> editops('park', 'spam')\n" \
"[('insert', 0, 0), ('delete', 2, 3), ('replace', 3, 3)]\n"
#define apply_edit_DESC \
"Apply a sequence of edit operations to a string.\n" \
"\n" \
"apply_edit(edit_operations, source_string, destination_string)\n" \
"\n" \
"In the case of editops, the sequence can be arbitrary ordered subset\n" \
"of the edit sequence transforming source_string to destination_string.\n" \
"\n" \
"Examples:\n" \
"\n" \
">>> e = editops('man', 'scotsman')\n" \
">>> apply_edit(e, 'man', 'scotsman')\n" \
"'scotsman'\n" \
">>> apply_edit(e[:3], 'man', 'scotsman')\n" \
"'scoman'\n" \
"\n" \
"The other form of edit operations, opcodes, is not very suitable for\n" \
"such a tricks, because it has to always span over complete strings,\n" \
"subsets can be created by carefully replacing blocks with 'equal'\n" \
"blocks, or by enlarging 'equal' block at the expense of other blocks\n" \
"and adjusting the other blocks accordingly.\n" \
"\n" \
"Examples:\n" \
">>> a, b = 'spam and eggs', 'foo and bar'\n" \
">>> e = opcodes(a, b)\n" \
">>> apply_edit(inverse(e), b, a)\n" \
"'spam and eggs'\n" \
">>> e[4] = ('equal', 10, 13, 8, 11)\n" \
">>> e\n" \
"[('delete', 0, 1, 0, 0), ('replace', 1, 4, 0, 3), ('equal', 4, 9, 3, 8), ('delete', 9, 10, 8, 8), ('equal', 10, 13, 8, 11)]\n" \
">>> apply_edit(e, a, b)\n" \
"'foo and ggs'\n"
#define matching_blocks_DESC \
"Find identical blocks in two strings.\n" \
"\n" \
"matching_blocks(edit_operations, source_length, destination_length)\n" \
"\n" \
"The result is a list of triples with the same meaning as in\n" \
"SequenceMatcher's get_matching_blocks() output. It can be used with\n" \
"both editops and opcodes. The second and third arguments don't\n" \
"have to be actually strings, their lengths are enough.\n" \
"\n" \
"Examples:\n" \
"\n" \
">>> a, b = 'spam', 'park'\n" \
">>> matching_blocks(editops(a, b), a, b)\n" \
"[(1, 0, 2), (4, 4, 0)]\n" \
">>> matching_blocks(editops(a, b), len(a), len(b))\n" \
"[(1, 0, 2), (4, 4, 0)]\n" \
"\n" \
"The last zero-length block is not an error, but it's there for\n" \
"compatibility with difflib which always emits it.\n" \
"\n" \
"One can join the matching blocks to get two identical strings:\n" \
">>> a, b = 'dog kennels', 'mattresses'\n" \
">>> mb = matching_blocks(editops(a,b), a, b)\n" \
">>> ''.join([a[x[0]:x[0]+x[2]] for x in mb])\n" \
"'ees'\n" \
">>> ''.join([b[x[1]:x[1]+x[2]] for x in mb])\n" \
"'ees'\n"
#define subtract_edit_DESC \
"Subtract an edit subsequence from a sequence.\n" \
"\n" \
"subtract_edit(edit_operations, subsequence)\n" \
"\n" \
"The result is equivalent to\n" \
"editops(apply_edit(subsequence, s1, s2), s2), except that is\n" \
"constructed directly from the edit operations. That is, if you apply\n" \
"it to the result of subsequence application, you get the same final\n" \
"string as from application complete edit_operations. It may be not\n" \
"identical, though (in amibuous cases, like insertion of a character\n" \
"next to the same character).\n" \
"\n" \
"The subtracted subsequence must be an ordered subset of\n" \
"edit_operations.\n" \
"\n" \
"Note this function does not accept difflib-style opcodes as no one in\n" \
"his right mind wants to create subsequences from them.\n" \
"\n" \
"Examples:\n" \
"\n" \
">>> e = editops('man', 'scotsman')\n" \
">>> e1 = e[:3]\n" \
">>> bastard = apply_edit(e1, 'man', 'scotsman')\n" \
">>> bastard\n" \
"'scoman'\n" \
">>> apply_edit(subtract_edit(e, e1), bastard, 'scotsman')\n" \
"'scotsman'\n" \
#define METHODS_ITEM(x) { #x, x##_py, METH_VARARGS, x##_DESC }
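/* For instance, METHODS_ITEM(distance) expands to
 * { "distance", distance_py, METH_VARARGS, distance_DESC }. */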
static PyMethodDef methods[] = {
METHODS_ITEM(distance),
METHODS_ITEM(ratio),
METHODS_ITEM(hamming),
METHODS_ITEM(jaro),
METHODS_ITEM(jaro_winkler),
METHODS_ITEM(median),
METHODS_ITEM(median_improve),
METHODS_ITEM(quickmedian),
METHODS_ITEM(setmedian),
METHODS_ITEM(seqratio),
METHODS_ITEM(setratio),
METHODS_ITEM(editops),
METHODS_ITEM(opcodes),
METHODS_ITEM(inverse),
METHODS_ITEM(apply_edit),
METHODS_ITEM(matching_blocks),
METHODS_ITEM(subtract_edit),
{ NULL, NULL, 0, NULL },
};
/* opcode names, these are to be initialized in the init func,
* indexed by LevEditType values */
struct {
PyObject* pystring;
const char *cstring;
size_t len;
}
static opcode_names[] = {
{ NULL, "equal", 0 },
{ NULL, "replace", 0 },
{ NULL, "insert", 0 },
{ NULL, "delete", 0 },
};
#define N_OPCODE_NAMES ((sizeof(opcode_names)/sizeof(opcode_names[0])))
typedef lev_byte *(*MedianFuncString)(size_t n,
const size_t *lengths,
const lev_byte *strings[],
const double *weights,
size_t *medlength);
typedef Py_UNICODE *(*MedianFuncUnicode)(size_t n,
const size_t *lengths,
const Py_UNICODE *strings[],
const double *weights,
size_t *medlength);
typedef struct {
MedianFuncString s;
MedianFuncUnicode u;
} MedianFuncs;
typedef lev_byte *(*MedianImproveFuncString)(size_t len, const lev_byte *s,
size_t n,
const size_t *lengths,
const lev_byte *strings[],
const double *weights,
size_t *medlength);
typedef Py_UNICODE *(*MedianImproveFuncUnicode)(size_t len, const Py_UNICODE *s,
size_t n,
const size_t *lengths,
const Py_UNICODE *strings[],
const double *weights,
size_t *medlength);
typedef struct {
MedianImproveFuncString s;
MedianImproveFuncUnicode u;
} MedianImproveFuncs;
typedef double (*SetSeqFuncString)(size_t n1,
const size_t *lengths1,
const lev_byte *strings1[],
size_t n2,
const size_t *lengths2,
const lev_byte *strings2[]);
typedef double (*SetSeqFuncUnicode)(size_t n1,
const size_t *lengths1,
const Py_UNICODE *strings1[],
size_t n2,
const size_t *lengths2,
const Py_UNICODE *strings2[]);
typedef struct {
SetSeqFuncString s;
SetSeqFuncUnicode u;
} SetSeqFuncs;
static long int
levenshtein_common(PyObject *args,
const char *name,
size_t xcost,
size_t *lensum);
static int
extract_stringlist(PyObject *list,
const char *name,
size_t n,
size_t **sizelist,
void *strlist);
static double*
extract_weightlist(PyObject *wlist,
const char *name,
size_t n);
static PyObject*
median_common(PyObject *args,
const char *name,
MedianFuncs foo);
static PyObject*
median_improve_common(PyObject *args,
const char *name,
MedianImproveFuncs foo);
static double
setseq_common(PyObject *args,
const char *name,
SetSeqFuncs foo,
size_t *lensum);
static void *
safe_malloc(size_t nmemb, size_t size) {
/* extra-conservative overflow check */
if (SIZE_MAX / size <= nmemb) {
return NULL;
}
return malloc(nmemb * size);
}
static void *
safe_malloc_3(size_t nmemb1, size_t nmemb2, size_t size) {
/* extra-conservative overflow check */
if (SIZE_MAX / size <= nmemb2) {
return NULL;
}
return safe_malloc(nmemb1, nmemb2 * size);
}
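/* Illustration of the guard above, assuming a 32-bit size_t
 * (SIZE_MAX == 0xffffffff): safe_malloc(0x40000000, 8) is rejected because
 * SIZE_MAX / 8 == 0x1fffffff <= 0x40000000, whereas a bare
 * malloc(0x40000000 * 8) would silently wrap around to malloc(0). */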
/* }}} */
/****************************************************************************
*
* Python interface and subroutines
*
****************************************************************************/
/* {{{ */
static long int
levenshtein_common(PyObject *args, const char *name, size_t xcost,
size_t *lensum)
{
PyObject *arg1, *arg2;
size_t len1, len2;
if (!PyArg_UnpackTuple(args, PYARGCFIX(name), 2, 2, &arg1, &arg2))
return -1;
if (PyObject_TypeCheck(arg1, &PyString_Type)
&& PyObject_TypeCheck(arg2, &PyString_Type)) {
lev_byte *string1, *string2;
len1 = PyString_GET_SIZE(arg1);
len2 = PyString_GET_SIZE(arg2);
*lensum = len1 + len2;
string1 = PyString_AS_STRING(arg1);
string2 = PyString_AS_STRING(arg2);
{
size_t d = lev_edit_distance(len1, string1, len2, string2, xcost);
if (d == (size_t)(-1)) {
PyErr_NoMemory();
return -1;
}
return d;
}
}
else if (PyObject_TypeCheck(arg1, &PyUnicode_Type)
&& PyObject_TypeCheck(arg2, &PyUnicode_Type)) {
Py_UNICODE *string1, *string2;
len1 = PyUnicode_GET_SIZE(arg1);
len2 = PyUnicode_GET_SIZE(arg2);
*lensum = len1 + len2;
string1 = PyUnicode_AS_UNICODE(arg1);
string2 = PyUnicode_AS_UNICODE(arg2);
{
size_t d = lev_u_edit_distance(len1, string1, len2, string2, xcost);
if (d == (size_t)(-1)) {
PyErr_NoMemory();
return -1;
}
return d;
}
}
else {
PyErr_Format(PyExc_TypeError,
"%s expected two Strings or two Unicodes", name);
return -1;
}
}
static PyObject*
distance_py(PyObject *self, PyObject *args)
{
size_t lensum;
long int ldist;
if ((ldist = levenshtein_common(args, "distance", 0, &lensum)) < 0)
return NULL;
return PyInt_FromLong((long)ldist);
}
static PyObject*
ratio_py(PyObject *self, PyObject *args)
{
size_t lensum;
long int ldist;
if ((ldist = levenshtein_common(args, "ratio", 1, &lensum)) < 0)
return NULL;
if (lensum == 0)
return PyFloat_FromDouble(1.0);
return PyFloat_FromDouble((double)(lensum - ldist)/(lensum));
}
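/* Worked example (matching the ratio() doctest): for
 * ratio('Hello world!', 'Holly grail!') both strings have length 12, so
 * lensum is 24. With xcost set, replacements cost 2, which makes the
 * distance equal to lensum minus twice the length of the longest common
 * subsequence (24 - 2*7 = 10 here), giving (24 - 10)/24 = 0.58333... */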
static PyObject*
hamming_py(PyObject *self, PyObject *args)
{
PyObject *arg1, *arg2;
const char *name = "hamming";
size_t len1, len2;
long int dist;
if (!PyArg_UnpackTuple(args, PYARGCFIX(name), 2, 2, &arg1, &arg2))
return NULL;
if (PyObject_TypeCheck(arg1, &PyString_Type)
&& PyObject_TypeCheck(arg2, &PyString_Type)) {
lev_byte *string1, *string2;
len1 = PyString_GET_SIZE(arg1);
len2 = PyString_GET_SIZE(arg2);
if (len1 != len2) {
PyErr_Format(PyExc_ValueError,
"%s expected two strings of the same length", name);
return NULL;
}
string1 = PyString_AS_STRING(arg1);
string2 = PyString_AS_STRING(arg2);
dist = lev_hamming_distance(len1, string1, string2);
return PyInt_FromLong(dist);
}
else if (PyObject_TypeCheck(arg1, &PyUnicode_Type)
&& PyObject_TypeCheck(arg2, &PyUnicode_Type)) {
Py_UNICODE *string1, *string2;
len1 = PyUnicode_GET_SIZE(arg1);
len2 = PyUnicode_GET_SIZE(arg2);
if (len1 != len2) {
PyErr_Format(PyExc_ValueError,
"%s expected two unicodes of the same length", name);
return NULL;
}
string1 = PyUnicode_AS_UNICODE(arg1);
string2 = PyUnicode_AS_UNICODE(arg2);
dist = lev_u_hamming_distance(len1, string1, string2);
return PyInt_FromLong(dist);
}
else {
PyErr_Format(PyExc_TypeError,
"%s expected two Strings or two Unicodes", name);
return NULL;
}
}
static PyObject*
jaro_py(PyObject *self, PyObject *args)
{
PyObject *arg1, *arg2;
const char *name = "jaro";
size_t len1, len2;
if (!PyArg_UnpackTuple(args, PYARGCFIX(name), 2, 2, &arg1, &arg2))
return NULL;
if (PyObject_TypeCheck(arg1, &PyString_Type)
&& PyObject_TypeCheck(arg2, &PyString_Type)) {
lev_byte *string1, *string2;
len1 = PyString_GET_SIZE(arg1);
len2 = PyString_GET_SIZE(arg2);
string1 = PyString_AS_STRING(arg1);
string2 = PyString_AS_STRING(arg2);
return PyFloat_FromDouble(lev_jaro_ratio(len1, string1, len2, string2));
}
else if (PyObject_TypeCheck(arg1, &PyUnicode_Type)
&& PyObject_TypeCheck(arg2, &PyUnicode_Type)) {
Py_UNICODE *string1, *string2;
len1 = PyUnicode_GET_SIZE(arg1);
len2 = PyUnicode_GET_SIZE(arg2);
string1 = PyUnicode_AS_UNICODE(arg1);
string2 = PyUnicode_AS_UNICODE(arg2);
return PyFloat_FromDouble(lev_u_jaro_ratio(len1, string1, len2, string2));
}
else {
PyErr_Format(PyExc_TypeError,
"%s expected two Strings or two Unicodes", name);
return NULL;
}
}
static PyObject*
jaro_winkler_py(PyObject *self, PyObject *args)
{
PyObject *arg1, *arg2, *arg3 = NULL;
double pfweight = 0.1;
const char *name = "jaro_winkler";
size_t len1, len2;
if (!PyArg_UnpackTuple(args, PYARGCFIX(name), 2, 3, &arg1, &arg2, &arg3))
return NULL;
if (arg3) {
if (!PyObject_TypeCheck(arg3, &PyFloat_Type)) {
PyErr_Format(PyExc_TypeError, "%s third argument must be a Float", name);
return NULL;
}
pfweight = PyFloat_AS_DOUBLE(arg3);
if (pfweight < 0.0) {
PyErr_Format(PyExc_ValueError, "%s negative prefix weight", name);
return NULL;
}
}
if (PyObject_TypeCheck(arg1, &PyString_Type)
&& PyObject_TypeCheck(arg2, &PyString_Type)) {
lev_byte *string1, *string2;
len1 = PyString_GET_SIZE(arg1);
len2 = PyString_GET_SIZE(arg2);
string1 = PyString_AS_STRING(arg1);
string2 = PyString_AS_STRING(arg2);
return PyFloat_FromDouble(lev_jaro_winkler_ratio(len1, string1,
len2, string2,
pfweight));
}
else if (PyObject_TypeCheck(arg1, &PyUnicode_Type)
&& PyObject_TypeCheck(arg2, &PyUnicode_Type)) {
Py_UNICODE *string1, *string2;
len1 = PyUnicode_GET_SIZE(arg1);
len2 = PyUnicode_GET_SIZE(arg2);
string1 = PyUnicode_AS_UNICODE(arg1);
string2 = PyUnicode_AS_UNICODE(arg2);
return PyFloat_FromDouble(lev_u_jaro_winkler_ratio(len1, string1,
len2, string2,
pfweight));
}
else {
PyErr_Format(PyExc_TypeError,
"%s expected two Strings or two Unicodes", name);
return NULL;
}
}
static PyObject*
median_py(PyObject *self, PyObject *args)
{
MedianFuncs engines = { lev_greedy_median, lev_u_greedy_median };
return median_common(args, "median", engines);
}
static PyObject*
median_improve_py(PyObject *self, PyObject *args)
{
MedianImproveFuncs engines = { lev_median_improve, lev_u_median_improve };
return median_improve_common(args, "median_improve", engines);
}
static PyObject*
quickmedian_py(PyObject *self, PyObject *args)
{
MedianFuncs engines = { lev_quick_median, lev_u_quick_median };
return median_common(args, "quickmedian", engines);
}
static PyObject*
setmedian_py(PyObject *self, PyObject *args)
{
MedianFuncs engines = { lev_set_median, lev_u_set_median };
return median_common(args, "setmedian", engines);
}
static PyObject*
median_common(PyObject *args, const char *name, MedianFuncs foo)
{
size_t n, len;
void *strings = NULL;
size_t *sizes = NULL;
PyObject *strlist = NULL;
PyObject *wlist = NULL;
PyObject *strseq = NULL;
double *weights;
int stringtype;
PyObject *result = NULL;
if (!PyArg_UnpackTuple(args, PYARGCFIX(name), 1, 2, &strlist, &wlist))
return NULL;
if (!PySequence_Check(strlist)) {
PyErr_Format(PyExc_TypeError,
"%s first argument must be a Sequence", name);
return NULL;
}
strseq = PySequence_Fast(strlist, name);
n = PySequence_Fast_GET_SIZE(strseq);
if (n == 0) {
Py_INCREF(Py_None);
Py_DECREF(strseq);
return Py_None;
}
/* get (optional) weights, use 1 if none specified. */
weights = extract_weightlist(wlist, name, n);
if (!weights) {
Py_DECREF(strseq);
return NULL;
}
stringtype = extract_stringlist(strseq, name, n, &sizes, &strings);
Py_DECREF(strseq);
if (stringtype < 0) {
free(weights);
return NULL;
}
if (stringtype == 0) {
lev_byte *medstr = foo.s(n, sizes, strings, weights, &len);
if (!medstr && len)
result = PyErr_NoMemory();
else {
result = PyString_FromStringAndSize(medstr, len);
free(medstr);
}
}
else if (stringtype == 1) {
Py_UNICODE *medstr = foo.u(n, sizes, strings, weights, &len);
if (!medstr && len)
result = PyErr_NoMemory();
else {
result = PyUnicode_FromUnicode(medstr, len);
free(medstr);
}
}
else
PyErr_Format(PyExc_SystemError, "%s internal error", name);
free(strings);
free(weights);
free(sizes);
return result;
}
static PyObject*
median_improve_common(PyObject *args, const char *name, MedianImproveFuncs foo)
{
size_t n, len;
void *strings = NULL;
size_t *sizes = NULL;
PyObject *arg1 = NULL;
PyObject *strlist = NULL;
PyObject *wlist = NULL;
PyObject *strseq = NULL;
double *weights;
int stringtype;
PyObject *result = NULL;
if (!PyArg_UnpackTuple(args, PYARGCFIX(name), 2, 3, &arg1, &strlist, &wlist))
return NULL;
if (PyObject_TypeCheck(arg1, &PyString_Type))
stringtype = 0;
else if (PyObject_TypeCheck(arg1, &PyUnicode_Type))
stringtype = 1;
else {
PyErr_Format(PyExc_TypeError,
"%s first argument must be a String or Unicode", name);
return NULL;
}
if (!PySequence_Check(strlist)) {
PyErr_Format(PyExc_TypeError,
"%s second argument must be a Sequence", name);
return NULL;
}
strseq = PySequence_Fast(strlist, name);
n = PySequence_Fast_GET_SIZE(strseq);
if (n == 0) {
Py_INCREF(Py_None);
Py_DECREF(strseq);
return Py_None;
}
/* get (optional) weights, use 1 if none specified. */
weights = extract_weightlist(wlist, name, n);
if (!weights) {
Py_DECREF(strseq);
return NULL;
}
if (extract_stringlist(strseq, name, n, &sizes, &strings) != stringtype) {
PyErr_Format(PyExc_TypeError,
"%s argument types don't match", name);
free(weights);
return NULL;
}
Py_DECREF(strseq);
if (stringtype == 0) {
lev_byte *s = PyString_AS_STRING(arg1);
size_t l = PyString_GET_SIZE(arg1);
lev_byte *medstr = foo.s(l, s, n, sizes, strings, weights, &len);
if (!medstr && len)
result = PyErr_NoMemory();
else {
result = PyString_FromStringAndSize(medstr, len);
free(medstr);
}
}
else if (stringtype == 1) {
Py_UNICODE *s = PyUnicode_AS_UNICODE(arg1);
size_t l = PyUnicode_GET_SIZE(arg1);
Py_UNICODE *medstr = foo.u(l, s, n, sizes, strings, weights, &len);
if (!medstr && len)
result = PyErr_NoMemory();
else {
result = PyUnicode_FromUnicode(medstr, len);
free(medstr);
}
}
else
PyErr_Format(PyExc_SystemError, "%s internal error", name);
free(strings);
free(weights);
free(sizes);
return result;
}
static double*
extract_weightlist(PyObject *wlist, const char *name, size_t n)
{
size_t i;
double *weights = NULL;
PyObject *seq;
if (wlist) {
if (!PySequence_Check(wlist)) {
PyErr_Format(PyExc_TypeError,
"%s second argument must be a Sequence", name);
return NULL;
}
seq = PySequence_Fast(wlist, name);
if (PySequence_Fast_GET_SIZE(wlist) != n) {
PyErr_Format(PyExc_ValueError,
"%s got %i strings but %i weights",
name, n, PyList_GET_SIZE(wlist));
Py_DECREF(seq);
return NULL;
}
weights = (double*)safe_malloc(n, sizeof(double));
if (!weights)
return (double*)PyErr_NoMemory();
for (i = 0; i < n; i++) {
PyObject *item = PySequence_Fast_GET_ITEM(wlist, i);
PyObject *number = PyNumber_Float(item);
if (!number) {
free(weights);
PyErr_Format(PyExc_TypeError,
"%s weight #%i is not a Number", name, i);
Py_DECREF(seq);
return NULL;
}
weights[i] = PyFloat_AS_DOUBLE(number);
Py_DECREF(number);
if (weights[i] < 0) {
free(weights);
PyErr_Format(PyExc_ValueError,
"%s weight #%i is negative", name, i);
Py_DECREF(seq);
return NULL;
}
}
Py_DECREF(seq);
}
else {
weights = (double*)safe_malloc(n, sizeof(double));
if (!weights)
return (double*)PyErr_NoMemory();
for (i = 0; i < n; i++)
weights[i] = 1.0;
}
return weights;
}
/* extract a list of strings or unicode strings, returns
* 0 -- strings
* 1 -- unicode strings
* <0 -- failure
*/
static int
extract_stringlist(PyObject *list, const char *name,
size_t n, size_t **sizelist, void *strlist)
{
size_t i;
PyObject *first;
/* check the first object's type: when it's a string then all others must be
* strings too; when it's a unicode string then all others must be unicode
* strings too. */
first = PySequence_Fast_GET_ITEM(list, 0);
/* I don't exactly understand why the problem doesn't exhibit itself earlier
* but a queer error message is better than a segfault :o) */
if (first == (PyObject*)-1) {
PyErr_Format(PyExc_TypeError,
"%s undecomposable Sequence???", name);
return -1;
}
if (PyObject_TypeCheck(first, &PyString_Type)) {
lev_byte **strings;
size_t *sizes;
strings = (lev_byte**)safe_malloc(n, sizeof(lev_byte*));
if (!strings) {
PyErr_Format(PyExc_MemoryError,
"%s cannot allocate memory", name);
return -1;
}
sizes = (size_t*)safe_malloc(n, sizeof(size_t));
if (!sizes) {
free(strings);
PyErr_Format(PyExc_MemoryError,
"%s cannot allocate memory", name);
return -1;
}
strings[0] = PyString_AS_STRING(first);
sizes[0] = PyString_GET_SIZE(first);
for (i = 1; i < n; i++) {
PyObject *item = PySequence_Fast_GET_ITEM(list, i);
if (!PyObject_TypeCheck(item, &PyString_Type)) {
free(strings);
free(sizes);
PyErr_Format(PyExc_TypeError,
"%s item #%i is not a String", name, i);
return -1;
}
strings[i] = PyString_AS_STRING(item);
sizes[i] = PyString_GET_SIZE(item);
}
*(lev_byte***)strlist = strings;
*sizelist = sizes;
return 0;
}
if (PyObject_TypeCheck(first, &PyUnicode_Type)) {
Py_UNICODE **strings;
size_t *sizes;
strings = (Py_UNICODE**)safe_malloc(n, sizeof(Py_UNICODE*));
if (!strings) {
PyErr_NoMemory();
return -1;
}
sizes = (size_t*)safe_malloc(n, sizeof(size_t));
if (!sizes) {
free(strings);
PyErr_NoMemory();
return -1;
}
strings[0] = PyUnicode_AS_UNICODE(first);
sizes[0] = PyUnicode_GET_SIZE(first);
for (i = 1; i < n; i++) {
PyObject *item = PySequence_Fast_GET_ITEM(list, i);
if (!PyObject_TypeCheck(item, &PyUnicode_Type)) {
free(strings);
free(sizes);
PyErr_Format(PyExc_TypeError,
"%s item #%i is not a Unicode", name, i);
return -1;
}
strings[i] = PyUnicode_AS_UNICODE(item);
sizes[i] = PyUnicode_GET_SIZE(item);
}
*(Py_UNICODE***)strlist = strings;
*sizelist = sizes;
return 1;
}
PyErr_Format(PyExc_TypeError,
"%s expected list of Strings or Unicodes", name);
return -1;
}
static PyObject*
seqratio_py(PyObject *self, PyObject *args)
{
SetSeqFuncs engines = { lev_edit_seq_distance, lev_u_edit_seq_distance };
size_t lensum;
double r = setseq_common(args, "seqratio", engines, &lensum);
if (r < 0)
return NULL;
if (lensum == 0)
return PyFloat_FromDouble(1.0);
return PyFloat_FromDouble((lensum - r)/lensum);
}
static PyObject*
setratio_py(PyObject *self, PyObject *args)
{
SetSeqFuncs engines = { lev_set_distance, lev_u_set_distance };
size_t lensum;
double r = setseq_common(args, "setratio", engines, &lensum);
if (r < 0)
return NULL;
if (lensum == 0)
return PyFloat_FromDouble(1.0);
return PyFloat_FromDouble((lensum - r)/lensum);
}
static double
setseq_common(PyObject *args, const char *name, SetSeqFuncs foo,
size_t *lensum)
{
size_t n1, n2;
void *strings1 = NULL;
void *strings2 = NULL;
size_t *sizes1 = NULL;
size_t *sizes2 = NULL;
PyObject *strlist1;
PyObject *strlist2;
PyObject *strseq1;
PyObject *strseq2;
int stringtype1, stringtype2;
double r = -1.0;
if (!PyArg_UnpackTuple(args, PYARGCFIX(name), 2, 2, &strlist1, &strlist2))
return r;
if (!PySequence_Check(strlist1)) {
PyErr_Format(PyExc_TypeError,
"%s first argument must be a Sequence", name);
return r;
}
if (!PySequence_Check(strlist2)) {
PyErr_Format(PyExc_TypeError,
"%s second argument must be a Sequence", name);
return r;
}
strseq1 = PySequence_Fast(strlist1, name);
strseq2 = PySequence_Fast(strlist2, name);
n1 = PySequence_Fast_GET_SIZE(strseq1);
n2 = PySequence_Fast_GET_SIZE(strseq2);
*lensum = n1 + n2;
if (n1 == 0) {
Py_DECREF(strseq1);
Py_DECREF(strseq2);
return (double)n2;
}
if (n2 == 0) {
Py_DECREF(strseq1);
Py_DECREF(strseq2);
return (double)n1;
}
stringtype1 = extract_stringlist(strseq1, name, n1, &sizes1, &strings1);
Py_DECREF(strseq1);
if (stringtype1 < 0) {
Py_DECREF(strseq2);
return r;
}
stringtype2 = extract_stringlist(strseq2, name, n2, &sizes2, &strings2);
Py_DECREF(strseq2);
if (stringtype2 < 0) {
free(sizes1);
free(strings1);
return r;
}
if (stringtype1 != stringtype2) {
PyErr_Format(PyExc_TypeError,
"%s both sequences must consist of items of the same type",
name);
}
else if (stringtype1 == 0) {
r = foo.s(n1, sizes1, strings1, n2, sizes2, strings2);
if (r < 0.0)
PyErr_NoMemory();
}
else if (stringtype1 == 1) {
r = foo.u(n1, sizes1, strings1, n2, sizes2, strings2);
if (r < 0.0)
PyErr_NoMemory();
}
else
PyErr_Format(PyExc_SystemError, "%s internal error", name);
free(strings1);
free(strings2);
free(sizes1);
free(sizes2);
return r;
}
static inline LevEditType
string_to_edittype(PyObject *string)
{
const char *s;
size_t i, len;
for (i = 0; i < N_OPCODE_NAMES; i++) {
if (string == opcode_names[i].pystring)
return i;
}
/* With Python >= 2.2, we shouldn't get here, except when the strings are
* not Strings but subtypes. */
#ifdef LEV_PYTHON3
/* For Python 3, the string is a unicode object; use PyUnicode_CompareWithASCIIString */
if (!PyUnicode_Check(string)) {
return LEV_EDIT_LAST;
}
for (i = 0; i < N_OPCODE_NAMES; i++) {
if (PyUnicode_CompareWithASCIIString(string, opcode_names[i].cstring) == 0) {
return i;
}
}
#else
if (!PyString_Check(string))
return LEV_EDIT_LAST;
s = PyString_AS_STRING(string);
len = PyString_GET_SIZE(string);
for (i = 0; i < N_OPCODE_NAMES; i++) {
if (len == opcode_names[i].len
&& memcmp(s, opcode_names[i].cstring, len) == 0) {
return i;
}
}
#endif
return LEV_EDIT_LAST;
}
static LevEditOp*
extract_editops(PyObject *list)
{
LevEditOp *ops;
size_t i;
LevEditType type;
size_t n = PyList_GET_SIZE(list);
ops = (LevEditOp*)safe_malloc(n, sizeof(LevEditOp));
if (!ops)
return (LevEditOp*)PyErr_NoMemory();
for (i = 0; i < n; i++) {
PyObject *item;
PyObject *tuple = PyList_GET_ITEM(list, i);
if (!PyTuple_Check(tuple) || PyTuple_GET_SIZE(tuple) != 3) {
free(ops);
return NULL;
}
item = PyTuple_GET_ITEM(tuple, 0);
if ((type = string_to_edittype(item)) == LEV_EDIT_LAST) {
free(ops);
return NULL;
}
ops[i].type = type;
item = PyTuple_GET_ITEM(tuple, 1);
if (!PyInt_Check(item)) {
free(ops);
return NULL;
}
ops[i].spos = (size_t)PyInt_AS_LONG(item);
item = PyTuple_GET_ITEM(tuple, 2);
if (!PyInt_Check(item)) {
free(ops);
return NULL;
}
ops[i].dpos = (size_t)PyInt_AS_LONG(item);
}
return ops;
}
static LevOpCode*
extract_opcodes(PyObject *list)
{
LevOpCode *bops;
size_t i;
LevEditType type;
size_t nb = PyList_GET_SIZE(list);
bops = (LevOpCode*)safe_malloc(nb, sizeof(LevOpCode));
if (!bops)
return (LevOpCode*)PyErr_NoMemory();
for (i = 0; i < nb; i++) {
PyObject *item;
PyObject *tuple = PyList_GET_ITEM(list, i);
if (!PyTuple_Check(tuple) || PyTuple_GET_SIZE(tuple) != 5) {
free(bops);
return NULL;
}
item = PyTuple_GET_ITEM(tuple, 0);
if ((type = string_to_edittype(item)) == LEV_EDIT_LAST) {
free(bops);
return NULL;
}
bops[i].type = type;
item = PyTuple_GET_ITEM(tuple, 1);
if (!PyInt_Check(item)) {
free(bops);
return NULL;
}
bops[i].sbeg = (size_t)PyInt_AS_LONG(item);
item = PyTuple_GET_ITEM(tuple, 2);
if (!PyInt_Check(item)) {
free(bops);
return NULL;
}
bops[i].send = (size_t)PyInt_AS_LONG(item);
item = PyTuple_GET_ITEM(tuple, 3);
if (!PyInt_Check(item)) {
free(bops);
return NULL;
}
bops[i].dbeg = (size_t)PyInt_AS_LONG(item);
item = PyTuple_GET_ITEM(tuple, 4);
if (!PyInt_Check(item)) {
free(bops);
return NULL;
}
bops[i].dend = (size_t)PyInt_AS_LONG(item);
}
return bops;
}
static PyObject*
editops_to_tuple_list(size_t n, LevEditOp *ops)
{
PyObject *list;
size_t i;
list = PyList_New(n);
for (i = 0; i < n; i++, ops++) {
PyObject *tuple = PyTuple_New(3);
PyObject *is = opcode_names[ops->type].pystring;
Py_INCREF(is);
PyTuple_SET_ITEM(tuple, 0, is);
PyTuple_SET_ITEM(tuple, 1, PyInt_FromLong((long)ops->spos));
PyTuple_SET_ITEM(tuple, 2, PyInt_FromLong((long)ops->dpos));
PyList_SET_ITEM(list, i, tuple);
}
return list;
}
static PyObject*
matching_blocks_to_tuple_list(size_t len1, size_t len2,
size_t nmb, LevMatchingBlock *mblocks)
{
PyObject *list, *tuple;
size_t i;
list = PyList_New(nmb + 1);
for (i = 0; i < nmb; i++, mblocks++) {
tuple = PyTuple_New(3);
PyTuple_SET_ITEM(tuple, 0, PyInt_FromLong((long)mblocks->spos));
PyTuple_SET_ITEM(tuple, 1, PyInt_FromLong((long)mblocks->dpos));
PyTuple_SET_ITEM(tuple, 2, PyInt_FromLong((long)mblocks->len));
PyList_SET_ITEM(list, i, tuple);
}
tuple = PyTuple_New(3);
PyTuple_SET_ITEM(tuple, 0, PyInt_FromLong((long)len1));
PyTuple_SET_ITEM(tuple, 1, PyInt_FromLong((long)len2));
PyTuple_SET_ITEM(tuple, 2, PyInt_FromLong((long)0));
PyList_SET_ITEM(list, nmb, tuple);
return list;
}
static size_t
get_length_of_anything(PyObject *object)
{
if (PyInt_Check(object)) {
long int len = PyInt_AS_LONG(object);
if (len < 0)
len = -1;
return (size_t)len;
}
if (PySequence_Check(object))
return PySequence_Length(object);
return (size_t)-1;
}
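/* This helper is what lets editops(), opcodes() and matching_blocks() accept
 * either the strings themselves or just their lengths as size arguments: an
 * int/long is taken at face value, any other sequence contributes its
 * length, and (size_t)-1 signals an unusable argument. */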
static PyObject*
editops_py(PyObject *self, PyObject *args)
{
PyObject *arg1, *arg2, *arg3 = NULL;
PyObject *oplist;
size_t len1, len2, n;
LevEditOp *ops;
LevOpCode *bops;
if (!PyArg_UnpackTuple(args, PYARGCFIX("editops"), 2, 3,
&arg1, &arg2, &arg3)) {
return NULL;
}
/* convert: we were called (bops, s1, s2) */
if (arg3) {
if (!PyList_Check(arg1)) {
PyErr_Format(PyExc_ValueError,
"editops first argument must be a List of edit operations");
return NULL;
}
n = PyList_GET_SIZE(arg1);
if (!n) {
Py_INCREF(arg1);
return arg1;
}
len1 = get_length_of_anything(arg2);
len2 = get_length_of_anything(arg3);
if (len1 == (size_t)-1 || len2 == (size_t)-1) {
PyErr_Format(PyExc_ValueError,
"editops second and third argument must specify sizes");
return NULL;
}
if ((bops = extract_opcodes(arg1)) != NULL) {
if (lev_opcodes_check_errors(len1, len2, n, bops)) {
PyErr_Format(PyExc_ValueError,
"editops edit operation list is invalid");
free(bops);
return NULL;
}
ops = lev_opcodes_to_editops(n, bops, &n, 0); /* XXX: different n's! */
if (!ops && n) {
free(bops);
return PyErr_NoMemory();
}
oplist = editops_to_tuple_list(n, ops);
free(ops);
free(bops);
return oplist;
}
if ((ops = extract_editops(arg1)) != NULL) {
if (lev_editops_check_errors(len1, len2, n, ops)) {
PyErr_Format(PyExc_ValueError,
"editops edit operation list is invalid");
free(ops);
return NULL;
}
free(ops);
Py_INCREF(arg1); /* editops -> editops is identity */
return arg1;
}
if (!PyErr_Occurred())
PyErr_Format(PyExc_TypeError,
"editops first argument must be a List of edit operations");
return NULL;
}
/* find editops: we were called (s1, s2) */
if (PyObject_TypeCheck(arg1, &PyString_Type)
&& PyObject_TypeCheck(arg2, &PyString_Type)) {
lev_byte *string1, *string2;
len1 = PyString_GET_SIZE(arg1);
len2 = PyString_GET_SIZE(arg2);
string1 = PyString_AS_STRING(arg1);
string2 = PyString_AS_STRING(arg2);
ops = lev_editops_find(len1, string1, len2, string2, &n);
}
else if (PyObject_TypeCheck(arg1, &PyUnicode_Type)
&& PyObject_TypeCheck(arg2, &PyUnicode_Type)) {
Py_UNICODE *string1, *string2;
len1 = PyUnicode_GET_SIZE(arg1);
len2 = PyUnicode_GET_SIZE(arg2);
string1 = PyUnicode_AS_UNICODE(arg1);
string2 = PyUnicode_AS_UNICODE(arg2);
ops = lev_u_editops_find(len1, string1, len2, string2, &n);
}
else {
PyErr_Format(PyExc_TypeError,
"editops expected two Strings or two Unicodes");
return NULL;
}
if (!ops && n)
return PyErr_NoMemory();
oplist = editops_to_tuple_list(n, ops);
free(ops);
return oplist;
}
static PyObject*
opcodes_to_tuple_list(size_t nb, LevOpCode *bops)
{
PyObject *list;
size_t i;
list = PyList_New(nb);
for (i = 0; i < nb; i++, bops++) {
PyObject *tuple = PyTuple_New(5);
PyObject *is = opcode_names[bops->type].pystring;
Py_INCREF(is);
PyTuple_SET_ITEM(tuple, 0, is);
PyTuple_SET_ITEM(tuple, 1, PyInt_FromLong((long)bops->sbeg));
PyTuple_SET_ITEM(tuple, 2, PyInt_FromLong((long)bops->send));
PyTuple_SET_ITEM(tuple, 3, PyInt_FromLong((long)bops->dbeg));
PyTuple_SET_ITEM(tuple, 4, PyInt_FromLong((long)bops->dend));
PyList_SET_ITEM(list, i, tuple);
}
return list;
}
static PyObject*
opcodes_py(PyObject *self, PyObject *args)
{
PyObject *arg1, *arg2, *arg3 = NULL;
PyObject *oplist;
size_t len1, len2, n, nb;
LevEditOp *ops;
LevOpCode *bops;
if (!PyArg_UnpackTuple(args, PYARGCFIX("opcodes"), 2, 3,
&arg1, &arg2, &arg3))
return NULL;
/* convert: we were called (ops, s1, s2) */
if (arg3) {
if (!PyList_Check(arg1)) {
PyErr_Format(PyExc_TypeError,
"opcodes first argument must be a List of edit operations");
return NULL;
}
n = PyList_GET_SIZE(arg1);
len1 = get_length_of_anything(arg2);
len2 = get_length_of_anything(arg3);
if (len1 == (size_t)-1 || len2 == (size_t)-1) {
PyErr_Format(PyExc_ValueError,
"opcodes second and third argument must specify sizes");
return NULL;
}
if ((ops = extract_editops(arg1)) != NULL) {
if (lev_editops_check_errors(len1, len2, n, ops)) {
PyErr_Format(PyExc_ValueError,
"opcodes edit operation list is invalid");
free(ops);
return NULL;
}
bops = lev_editops_to_opcodes(n, ops, &n, len1, len2); /* XXX: n != n */
if (!bops && n) {
free(ops);
return PyErr_NoMemory();
}
oplist = opcodes_to_tuple_list(n, bops);
free(bops);
free(ops);
return oplist;
}
if ((bops = extract_opcodes(arg1)) != NULL) {
if (lev_opcodes_check_errors(len1, len2, n, bops)) {
PyErr_Format(PyExc_ValueError,
"opcodes edit operation list is invalid");
free(bops);
return NULL;
}
free(bops);
Py_INCREF(arg1); /* opcodes -> opcodes is identity */
return arg1;
}
if (!PyErr_Occurred())
PyErr_Format(PyExc_TypeError,
"opcodes first argument must be a List of edit operations");
return NULL;
}
/* find opcodes: we were called (s1, s2) */
if (PyObject_TypeCheck(arg1, &PyString_Type)
&& PyObject_TypeCheck(arg2, &PyString_Type)) {
lev_byte *string1, *string2;
len1 = PyString_GET_SIZE(arg1);
len2 = PyString_GET_SIZE(arg2);
string1 = PyString_AS_STRING(arg1);
string2 = PyString_AS_STRING(arg2);
ops = lev_editops_find(len1, string1, len2, string2, &n);
}
else if (PyObject_TypeCheck(arg1, &PyUnicode_Type)
&& PyObject_TypeCheck(arg2, &PyUnicode_Type)) {
Py_UNICODE *string1, *string2;
len1 = PyUnicode_GET_SIZE(arg1);
len2 = PyUnicode_GET_SIZE(arg2);
string1 = PyUnicode_AS_UNICODE(arg1);
string2 = PyUnicode_AS_UNICODE(arg2);
ops = lev_u_editops_find(len1, string1, len2, string2, &n);
}
else {
PyErr_Format(PyExc_TypeError,
"opcodes expected two Strings or two Unicodes");
return NULL;
}
if (!ops && n)
return PyErr_NoMemory();
bops = lev_editops_to_opcodes(n, ops, &nb, len1, len2);
free(ops);
if (!bops && nb)
return PyErr_NoMemory();
oplist = opcodes_to_tuple_list(nb, bops);
free(bops);
return oplist;
}
static PyObject*
inverse_py(PyObject *self, PyObject *args)
{
PyObject *list, *result;
size_t n;
LevEditOp *ops;
LevOpCode *bops;
if (!PyArg_UnpackTuple(args, PYARGCFIX("inverse"), 1, 1, &list)
|| !PyList_Check(list))
return NULL;
n = PyList_GET_SIZE(list);
if (!n) {
Py_INCREF(list);
return list;
}
if ((ops = extract_editops(list)) != NULL) {
lev_editops_invert(n, ops);
result = editops_to_tuple_list(n, ops);
free(ops);
return result;
}
if ((bops = extract_opcodes(list)) != NULL) {
lev_opcodes_invert(n, bops);
result = opcodes_to_tuple_list(n, bops);
free(bops);
return result;
}
if (!PyErr_Occurred())
PyErr_Format(PyExc_TypeError,
"inverse expected a list of edit operations");
return NULL;
}
static PyObject*
apply_edit_py(PyObject *self, PyObject *args)
{
PyObject *list, *result, *arg1, *arg2;
size_t n, len, len1, len2;
LevEditOp *ops;
LevOpCode *bops;
if (!PyArg_UnpackTuple(args, PYARGCFIX("apply_edit"), 3, 3,
&list, &arg1, &arg2))
return NULL;
if (!PyList_Check(list)) {
PyErr_Format(PyExc_TypeError,
"apply_edit first argument must be a List of edit operations");
return NULL;
}
n = PyList_GET_SIZE(list);
if (PyObject_TypeCheck(arg1, &PyString_Type)
&& PyObject_TypeCheck(arg2, &PyString_Type)) {
lev_byte *string1, *string2, *s;
if (!n) {
Py_INCREF(arg1);
return arg1;
}
len1 = PyString_GET_SIZE(arg1);
len2 = PyString_GET_SIZE(arg2);
string1 = PyString_AS_STRING(arg1);
string2 = PyString_AS_STRING(arg2);
if ((ops = extract_editops(list)) != NULL) {
if (lev_editops_check_errors(len1, len2, n, ops)) {
PyErr_Format(PyExc_ValueError,
"apply_edit edit operations are invalid or inapplicable");
free(ops);
return NULL;
}
s = lev_editops_apply(len1, string1, len2, string2,
n, ops, &len);
free(ops);
if (!s && len)
return PyErr_NoMemory();
result = PyString_FromStringAndSize(s, len);
free(s);
return result;
}
if ((bops = extract_opcodes(list)) != NULL) {
if (lev_opcodes_check_errors(len1, len2, n, bops)) {
PyErr_Format(PyExc_ValueError,
"apply_edit edit operations are invalid or inapplicable");
free(bops);
return NULL;
}
s = lev_opcodes_apply(len1, string1, len2, string2,
n, bops, &len);
free(bops);
if (!s && len)
return PyErr_NoMemory();
result = PyString_FromStringAndSize(s, len);
free(s);
return result;
}
if (!PyErr_Occurred())
PyErr_Format(PyExc_TypeError,
"apply_edit first argument must be "
"a list of edit operations");
return NULL;
}
if (PyObject_TypeCheck(arg1, &PyUnicode_Type)
&& PyObject_TypeCheck(arg2, &PyUnicode_Type)) {
Py_UNICODE *string1, *string2, *s;
if (!n) {
Py_INCREF(arg1);
return arg1;
}
len1 = PyUnicode_GET_SIZE(arg1);
len2 = PyUnicode_GET_SIZE(arg2);
string1 = PyUnicode_AS_UNICODE(arg1);
string2 = PyUnicode_AS_UNICODE(arg2);
if ((ops = extract_editops(list)) != NULL) {
if (lev_editops_check_errors(len1, len2, n, ops)) {
PyErr_Format(PyExc_ValueError,
"apply_edit edit operations are invalid or inapplicable");
free(ops);
return NULL;
}
s = lev_u_editops_apply(len1, string1, len2, string2,
n, ops, &len);
free(ops);
if (!s && len)
return PyErr_NoMemory();
result = PyUnicode_FromUnicode(s, len);
free(s);
return result;
}
if ((bops = extract_opcodes(list)) != NULL) {
if (lev_opcodes_check_errors(len1, len2, n, bops)) {
PyErr_Format(PyExc_ValueError,
"apply_edit edit operations are invalid or inapplicable");
free(bops);
return NULL;
}
s = lev_u_opcodes_apply(len1, string1, len2, string2,
n, bops, &len);
free(bops);
if (!s && len)
return PyErr_NoMemory();
result = PyUnicode_FromUnicode(s, len);
free(s);
return result;
}
if (!PyErr_Occurred())
PyErr_Format(PyExc_TypeError,
"apply_edit first argument must be "
"a list of edit operations");
return NULL;
}
PyErr_Format(PyExc_TypeError,
"apply_edit expected two Strings or two Unicodes");
return NULL;
}
static PyObject*
matching_blocks_py(PyObject *self, PyObject *args)
{
PyObject *list, *arg1, *arg2, *result;
size_t n, nmb, len1, len2;
LevEditOp *ops;
LevOpCode *bops;
LevMatchingBlock *mblocks;
if (!PyArg_UnpackTuple(args, PYARGCFIX("matching_blocks"), 3, 3,
&list, &arg1, &arg2)
|| !PyList_Check(list))
return NULL;
if (!PyList_Check(list)) {
PyErr_Format(PyExc_TypeError,
"matching_blocks first argument must be "
"a List of edit operations");
return NULL;
}
n = PyList_GET_SIZE(list);
len1 = get_length_of_anything(arg1);
len2 = get_length_of_anything(arg2);
if (len1 == (size_t)-1 || len2 == (size_t)-1) {
PyErr_Format(PyExc_ValueError,
"matching_blocks second and third argument "
"must specify sizes");
return NULL;
}
if ((ops = extract_editops(list)) != NULL) {
if (lev_editops_check_errors(len1, len2, n, ops)) {
PyErr_Format(PyExc_ValueError,
"apply_edit edit operations are invalid or inapplicable");
free(ops);
return NULL;
}
mblocks = lev_editops_matching_blocks(len1, len2, n, ops, &nmb);
free(ops);
if (!mblocks && nmb)
return PyErr_NoMemory();
result = matching_blocks_to_tuple_list(len1, len2, nmb, mblocks);
free(mblocks);
return result;
}
if ((bops = extract_opcodes(list)) != NULL) {
if (lev_opcodes_check_errors(len1, len2, n, bops)) {
PyErr_Format(PyExc_ValueError,
"apply_edit edit operations are invalid or inapplicable");
free(bops);
return NULL;
}
mblocks = lev_opcodes_matching_blocks(len1, len2, n, bops, &nmb);
free(bops);
if (!mblocks && nmb)
return PyErr_NoMemory();
result = matching_blocks_to_tuple_list(len1, len2, nmb, mblocks);
free(mblocks);
return result;
}
if (!PyErr_Occurred())
PyErr_Format(PyExc_TypeError,
"inverse expected a list of edit operations");
return NULL;
}
static PyObject*
subtract_edit_py(PyObject *self, PyObject *args)
{
PyObject *list, *sub, *result;
size_t n, ns, nr;
LevEditOp *ops, *osub, *orem;
if (!PyArg_UnpackTuple(args, PYARGCFIX("subtract_edit"), 2, 2, &list, &sub)
|| !PyList_Check(list))
return NULL;
ns = PyList_GET_SIZE(sub);
if (!ns) {
Py_INCREF(list);
return list;
}
n = PyList_GET_SIZE(list);
if (!n) {
PyErr_Format(PyExc_ValueError,
"subtract_edit subsequence is not a subsequence "
"or is invalid");
return NULL;
}
if ((ops = extract_editops(list)) != NULL) {
if ((osub = extract_editops(sub)) != NULL) {
orem = lev_editops_subtract(n, ops, ns, osub, &nr);
free(ops);
free(osub);
if (!orem && nr == -1) {
PyErr_Format(PyExc_ValueError,
"subtract_edit subsequence is not a subsequence "
"or is invalid");
return NULL;
}
result = editops_to_tuple_list(nr, orem);
free(orem);
return result;
}
free(ops);
}
if (!PyErr_Occurred())
PyErr_Format(PyExc_TypeError,
"subtract_edit expected two lists of edit operations");
return NULL;
}
PY_MOD_INIT_FUNC_DEF(_levenshtein)
{
#ifdef LEV_PYTHON3
PyObject *module;
#endif
size_t i;
PY_INIT_MOD(module, "_levenshtein", Levenshtein_DESC, methods)
/* create intern strings for edit operation names */
if (opcode_names[0].pystring)
abort();
for (i = 0; i < N_OPCODE_NAMES; i++) {
#ifdef LEV_PYTHON3
opcode_names[i].pystring
= PyUnicode_InternFromString(opcode_names[i].cstring);
#else
opcode_names[i].pystring
= PyString_InternFromString(opcode_names[i].cstring);
#endif
opcode_names[i].len = strlen(opcode_names[i].cstring);
}
lev_init_rng(0);
#ifdef LEV_PYTHON3
return module;
#endif
}
/* }}} */
#endif /* not NO_PYTHON */
/****************************************************************************
*
* C (i.e. executive) part
*
****************************************************************************/
/****************************************************************************
*
* Taus113
*
****************************************************************************/
/* {{{ */
/*
* Based on Tausworthe random generator implementation rng/taus113.c
* from the GNU Scientific Library (http://sources.redhat.com/gsl/)
* which has the notice
* Copyright (C) 2002 Atakan Gurkan
* Based on the file taus.c which has the notice
* Copyright (C) 1996, 1997, 1998, 1999, 2000 James Theiler, Brian Gough
*/
static inline unsigned long
taus113_get(taus113_state_t *state)
{
unsigned long b1, b2, b3, b4;
b1 = ((((state->z1 << 6UL) & TAUS113_MASK) ^ state->z1) >> 13UL);
state->z1 = ((((state->z1 & 4294967294UL) << 18UL) & TAUS113_MASK) ^ b1);
b2 = ((((state->z2 << 2UL) & TAUS113_MASK) ^ state->z2) >> 27UL);
state->z2 = ((((state->z2 & 4294967288UL) << 2UL) & TAUS113_MASK) ^ b2);
b3 = ((((state->z3 << 13UL) & TAUS113_MASK) ^ state->z3) >> 21UL);
state->z3 = ((((state->z3 & 4294967280UL) << 7UL) & TAUS113_MASK) ^ b3);
b4 = ((((state->z4 << 3UL) & TAUS113_MASK) ^ state->z4) >> 12UL);
state->z4 = ((((state->z4 & 4294967168UL) << 13UL) & TAUS113_MASK) ^ b4);
return (state->z1 ^ state->z2 ^ state->z3 ^ state->z4);
}
static void
taus113_set(taus113_state_t *state,
unsigned long int s)
{
if (!s)
s = 1UL; /* default seed is 1 */
state->z1 = TAUS113_LCG(s);
if (state->z1 < 2UL)
state->z1 += 2UL;
state->z2 = TAUS113_LCG(state->z1);
if (state->z2 < 8UL)
state->z2 += 8UL;
state->z3 = TAUS113_LCG(state->z2);
if (state->z3 < 16UL)
state->z3 += 16UL;
state->z4 = TAUS113_LCG(state->z3);
if (state->z4 < 128UL)
state->z4 += 128UL;
/* Calling RNG ten times to satisfy recurrence condition */
taus113_get(state);
taus113_get(state);
taus113_get(state);
taus113_get(state);
taus113_get(state);
taus113_get(state);
taus113_get(state);
taus113_get(state);
taus113_get(state);
taus113_get(state);
}
/**
* lev_init_rng:
* @seed: A seed. If zero, a default value (currently 1) is used instead.
*
* Initializes the random generator used by some Levenshtein functions.
*
* This does NOT happen automatically when these functions are used.
**/
_LEV_STATIC_PY void
lev_init_rng(unsigned long int seed)
{
static taus113_state_t state;
taus113_set(&state, seed);
}
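/*
* Illustrative sketch, not part of the original source: seeding the generator
* explicitly when the C functions are used outside the Python module (the
* module init code above calls lev_init_rng(0) for you). The LEV_EXAMPLES
* guard is a hypothetical macro, so this block is never compiled by default.
*/
#ifdef LEV_EXAMPLES
static void
example_seed_rng(void)
{
/* 0 falls back to the default seed (currently 1) */
lev_init_rng(0);
/* any other value gives its own reproducible stream */
lev_init_rng(20050113UL);
}
#endif /* LEV_EXAMPLES */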
/* }}} */
/****************************************************************************
*
* Basic stuff, Levenshtein distance
*
****************************************************************************/
/* {{{ */
/**
* lev_edit_distance:
* @len1: The length of @string1.
* @string1: A sequence of bytes of length @len1, may contain NUL characters.
* @len2: The length of @string2.
* @string2: A sequence of bytes of length @len2, may contain NUL characters.
* @xcost: If nonzero, the replace operation has weight 2, otherwise all
* edit operations have equal weights of 1.
*
* Computes Levenshtein edit distance of two strings.
*
* Returns: The edit distance.
**/
_LEV_STATIC_PY size_t
lev_edit_distance(size_t len1, const lev_byte *string1,
size_t len2, const lev_byte *string2,
int xcost)
{
size_t i;
size_t *row; /* we only need to keep one row of costs */
size_t *end;
size_t half;
/* strip common prefix */
while (len1 > 0 && len2 > 0 && *string1 == *string2) {
len1--;
len2--;
string1++;
string2++;
}
/* strip common suffix */
while (len1 > 0 && len2 > 0 && string1[len1-1] == string2[len2-1]) {
len1--;
len2--;
}
/* catch trivial cases */
if (len1 == 0)
return len2;
if (len2 == 0)
return len1;
/* make the inner cycle (i.e. string2) the longer one */
if (len1 > len2) {
size_t nx = len1;
const lev_byte *sx = string1;
len1 = len2;
len2 = nx;
string1 = string2;
string2 = sx;
}
/* check len1 == 1 separately */
if (len1 == 1) {
if (xcost)
return len2 + 1 - 2*(memchr(string2, *string1, len2) != NULL);
else
return len2 - (memchr(string2, *string1, len2) != NULL);
}
len1++;
len2++;
half = len1 >> 1;
/* initialize first row */
row = (size_t*)safe_malloc(len2, sizeof(size_t));
if (!row)
return (size_t)(-1);
end = row + len2 - 1;
for (i = 0; i < len2 - (xcost ? 0 : half); i++)
row[i] = i;
/* go through the matrix and compute the costs. yes, this is an extremely
* obfuscated version, but also extremely memory-conservative and relatively
* fast. */
if (xcost) {
for (i = 1; i < len1; i++) {
size_t *p = row + 1;
const lev_byte char1 = string1[i - 1];
const lev_byte *char2p = string2;
size_t D = i;
size_t x = i;
while (p <= end) {
if (char1 == *(char2p++))
x = --D;
else
x++;
D = *p;
D++;
if (x > D)
x = D;
*(p++) = x;
}
}
}
else {
/* in this case we don't have to scan two corner triangles (of size len1/2)
* in the matrix because no best path can go through them. note this
* breaks when len1 == len2 == 2 so the memchr() special case above is
* necessary */
row[0] = len1 - half - 1;
for (i = 1; i < len1; i++) {
size_t *p;
const lev_byte char1 = string1[i - 1];
const lev_byte *char2p;
size_t D, x;
/* skip the upper triangle */
if (i >= len1 - half) {
size_t offset = i - (len1 - half);
size_t c3;
char2p = string2 + offset;
p = row + offset;
c3 = *(p++) + (char1 != *(char2p++));
x = *p;
x++;
D = x;
if (x > c3)
x = c3;
*(p++) = x;
}
else {
p = row + 1;
char2p = string2;
D = x = i;
}
/* skip the lower triangle */
if (i <= half + 1)
end = row + len2 + i - half - 2;
/* main */
while (p <= end) {
size_t c3 = --D + (char1 != *(char2p++));
x++;
if (x > c3)
x = c3;
D = *p;
D++;
if (x > D)
x = D;
*(p++) = x;
}
/* lower triangle sentinel */
if (i <= half) {
size_t c3 = --D + (char1 != *char2p);
x++;
if (x > c3)
x = c3;
*p = x;
}
}
}
i = *end;
free(row);
return i;
}
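/*
* Illustrative sketch, not part of the original source (LEV_EXAMPLES is a
* hypothetical guard macro, so this block is never compiled by default):
* the effect of @xcost. Turning "kitten" into "sitting" takes two
* replacements and one insertion, so the plain distance is 3, while with
* replacements weighted 2 it becomes 2+2+1 = 5.
*/
#ifdef LEV_EXAMPLES
static void
example_edit_distance(void)
{
const lev_byte *s1 = (const lev_byte*)"kitten";
const lev_byte *s2 = (const lev_byte*)"sitting";
assert(lev_edit_distance(6, s1, 7, s2, 0) == 3);
assert(lev_edit_distance(6, s1, 7, s2, 1) == 5);
}
#endif /* LEV_EXAMPLES */
/* Computes the weighted sum of distances (SOD) of @string to each of the @n
* @strings; returns -1.0 when a distance computation fails. */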
_LEV_STATIC_PY double
lev_edit_distance_sod(size_t len, const lev_byte *string,
size_t n, const size_t *lengths,
const lev_byte *strings[],
const double *weights,
int xcost)
{
size_t i, d;
double sum = 0.0;
for (i = 0; i < n; i++) {
d = lev_edit_distance(len, string, lengths[i], strings[i], xcost);
if (d == (size_t)-1)
return -1.0;
sum += weights[i]*d;
}
return sum;
}
/**
* lev_u_edit_distance:
* @len1: The length of @string1.
* @string1: A sequence of Unicode characters of length @len1, may contain NUL
* characters.
* @len2: The length of @string2.
* @string2: A sequence of Unicode characters of length @len2, may contain NUL
* characters.
* @xcost: If nonzero, the replace operation has weight 2, otherwise all
* edit operations have equal weights of 1.
*
* Computes Levenshtein edit distance of two Unicode strings.
*
* Returns: The edit distance.
**/
_LEV_STATIC_PY size_t
lev_u_edit_distance(size_t len1, const lev_wchar *string1,
size_t len2, const lev_wchar *string2,
int xcost)
{
size_t i;
size_t *row; /* we only need to keep one row of costs */
size_t *end;
size_t half;
/* strip common prefix */
while (len1 > 0 && len2 > 0 && *string1 == *string2) {
len1--;
len2--;
string1++;
string2++;
}
/* strip common suffix */
while (len1 > 0 && len2 > 0 && string1[len1-1] == string2[len2-1]) {
len1--;
len2--;
}
/* catch trivial cases */
if (len1 == 0)
return len2;
if (len2 == 0)
return len1;
/* make the inner cycle (i.e. string2) the longer one */
if (len1 > len2) {
size_t nx = len1;
const lev_wchar *sx = string1;
len1 = len2;
len2 = nx;
string1 = string2;
string2 = sx;
}
/* check len1 == 1 separately */
if (len1 == 1) {
lev_wchar z = *string1;
const lev_wchar *p = string2;
for (i = len2; i; i--) {
if (*(p++) == z)
return len2 - 1;
}
return len2 + (xcost != 0);
}
len1++;
len2++;
half = len1 >> 1;
/* initialize first row */
row = (size_t*)safe_malloc(len2, sizeof(size_t));
if (!row)
return (size_t)(-1);
end = row + len2 - 1;
for (i = 0; i < len2 - (xcost ? 0 : half); i++)
row[i] = i;
/* go through the matrix and compute the costs. yes, this is an extremely
* obfuscated version, but also extremely memory-conservative and relatively
* fast. */
if (xcost) {
for (i = 1; i < len1; i++) {
size_t *p = row + 1;
const lev_wchar char1 = string1[i - 1];
const lev_wchar *char2p = string2;
size_t D = i - 1;
size_t x = i;
while (p <= end) {
if (char1 == *(char2p++))
x = D;
else
x++;
D = *p;
if (x > D + 1)
x = D + 1;
*(p++) = x;
}
}
}
else {
/* in this case we don't have to scan two corner triangles (of size len1/2)
* in the matrix because no best path can go through them. note this
* breaks when len1 == len2 == 2 so the len1 == 1 special case above is
* necessary */
row[0] = len1 - half - 1;
for (i = 1; i < len1; i++) {
size_t *p;
const lev_wchar char1 = string1[i - 1];
const lev_wchar *char2p;
size_t D, x;
/* skip the upper triangle */
if (i >= len1 - half) {
size_t offset = i - (len1 - half);
size_t c3;
char2p = string2 + offset;
p = row + offset;
c3 = *(p++) + (char1 != *(char2p++));
x = *p;
x++;
D = x;
if (x > c3)
x = c3;
*(p++) = x;
}
else {
p = row + 1;
char2p = string2;
D = x = i;
}
/* skip the lower triangle */
if (i <= half + 1)
end = row + len2 + i - half - 2;
/* main */
while (p <= end) {
size_t c3 = --D + (char1 != *(char2p++));
x++;
if (x > c3)
x = c3;
D = *p;
D++;
if (x > D)
x = D;
*(p++) = x;
}
/* lower triangle sentinel */
if (i <= half) {
size_t c3 = --D + (char1 != *char2p);
x++;
if (x > c3)
x = c3;
*p = x;
}
}
}
i = *end;
free(row);
return i;
}
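/*
* Illustrative sketch, not part of the original source (LEV_EXAMPLES is a
* hypothetical guard macro): lev_u_edit_distance() works on lev_wchar
* buffers, which can be built directly when no Python Unicode object is at
* hand; "flaw" -> "lawn" needs one deletion and one insertion.
*/
#ifdef LEV_EXAMPLES
static void
example_u_edit_distance(void)
{
const lev_wchar s1[] = { 'f', 'l', 'a', 'w' };
const lev_wchar s2[] = { 'l', 'a', 'w', 'n' };
assert(lev_u_edit_distance(4, s1, 4, s2, 0) == 2);
}
#endif /* LEV_EXAMPLES */
/* Computes the weighted sum of distances (SOD) of @string to each of the @n
* @strings; returns -1.0 when a distance computation fails. */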
_LEV_STATIC_PY double
lev_u_edit_distance_sod(size_t len, const lev_wchar *string,
size_t n, const size_t *lengths,
const lev_wchar *strings[],
const double *weights,
int xcost)
{
size_t i, d;
double sum = 0.0;
for (i = 0; i < n; i++) {
d = lev_u_edit_distance(len, string, lengths[i], strings[i], xcost);
if (d == (size_t)-1)
return -1.0;
sum += weights[i]*d;
}
return sum;
}
/* }}} */
/****************************************************************************
*
* Other simple distances: Hamming, Jaro, Jaro-Winkler
*
****************************************************************************/
/* {{{ */
/**
* lev_hamming_distance:
* @len: The length of @string1 and @string2.
* @string1: A sequence of bytes of length @len, may contain NUL characters.
* @string2: A sequence of bytes of length @len, may contain NUL characters.
*
* Computes Hamming distance of two strings.
*
* The strings must have the same length.
*
* Returns: The Hamming distance.
**/
_LEV_STATIC_PY size_t
lev_hamming_distance(size_t len,
const lev_byte *string1,
const lev_byte *string2)
{
size_t dist, i;
dist = 0;
for (i = len; i; i--) {
if (*(string1++) != *(string2++))
dist++;
}
return dist;
}
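/*
* Illustrative sketch, not part of the original source (LEV_EXAMPLES is a
* hypothetical guard macro): Hamming distance simply counts mismatching
* positions of two equally long strings.
*/
#ifdef LEV_EXAMPLES
static void
example_hamming_distance(void)
{
assert(lev_hamming_distance(5, (const lev_byte*)"toned",
(const lev_byte*)"roses") == 3);
}
#endif /* LEV_EXAMPLES */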
/**
* lev_u_hamming_distance:
* @len: The length of @string1 and @string2.
* @string1: A sequence of Unicode characters of length @len, may contain NUL
* characters.
* @string2: A sequence of Unicode characters of length @len, may contain NUL
* characters.
*
* Computes Hamming distance of two strings.
*
* The strings must have the same length.
*
* Returns: The Hamming distance.
**/
_LEV_STATIC_PY size_t
lev_u_hamming_distance(size_t len,
const lev_wchar *string1,
const lev_wchar *string2)
{
size_t dist, i;
dist = 0;
for (i = len; i; i--) {
if (*(string1++) != *(string2++))
dist++;
}
return dist;
}
/**
* lev_jaro_ratio:
* @len1: The length of @string1.
* @string1: A sequence of bytes of length @len1, may contain NUL characters.
* @len2: The length of @string2.
* @string2: A sequence of bytes of length @len2, may contain NUL characters.
*
* Computes Jaro string similarity metric of two strings.
*
* Returns: The Jaro metric of @string1 and @string2.
**/
_LEV_STATIC_PY double
lev_jaro_ratio(size_t len1, const lev_byte *string1,
size_t len2, const lev_byte *string2)
{
size_t i, j, halflen, trans, match, to;
size_t *idx;
double md;
if (len1 == 0 || len2 == 0) {
if (len1 == 0 && len2 == 0)
return 1.0;
return 0.0;
}
/* make len1 always shorter (or equally long) */
if (len1 > len2) {
const lev_byte *b;
b = string1;
string1 = string2;
string2 = b;
i = len1;
len1 = len2;
len2 = i;
}
halflen = (len1 + 1)/2;
idx = (size_t*)calloc(len1, sizeof(size_t));
if (!idx)
return -1.0;
/* The literature about the Jaro metric is confusing as the method of
* assignment of common characters is nowhere specified. There are several
* possible deterministic mutual assignments of common characters of two
* strings. We use the earliest-position method, which is however suboptimal
* (e.g., it gives two transpositions in jaro("Jaro", "Joaro") because of the
* assignment of the first `o'). No reasonable algorithm for the optimal one
* is currently known to me. */
match = 0;
/* the part with allowed range overlapping left */
for (i = 0; i < halflen; i++) {
/* never run past the end of the shorter string */
for (j = 0; j < len1 && j <= i+halflen; j++) {
if (string1[j] == string2[i] && !idx[j]) {
match++;
idx[j] = match;
break;
}
}
}
/* the part with allowed range overlapping right */
to = len1+halflen < len2 ? len1+halflen : len2;
for (i = halflen; i < to; i++) {
for (j = i - halflen; j < len1; j++) {
if (string1[j] == string2[i] && !idx[j]) {
match++;
idx[j] = match;
break;
}
}
}
if (!match) {
free(idx);
return 0.0;
}
/* count transpositions */
i = 0;
trans = 0;
for (j = 0; j < len1; j++) {
if (idx[j]) {
i++;
if (idx[j] != i)
trans++;
}
}
free(idx);
md = (double)match;
return (md/len1 + md/len2 + 1.0 - trans/md/2.0)/3.0;
}
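/*
* Illustrative sketch, not part of the original source (LEV_EXAMPLES is a
* hypothetical guard macro): the classic "MARTHA"/"MARHTA" pair has six
* matching characters and one transposed pair, giving
* (6/6 + 6/6 + 5/6)/3 = 17/18, i.e. roughly 0.944.
*/
#ifdef LEV_EXAMPLES
static void
example_jaro_ratio(void)
{
double j = lev_jaro_ratio(6, (const lev_byte*)"MARTHA",
6, (const lev_byte*)"MARHTA");
assert(fabs(j - 17.0/18.0) < 1e-9);
}
#endif /* LEV_EXAMPLES */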
/**
* lev_u_jaro_ratio:
* @len1: The length of @string1.
* @string1: A sequence of Unicode characters of length @len1, may contain NUL
* characters.
* @len2: The length of @string2.
* @string2: A sequence of Unicode characters of length @len2, may contain NUL
* characters.
*
* Computes Jaro string similarity metric of two Unicode strings.
*
* Returns: The Jaro metric of @string1 and @string2.
**/
_LEV_STATIC_PY double
lev_u_jaro_ratio(size_t len1, const lev_wchar *string1,
size_t len2, const lev_wchar *string2)
{
size_t i, j, halflen, trans, match, to;
size_t *idx;
double md;
if (len1 == 0 || len2 == 0) {
if (len1 == 0 && len2 == 0)
return 1.0;
return 0.0;
}
/* make len1 always shorter (or equally long) */
if (len1 > len2) {
const lev_wchar *b;
b = string1;
string1 = string2;
string2 = b;
i = len1;
len1 = len2;
len2 = i;
}
halflen = (len1 + 1)/2;
idx = (size_t*)calloc(len1, sizeof(size_t));
if (!idx)
return -1.0;
match = 0;
/* the part with allowed range overlapping left */
for (i = 0; i < halflen; i++) {
/* never run past the end of the shorter string */
for (j = 0; j < len1 && j <= i+halflen; j++) {
if (string1[j] == string2[i] && !idx[j]) {
match++;
idx[j] = match;
break;
}
}
}
/* the part with allowed range overlapping right */
to = len1+halflen < len2 ? len1+halflen : len2;
for (i = halflen; i < to; i++) {
for (j = i - halflen; j < len1; j++) {
if (string1[j] == string2[i] && !idx[j]) {
match++;
idx[j] = match;
break;
}
}
}
if (!match) {
free(idx);
return 0.0;
}
/* count transpositions */
i = 0;
trans = 0;
for (j = 0; j < len1; j++) {
if (idx[j]) {
i++;
if (idx[j] != i)
trans++;
}
}
free(idx);
md = (double)match;
return (md/len1 + md/len2 + 1.0 - trans/md/2.0)/3.0;
}
/**
* lev_jaro_winkler_ratio:
* @len1: The length of @string1.
* @string1: A sequence of bytes of length @len1, may contain NUL characters.
* @len2: The length of @string2.
* @string2: A sequence of bytes of length @len2, may contain NUL characters.
* @pfweight: Prefix weight, i.e., how much weight should be given to a
* common prefix.
*
* Computes Jaro-Winkler string similarity metric of two strings.
*
* The formula is J+@pfweight*P*(1-J), where J is Jaro metric and P is the
* length of common prefix.
*
* Returns: The Jaro-Winkler metric of @string1 and @string2.
**/
_LEV_STATIC_PY double
lev_jaro_winkler_ratio(size_t len1, const lev_byte *string1,
size_t len2, const lev_byte *string2,
double pfweight)
{
double j;
size_t p, m;
j = lev_jaro_ratio(len1, string1, len2, string2);
m = len1 < len2 ? len1 : len2;
for (p = 0; p < m; p++) {
if (string1[p] != string2[p])
break;
}
j += (1.0 - j)*p*pfweight;
return j > 1.0 ? 1.0 : j;
}
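/*
* Illustrative sketch, not part of the original source (LEV_EXAMPLES is a
* hypothetical guard macro): with the customary prefix weight 0.1 and the
* three-character common prefix "MAR", the Jaro score 17/18 is boosted to
* 17/18 + 0.1*3*(1 - 17/18), roughly 0.961. Note that, unlike some
* formulations, this implementation does not cap the prefix length at four.
*/
#ifdef LEV_EXAMPLES
static void
example_jaro_winkler_ratio(void)
{
double jw = lev_jaro_winkler_ratio(6, (const lev_byte*)"MARTHA",
6, (const lev_byte*)"MARHTA", 0.1);
assert(fabs(jw - (17.0/18.0 + 0.3*(1.0 - 17.0/18.0))) < 1e-9);
}
#endif /* LEV_EXAMPLES */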
/**
* lev_u_jaro_winkler_ratio:
* @len1: The length of @string1.
* @string1: A sequence of Unicode characters of length @len1, may contain NUL
* characters.
* @len2: The length of @string2.
* @string2: A sequence of Unicode characters of length @len2, may contain NUL
* characters.
* @pfweight: Prefix weight, i.e., how much weight should be given to a
* common prefix.
*
* Computes Jaro-Winkler string similarity metric of two Unicode strings.
*
* The formula is J+@pfweight*P*(1-J), where J is Jaro metric and P is the
* length of common prefix.
*
* Returns: The Jaro-Winkler metric of @string1 and @string2.
**/
_LEV_STATIC_PY double
lev_u_jaro_winkler_ratio(size_t len1, const lev_wchar *string1,
size_t len2, const lev_wchar *string2,
double pfweight)
{
double j;
size_t p, m;
j = lev_u_jaro_ratio(len1, string1, len2, string2);
m = len1 < len2 ? len1 : len2;
for (p = 0; p < m; p++) {
if (string1[p] != string2[p])
break;
}
j += (1.0 - j)*p*pfweight;
return j > 1.0 ? 1.0 : j;
}
/* }}} */
/****************************************************************************
*
* Generalized medians, the greedy algorithm, and greedy improvements
*
****************************************************************************/
/* {{{ */
/* compute the sets of symbols each string contains, and the set of symbols
* in any of them (symset). meanwhile, count how many different symbols
* there are (used below for symlist). */
static lev_byte*
make_symlist(size_t n, const size_t *lengths,
const lev_byte *strings[], size_t *symlistlen)
{
short int *symset; /* indexed by ALL symbols, contains 1 for symbols
present in the strings, zero for others */
size_t i, j;
lev_byte *symlist;
symset = calloc(0x100, sizeof(short int));
if (!symset) {
*symlistlen = (size_t)(-1);
return NULL;
}
*symlistlen = 0;
for (i = 0; i < n; i++) {
const lev_byte *stri = strings[i];
for (j = 0; j < lengths[i]; j++) {
int c = stri[j];
if (!symset[c]) {
(*symlistlen)++;
symset[c] = 1;
}
}
}
if (!*symlistlen) {
free(symset);
return NULL;
}
/* create dense symbol table, so we can easily iterate over only characters
* present in the strings */
{
size_t pos = 0;
symlist = (lev_byte*)safe_malloc((*symlistlen), sizeof(lev_byte));
if (!symlist) {
*symlistlen = (size_t)(-1);
free(symset);
return NULL;
}
for (j = 0; j < 0x100; j++) {
if (symset[j])
symlist[pos++] = (lev_byte)j;
}
}
free(symset);
return symlist;
}
/**
* lev_greedy_median:
* @n: The size of @lengths, @strings, and @weights.
* @lengths: The lengths of @strings.
* @strings: An array of strings, that may contain NUL characters.
* @weights: The string weights (they behave exactly as multiplicities, though
* any positive value is allowed, not just integers).
* @medlength: Where the length of the median should be stored.
*
* Finds a generalized median string of @strings using the greedy algorithm.
*
* Note it's considerably more efficient to give a string with weight 2
* than to store two identical strings in @strings (with weights 1).
*
* Returns: The generalized median, as a newly allocated string; its length
* is stored in @medlength.
**/
_LEV_STATIC_PY lev_byte*
lev_greedy_median(size_t n, const size_t *lengths,
const lev_byte *strings[],
const double *weights,
size_t *medlength)
{
size_t i; /* usually iterates over strings (n) */
size_t j; /* usually iterates over characters */
size_t len; /* usually iterates over the approximate median string */
lev_byte *symlist; /* list of symbols present in the strings,
we iterate over it instead of the set of all
existing symbols */
size_t symlistlen; /* length of symlist */
size_t maxlen; /* maximum input string length */
size_t stoplen; /* maximum tried median string length -- this is slightly
higher than maxlen, because the median string may be
longer than any of the input strings */
size_t **rows; /* Levenshtein matrix rows for each string, we need to keep
only one previous row to construct the current one */
size_t *row; /* a scratch buffer for new Levenshtein matrix row computation,
shared among all strings */
lev_byte *median; /* the resulting approximate median string */
double *mediandist; /* the total distance of the best median string of
given length. warning! mediandist[0] is total
distance for empty string, while median[] itself
is normally zero-based */
size_t bestlen; /* the best approximate median string length */
/* find all symbols */
symlist = make_symlist(n, lengths, strings, &symlistlen);
if (!symlist) {
*medlength = 0;
if (symlistlen != 0)
return NULL;
else
return calloc(1, sizeof(lev_byte));
}
/* allocate and initialize per-string matrix rows and a common work buffer */
rows = (size_t**)safe_malloc(n, sizeof(size_t*));
if (!rows) {
free(symlist);
return NULL;
}
maxlen = 0;
for (i = 0; i < n; i++) {
size_t *ri;
size_t leni = lengths[i];
if (leni > maxlen)
maxlen = leni;
ri = rows[i] = (size_t*)safe_malloc((leni + 1), sizeof(size_t));
if (!ri) {
for (j = 0; j < i; j++)
free(rows[j]);
free(rows);
free(symlist);
return NULL;
}
for (j = 0; j <= leni; j++)
ri[j] = j;
}
stoplen = 2*maxlen + 1;
row = (size_t*)safe_malloc((stoplen + 1), sizeof(size_t));
if (!row) {
for (j = 0; j < n; j++)
free(rows[j]);
free(rows);
free(symlist);
return NULL;
}
/* compute final cost of string of length 0 (the empty string may also be
* a valid answer) */
median = (lev_byte*)safe_malloc(stoplen, sizeof(lev_byte));
if (!median) {
for (j = 0; j < n; j++)
free(rows[j]);
free(rows);
free(row);
free(symlist);
return NULL;
}
mediandist = (double*)safe_malloc((stoplen + 1), sizeof(double));
if (!mediandist) {
for (j = 0; j < n; j++)
free(rows[j]);
free(rows);
free(row);
free(symlist);
free(median);
return NULL;
}
mediandist[0] = 0.0;
for (i = 0; i < n; i++)
mediandist[0] += lengths[i]*weights[i];
/* build up the approximate median string symbol by symbol
* XXX: we actually exit on break below, but on the same condition */
for (len = 1; len <= stoplen; len++) {
lev_byte symbol;
double minminsum = LEV_INFINITY;
row[0] = len;
/* iterate over all symbols we may want to add */
for (j = 0; j < symlistlen; j++) {
double totaldist = 0.0;
double minsum = 0.0;
symbol = symlist[j];
/* sum Levenshtein distances from all the strings, with given weights */
for (i = 0; i < n; i++) {
const lev_byte *stri = strings[i];
size_t *p = rows[i];
size_t leni = lengths[i];
size_t *end = rows[i] + leni;
size_t min = len;
size_t x = len; /* == row[0] */
/* compute how another row of Levenshtein matrix would look for median
* string with this symbol added */
while (p < end) {
size_t D = *(p++) + (symbol != *(stri++));
x++;
if (x > D)
x = D;
if (x > *p + 1)
x = *p + 1;
if (x < min)
min = x;
}
minsum += min*weights[i];
totaldist += x*weights[i];
}
/* is this symbol better than all the others? */
if (minsum < minminsum) {
minminsum = minsum;
mediandist[len] = totaldist;
median[len - 1] = symbol;
}
}
/* stop the iteration if we no longer need to recompute the matrix rows
* or when we are over maxlen and adding more characters doesn't seem
* useful */
if (len == stoplen
|| (len > maxlen && mediandist[len] > mediandist[len - 1])) {
stoplen = len;
break;
}
/* now the best symbol is known, so recompute all matrix rows for this
* one */
symbol = median[len - 1];
for (i = 0; i < n; i++) {
const lev_byte *stri = strings[i];
size_t *oldrow = rows[i];
size_t leni = lengths[i];
size_t k;
/* compute a row of Levenshtein matrix */
for (k = 1; k <= leni; k++) {
size_t c1 = oldrow[k] + 1;
size_t c2 = row[k - 1] + 1;
size_t c3 = oldrow[k - 1] + (symbol != stri[k - 1]);
row[k] = c2 > c3 ? c3 : c2;
if (row[k] > c1)
row[k] = c1;
}
memcpy(oldrow, row, (leni + 1)*sizeof(size_t));
}
}
/* find the string with minimum total distance */
bestlen = 0;
for (len = 1; len <= stoplen; len++) {
if (mediandist[len] < mediandist[bestlen])
bestlen = len;
}
/* clean up */
for (i = 0; i < n; i++)
free(rows[i]);
free(rows);
free(row);
free(symlist);
free(mediandist);
/* return result */
{
lev_byte *result = (lev_byte*)safe_malloc(bestlen, sizeof(lev_byte));
if (!result) {
free(median);
return NULL;
}
memcpy(result, median, bestlen*sizeof(lev_byte));
free(median);
*medlength = bestlen;
return result;
}
}
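/*
* Illustrative sketch, not part of the original source (LEV_EXAMPLES is a
* hypothetical guard macro): a minimal lev_greedy_median() call. The caller
* owns the returned buffer and must free() it; the buffer is not necessarily
* NUL-terminated, its length is reported through @medlength, and NULL
* signals an allocation failure.
*/
#ifdef LEV_EXAMPLES
static void
example_greedy_median(void)
{
const lev_byte *strings[3];
size_t lengths[3] = { 4, 3, 4 };
double weights[3] = { 1.0, 1.0, 1.0 };
size_t medlen = 0;
lev_byte *median;
strings[0] = (const lev_byte*)"spam";
strings[1] = (const lev_byte*)"spa";
strings[2] = (const lev_byte*)"span";
median = lev_greedy_median(3, lengths, strings, weights, &medlen);
if (median)
free(median);
}
#endif /* LEV_EXAMPLES */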
/*
* Knowing the distance matrices up to some row, finish the distance
* computations.
*
* string1, len1 are already shortened.
*/
static double
finish_distance_computations(size_t len1, lev_byte *string1,
size_t n, const size_t *lengths,
const lev_byte **strings,
const double *weights, size_t **rows,
size_t *row)
{
size_t *end;
size_t i, j;
size_t offset; /* row[0]; offset and len1 together give the real length of string1 */
double distsum = 0.0; /* sum of distances */
/* catch trivial case */
if (len1 == 0) {
for (j = 0; j < n; j++)
distsum += rows[j][lengths[j]]*weights[j];
return distsum;
}
/* iterate through the strings and sum the distances */
for (j = 0; j < n; j++) {
size_t *rowi = rows[j]; /* current row */
size_t leni = lengths[j]; /* current length */
size_t len = len1; /* temporary len1 for suffix stripping */
const lev_byte *stringi = strings[j]; /* current string */
/* strip common suffix (prefix CAN'T be stripped) */
while (len && leni && stringi[leni-1] == string1[len-1]) {
len--;
leni--;
}
/* catch trivial cases */
if (len == 0) {
distsum += rowi[leni]*weights[j];
continue;
}
offset = rowi[0];
if (leni == 0) {
distsum += (offset + len)*weights[j];
continue;
}
/* complete the matrix */
memcpy(row, rowi, (leni + 1)*sizeof(size_t));
end = row + leni;
for (i = 1; i <= len; i++) {
size_t *p = row + 1;
const lev_byte char1 = string1[i - 1];
const lev_byte *char2p = stringi;
size_t D, x;
D = x = i + offset;
while (p <= end) {
size_t c3 = --D + (char1 != *(char2p++));
x++;
if (x > c3)
x = c3;
D = *p;
D++;
if (x > D)
x = D;
*(p++) = x;
}
}
distsum += weights[j]*(*end);
}
return distsum;
}
/**
* lev_median_improve:
* @len: The length of @s.
* @s: The approximate generalized median string to be improved.
* @n: The size of @lengths, @strings, and @weights.
* @lengths: The lengths of @strings.
* @strings: An array of strings, that may contain NUL characters.
* @weights: The string weights (they behave exactly as multiplicities, though
* any positive value is allowed, not just integers).
* @medlength: Where the new length of the median should be stored.
*
* Tries to make @s a better generalized median string of @strings with
* small perturbations.
*
* It never returns a string with larger SOD than @s; in the worst case, a
* string identical to @s is returned.
*
* Returns: The improved generalized median, as a newly allocated string; its
* length is stored in @medlength.
**/
_LEV_STATIC_PY lev_byte*
lev_median_improve(size_t len, const lev_byte *s,
size_t n, const size_t *lengths,
const lev_byte *strings[],
const double *weights,
size_t *medlength)
{
size_t i; /* usually iterates over strings (n) */
size_t j; /* usually iterates over characters */
size_t pos; /* the position in the approximate median string we are
trying to change */
lev_byte *symlist; /* list of symbols present in the strings,
we iterate over it instead of the set of all
existing symbols */
size_t symlistlen; /* length of symlist */
size_t maxlen; /* maximum input string length */
size_t stoplen; /* maximum tried median string length -- this is slightly
higher than maxlen, because the median string may be
longer than any of the input strings */
size_t **rows; /* Levenshtein matrix rows for each string, we need to keep
only one previous row to construct the current one */
size_t *row; /* a scratch buffer for new Levenshtein matrix row computation,
shared among all strings */
lev_byte *median; /* the resulting approximate median string */
size_t medlen; /* the current approximate median string length */
double minminsum; /* the current total distance sum */
/* find all symbols */
symlist = make_symlist(n, lengths, strings, &symlistlen);
if (!symlist) {
*medlength = 0;
if (symlistlen != 0)
return NULL;
else
return calloc(1, sizeof(lev_byte));
}
/* allocate and initialize per-string matrix rows and a common work buffer */
rows = (size_t**)safe_malloc(n, sizeof(size_t*));
if (!rows) {
free(symlist);
return NULL;
}
maxlen = 0;
for (i = 0; i < n; i++) {
size_t *ri;
size_t leni = lengths[i];
if (leni > maxlen)
maxlen = leni;
ri = rows[i] = (size_t*)safe_malloc((leni + 1), sizeof(size_t));
if (!ri) {
for (j = 0; j < i; j++)
free(rows[j]);
free(rows);
free(symlist);
return NULL;
}
for (j = 0; j <= leni; j++)
ri[j] = j;
}
stoplen = 2*maxlen + 1;
row = (size_t*)safe_malloc((stoplen + 2), sizeof(size_t));
if (!row) {
for (j = 0; j < n; j++)
free(rows[j]);
free(rows);
free(symlist);
return NULL;
}
/* initialize median to given string */
median = (lev_byte*)safe_malloc((stoplen+1), sizeof(lev_byte));
if (!median) {
for (j = 0; j < n; j++)
free(rows[j]);
free(rows);
free(row);
free(symlist);
return NULL;
}
median++; /* we need the -1st element for insertions at pos 0 */
medlen = len;
memcpy(median, s, (medlen)*sizeof(lev_byte));
minminsum = finish_distance_computations(medlen, median,
n, lengths, strings,
weights, rows, row);
/* sequentially try perturbations on all positions */
for (pos = 0; pos <= medlen; ) {
lev_byte orig_symbol, symbol;
LevEditType operation;
double sum;
symbol = median[pos];
operation = LEV_EDIT_KEEP;
/* IF pos < medlength: FOREACH symbol: try to replace the symbol
* at pos, if any lowers the total distance, choose the best */
if (pos < medlen) {
orig_symbol = median[pos];
for (j = 0; j < symlistlen; j++) {
if (symlist[j] == orig_symbol)
continue;
median[pos] = symlist[j];
sum = finish_distance_computations(medlen - pos, median + pos,
n, lengths, strings,
weights, rows, row);
if (sum < minminsum) {
minminsum = sum;
symbol = symlist[j];
operation = LEV_EDIT_REPLACE;
}
}
median[pos] = orig_symbol;
}
/* FOREACH symbol: try to add it at pos, if any lowers the total
* distance, choose the best (increase medlength)
* We simulate insertion by replacing the character at pos-1 */
orig_symbol = *(median + pos - 1);
for (j = 0; j < symlistlen; j++) {
*(median + pos - 1) = symlist[j];
sum = finish_distance_computations(medlen - pos + 1, median + pos - 1,
n, lengths, strings,
weights, rows, row);
if (sum < minminsum) {
minminsum = sum;
symbol = symlist[j];
operation = LEV_EDIT_INSERT;
}
}
*(median + pos - 1) = orig_symbol;
/* IF pos < medlength: try to delete the symbol at pos, if it lowers
* the total distance remember it (decrease medlength) */
if (pos < medlen) {
sum = finish_distance_computations(medlen - pos - 1, median + pos + 1,
n, lengths, strings,
weights, rows, row);
if (sum < minminsum) {
minminsum = sum;
operation = LEV_EDIT_DELETE;
}
}
/* actually perform the operation */
switch (operation) {
case LEV_EDIT_REPLACE:
median[pos] = symbol;
break;
case LEV_EDIT_INSERT:
memmove(median+pos+1, median+pos,
(medlen - pos)*sizeof(lev_byte));
median[pos] = symbol;
medlen++;
break;
case LEV_EDIT_DELETE:
memmove(median+pos, median + pos+1,
(medlen - pos-1)*sizeof(lev_byte));
medlen--;
break;
default:
break;
}
assert(medlen <= stoplen);
/* now the result is known, so recompute all matrix rows and move on */
if (operation != LEV_EDIT_DELETE) {
symbol = median[pos];
row[0] = pos + 1;
for (i = 0; i < n; i++) {
const lev_byte *stri = strings[i];
size_t *oldrow = rows[i];
size_t leni = lengths[i];
size_t k;
/* compute a row of Levenshtein matrix */
for (k = 1; k <= leni; k++) {
size_t c1 = oldrow[k] + 1;
size_t c2 = row[k - 1] + 1;
size_t c3 = oldrow[k - 1] + (symbol != stri[k - 1]);
row[k] = c2 > c3 ? c3 : c2;
if (row[k] > c1)
row[k] = c1;
}
memcpy(oldrow, row, (leni + 1)*sizeof(size_t));
}
pos++;
}
}
/* clean up */
for (i = 0; i < n; i++)
free(rows[i]);
free(rows);
free(row);
free(symlist);
/* return result */
{
lev_byte *result = (lev_byte*)safe_malloc(medlen, sizeof(lev_byte));
if (!result) {
free(median);
return NULL;
}
*medlength = medlen;
memcpy(result, median, medlen*sizeof(lev_byte));
median--;
free(median);
return result;
}
}
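/*
* Illustrative sketch, not part of the original source (LEV_EXAMPLES is a
* hypothetical guard macro): chaining the two steps, i.e. computing a greedy
* median and then letting lev_median_improve() perturb it; the improved
* string never has a larger SOD than the greedy one.
*/
#ifdef LEV_EXAMPLES
static lev_byte*
example_improved_median(size_t n, const size_t *lengths,
const lev_byte *strings[], const double *weights,
size_t *medlength)
{
size_t greedylen = 0;
lev_byte *greedy, *better;
greedy = lev_greedy_median(n, lengths, strings, weights, &greedylen);
if (!greedy)
return NULL;
better = lev_median_improve(greedylen, greedy, n, lengths, strings,
weights, medlength);
free(greedy);
return better;
}
#endif /* LEV_EXAMPLES */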
/* used internally in make_usymlist */
typedef struct _HItem HItem;
struct _HItem {
lev_wchar c;
HItem *n;
};
/* free usymlist hash
* this is a separate function because we need it in more than one place */
static void
free_usymlist_hash(HItem *symmap)
{
size_t j;
for (j = 0; j < 0x100; j++) {
HItem *p = symmap + j;
if (p->n == symmap || p->n == NULL)
continue;
p = p->n;
while (p) {
HItem *q = p;
p = p->n;
free(q);
}
}
free(symmap);
}
/* compute the sets of symbols each string contains, and the set of symbols
* in any of them (symset). meanwhile, count how many different symbols
* there are (used below for symlist). */
static lev_wchar*
make_usymlist(size_t n, const size_t *lengths,
const lev_wchar *strings[], size_t *symlistlen)
{
lev_wchar *symlist;
size_t i, j;
HItem *symmap;
j = 0;
for (i = 0; i < n; i++)
j += lengths[i];
*symlistlen = 0;
if (j == 0)
return NULL;
/* find all symbols, use a kind of hash for storage */
symmap = (HItem*)safe_malloc(0x100, sizeof(HItem));
if (!symmap) {
*symlistlen = (size_t)(-1);
return NULL;
}
/* this is an ugly memory-allocation-avoiding hack: most hash elements
* will probably contain no symbols or only one, so when p->n is equal
* to symmap, it means there are no symbols yet; after inserting the
* first one, p->n becomes normally NULL and then it behaves like a
* usual singly linked list */
for (i = 0; i < 0x100; i++)
symmap[i].n = symmap;
for (i = 0; i < n; i++) {
const lev_wchar *stri = strings[i];
for (j = 0; j < lengths[i]; j++) {
int c = stri[j];
int key = (c + (c >> 7)) & 0xff;
HItem *p = symmap + key;
if (p->n == symmap) {
p->c = c;
p->n = NULL;
(*symlistlen)++;
continue;
}
while (p->c != c && p->n != NULL)
p = p->n;
if (p->c != c) {
p->n = (HItem*)malloc(sizeof(HItem));
if (!p->n) {
free_usymlist_hash(symmap);
*symlistlen = (size_t)(-1);
return NULL;
}
p = p->n;
p->n = NULL;
p->c = c;
(*symlistlen)++;
}
}
}
/* create dense symbol table, so we can easily iterate over only characters
* present in the strings */
{
size_t pos = 0;
symlist = (lev_wchar*)safe_malloc((*symlistlen), sizeof(lev_wchar));
if (!symlist) {
free_usymlist_hash(symmap);
*symlistlen = (size_t)(-1);
return NULL;
}
for (j = 0; j < 0x100; j++) {
HItem *p = symmap + j;
while (p != NULL && p->n != symmap) {
symlist[pos++] = p->c;
p = p->n;
}
}
}
/* free memory */
free_usymlist_hash(symmap);
return symlist;
}
/**
* lev_u_greedy_median:
* @n: The size of @lengths, @strings, and @weights.
* @lengths: The lengths of @strings.
* @strings: An array of strings, that may contain NUL characters.
* @weights: The string weights (they behave exactly as multiplicities, though
* any positive value is allowed, not just integers).
* @medlength: Where the length of the median should be stored.
*
* Finds a generalized median string of @strings using the greedy algorithm.
*
* Note it's considerably more efficient to give a string with weight 2
* than to store two identical strings in @strings (with weights 1).
*
* Returns: The generalized median, as a newly allocated string; its length
* is stored in @medlength.
**/
_LEV_STATIC_PY lev_wchar*
lev_u_greedy_median(size_t n, const size_t *lengths,
const lev_wchar *strings[],
const double *weights,
size_t *medlength)
{
size_t i; /* usually iterates over strings (n) */
size_t j; /* usually iterates over characters */
size_t len; /* usually iterates over the approximate median string */
lev_wchar *symlist; /* list of symbols present in the strings,
we iterate over it instead of the set of all
existing symbols */
size_t symlistlen; /* length of symlist */
size_t maxlen; /* maximum input string length */
size_t stoplen; /* maximum tried median string length -- this is slightly
higher than maxlen, because the median string may be
longer than any of the input strings */
size_t **rows; /* Levenshtein matrix rows for each string, we need to keep
only one previous row to construct the current one */
size_t *row; /* a scratch buffer for new Levenshtein matrix row computation,
shared among all strings */
lev_wchar *median; /* the resulting approximate median string */
double *mediandist; /* the total distance of the best median string of
given length. warning! mediandist[0] is total
distance for empty string, while median[] itself
is normally zero-based */
size_t bestlen; /* the best approximate median string length */
/* find all symbols */
symlist = make_usymlist(n, lengths, strings, &symlistlen);
if (!symlist) {
*medlength = 0;
if (symlistlen != 0)
return NULL;
else
return calloc(1, sizeof(lev_wchar));
}
/* allocate and initialize per-string matrix rows and a common work buffer */
rows = (size_t**)safe_malloc(n, sizeof(size_t*));
if (!rows) {
free(symlist);
return NULL;
}
maxlen = 0;
for (i = 0; i < n; i++) {
size_t *ri;
size_t leni = lengths[i];
if (leni > maxlen)
maxlen = leni;
ri = rows[i] = (size_t*)safe_malloc((leni + 1), sizeof(size_t));
if (!ri) {
for (j = 0; j < i; j++)
free(rows[j]);
free(rows);
free(symlist);
return NULL;
}
for (j = 0; j <= leni; j++)
ri[j] = j;
}
stoplen = 2*maxlen + 1;
row = (size_t*)safe_malloc((stoplen + 1), sizeof(size_t));
if (!row) {
for (j = 0; j < n; j++)
free(rows[j]);
free(rows);
free(symlist);
return NULL;
}
/* compute final cost of string of length 0 (the empty string may also be
* a valid answer) */
median = (lev_wchar*)safe_malloc(stoplen, sizeof(lev_wchar));
if (!median) {
for (j = 0; j < n; j++)
free(rows[j]);
free(rows);
free(row);
free(symlist);
return NULL;
}
mediandist = (double*)safe_malloc((stoplen + 1), sizeof(double));
if (!mediandist) {
for (j = 0; j < n; j++)
free(rows[j]);
free(rows);
free(row);
free(symlist);
free(median);
return NULL;
}
mediandist[0] = 0.0;
for (i = 0; i < n; i++)
mediandist[0] += lengths[i]*weights[i];
/* build up the approximate median string symbol by symbol
* XXX: we actually exit on break below, but on the same condition */
for (len = 1; len <= stoplen; len++) {
lev_wchar symbol;
double minminsum = LEV_INFINITY;
row[0] = len;
/* iterate over all symbols we may want to add */
for (j = 0; j < symlistlen; j++) {
double totaldist = 0.0;
double minsum = 0.0;
symbol = symlist[j];
/* sum Levenshtein distances from all the strings, with given weights */
for (i = 0; i < n; i++) {
const lev_wchar *stri = strings[i];
size_t *p = rows[i];
size_t leni = lengths[i];
size_t *end = rows[i] + leni;
size_t min = len;
size_t x = len; /* == row[0] */
/* compute how another row of Levenshtein matrix would look for median
* string with this symbol added */
while (p < end) {
size_t D = *(p++) + (symbol != *(stri++));
x++;
if (x > D)
x = D;
if (x > *p + 1)
x = *p + 1;
if (x < min)
min = x;
}
minsum += min*weights[i];
totaldist += x*weights[i];
}
/* is this symbol better than all the others? */
if (minsum < minminsum) {
minminsum = minsum;
mediandist[len] = totaldist;
median[len - 1] = symbol;
}
}
/* stop the iteration if we no longer need to recompute the matrix rows
* or when we are over maxlen and adding more characters doesn't seem
* useful */
if (len == stoplen
|| (len > maxlen && mediandist[len] > mediandist[len - 1])) {
stoplen = len;
break;
}
/* now the best symbol is known, so recompute all matrix rows for this
* one */
symbol = median[len - 1];
for (i = 0; i < n; i++) {
const lev_wchar *stri = strings[i];
size_t *oldrow = rows[i];
size_t leni = lengths[i];
size_t k;
/* compute a row of Levenshtein matrix */
for (k = 1; k <= leni; k++) {
size_t c1 = oldrow[k] + 1;
size_t c2 = row[k - 1] + 1;
size_t c3 = oldrow[k - 1] + (symbol != stri[k - 1]);
row[k] = c2 > c3 ? c3 : c2;
if (row[k] > c1)
row[k] = c1;
}
memcpy(oldrow, row, (leni + 1)*sizeof(size_t));
}
}
/* find the string with minimum total distance */
bestlen = 0;
for (len = 1; len <= stoplen; len++) {
if (mediandist[len] < mediandist[bestlen])
bestlen = len;
}
/* clean up */
for (i = 0; i < n; i++)
free(rows[i]);
free(rows);
free(row);
free(symlist);
free(mediandist);
/* return result */
{
lev_wchar *result = (lev_wchar*)safe_malloc(bestlen, sizeof(lev_wchar));
if (!result) {
free(median);
return NULL;
}
memcpy(result, median, bestlen*sizeof(lev_wchar));
free(median);
*medlength = bestlen;
return result;
}
}
/*
* Knowing the distance matrices up to some row, finish the distance
* computations.
*
* string1, len1 are already shortened.
*/
static double
finish_udistance_computations(size_t len1, lev_wchar *string1,
size_t n, const size_t *lengths,
const lev_wchar **strings,
const double *weights, size_t **rows,
size_t *row)
{
size_t *end;
size_t i, j;
size_t offset; /* row[0]; offset and len1 together give the real length of string1 */
double distsum = 0.0; /* sum of distances */
/* catch trivial case */
if (len1 == 0) {
for (j = 0; j < n; j++)
distsum += rows[j][lengths[j]]*weights[j];
return distsum;
}
/* iterate through the strings and sum the distances */
for (j = 0; j < n; j++) {
size_t *rowi = rows[j]; /* current row */
size_t leni = lengths[j]; /* current length */
size_t len = len1; /* temporary len1 for suffix stripping */
const lev_wchar *stringi = strings[j]; /* current string */
/* strip common suffix (prefix CAN'T be stripped) */
while (len && leni && stringi[leni-1] == string1[len-1]) {
len--;
leni--;
}
/* catch trivial cases */
if (len == 0) {
distsum += rowi[leni]*weights[j];
continue;
}
offset = rowi[0];
if (leni == 0) {
distsum += (offset + len)*weights[j];
continue;
}
/* complete the matrix */
memcpy(row, rowi, (leni + 1)*sizeof(size_t));
end = row + leni;
for (i = 1; i <= len; i++) {
size_t *p = row + 1;
const lev_wchar char1 = string1[i - 1];
const lev_wchar *char2p = stringi;
size_t D, x;
D = x = i + offset;
while (p <= end) {
size_t c3 = --D + (char1 != *(char2p++));
x++;
if (x > c3)
x = c3;
D = *p;
D++;
if (x > D)
x = D;
*(p++) = x;
}
}
distsum += weights[j]*(*end);
}
return distsum;
}
/**
* lev_u_median_improve:
* @len: The length of @s.
* @s: The approximate generalized median string to be improved.
* @n: The size of @lengths, @strings, and @weights.
* @lengths: The lengths of @strings.
* @strings: An array of strings, that may contain NUL characters.
* @weights: The string weights (they behave exactly as multiplicities, though
* any positive value is allowed, not just integers).
* @medlength: Where the new length of the median should be stored.
*
* Tries to make @s a better generalized median string of @strings with
* small perturbations.
*
* It never returns a string with larger SOD than @s; in the worst case, a
* string identical to @s is returned.
*
* Returns: The improved generalized median, as a newly allocated string; its
* length is stored in @medlength.
**/
_LEV_STATIC_PY lev_wchar*
lev_u_median_improve(size_t len, const lev_wchar *s,
size_t n, const size_t *lengths,
const lev_wchar *strings[],
const double *weights,
size_t *medlength)
{
size_t i; /* usually iterates over strings (n) */
size_t j; /* usually iterates over characters */
size_t pos; /* the position in the approximate median string we are
trying to change */
lev_wchar *symlist; /* list of symbols present in the strings,
we iterate over it instead of the set of all
existing symbols */
size_t symlistlen; /* length of symlist */
size_t maxlen; /* maximum input string length */
size_t stoplen; /* maximum tried median string length -- this is slightly
higher than maxlen, because the median string may be
longer than any of the input strings */
size_t **rows; /* Levenshtein matrix rows for each string, we need to keep
only one previous row to construct the current one */
size_t *row; /* a scratch buffer for new Levenshtein matrix row computation,
shared among all strings */
lev_wchar *median; /* the resulting approximate median string */
size_t medlen; /* the current approximate median string length */
double minminsum; /* the current total distance sum */
/* find all symbols */
symlist = make_usymlist(n, lengths, strings, &symlistlen);
if (!symlist) {
*medlength = 0;
if (symlistlen != 0)
return NULL;
else
return calloc(1, sizeof(lev_wchar));
}
/* allocate and initialize per-string matrix rows and a common work buffer */
rows = (size_t**)safe_malloc(n, sizeof(size_t*));
if (!rows) {
free(symlist);
return NULL;
}
maxlen = 0;
for (i = 0; i < n; i++) {
size_t *ri;
size_t leni = lengths[i];
if (leni > maxlen)
maxlen = leni;
ri = rows[i] = (size_t*)safe_malloc((leni + 1), sizeof(size_t));
if (!ri) {
for (j = 0; j < i; j++)
free(rows[j]);
free(rows);
free(symlist);
return NULL;
}
for (j = 0; j <= leni; j++)
ri[j] = j;
}
stoplen = 2*maxlen + 1;
row = (size_t*)safe_malloc((stoplen + 2), sizeof(size_t));
if (!row) {
for (j = 0; j < n; j++)
free(rows[j]);
free(rows);
free(symlist);
return NULL;
}
/* initialize median to given string */
median = (lev_wchar*)safe_malloc((stoplen+1), sizeof(lev_wchar));
if (!median) {
for (j = 0; j < n; j++)
free(rows[j]);
free(rows);
free(row);
free(symlist);
return NULL;
}
median++; /* we need the -1st element for insertions at pos 0 */
medlen = len;
memcpy(median, s, (medlen)*sizeof(lev_wchar));
minminsum = finish_udistance_computations(medlen, median,
n, lengths, strings,
weights, rows, row);
/* sequentially try perturbations on all positions */
for (pos = 0; pos <= medlen; ) {
lev_wchar orig_symbol, symbol;
LevEditType operation;
double sum;
symbol = median[pos];
operation = LEV_EDIT_KEEP;
/* IF pos < medlength: FOREACH symbol: try to replace the symbol
* at pos, if any lowers the total distance, choose the best */
if (pos < medlen) {
orig_symbol = median[pos];
for (j = 0; j < symlistlen; j++) {
if (symlist[j] == orig_symbol)
continue;
median[pos] = symlist[j];
sum = finish_udistance_computations(medlen - pos, median + pos,
n, lengths, strings,
weights, rows, row);
if (sum < minminsum) {
minminsum = sum;
symbol = symlist[j];
operation = LEV_EDIT_REPLACE;
}
}
median[pos] = orig_symbol;
}
/* FOREACH symbol: try to add it at pos, if any lowers the total
* distance, choose the best (increase medlength)
* We simulate insertion by replacing the character at pos-1 */
orig_symbol = *(median + pos - 1);
for (j = 0; j < symlistlen; j++) {
*(median + pos - 1) = symlist[j];
sum = finish_udistance_computations(medlen - pos + 1, median + pos - 1,
n, lengths, strings,
weights, rows, row);
if (sum < minminsum) {
minminsum = sum;
symbol = symlist[j];
operation = LEV_EDIT_INSERT;
}
}
*(median + pos - 1) = orig_symbol;
/* IF pos < medlength: try to delete the symbol at pos, if it lowers
* the total distance remember it (decrease medlength) */
if (pos < medlen) {
sum = finish_udistance_computations(medlen - pos - 1, median + pos + 1,
n, lengths, strings,
weights, rows, row);
if (sum < minminsum) {
minminsum = sum;
operation = LEV_EDIT_DELETE;
}
}
/* actually perform the operation */
switch (operation) {
case LEV_EDIT_REPLACE:
median[pos] = symbol;
break;
case LEV_EDIT_INSERT:
memmove(median+pos+1, median+pos,
(medlen - pos)*sizeof(lev_wchar));
median[pos] = symbol;
medlen++;
break;
case LEV_EDIT_DELETE:
memmove(median+pos, median + pos+1,
(medlen - pos-1)*sizeof(lev_wchar));
medlen--;
break;
default:
break;
}
assert(medlen <= stoplen);
/* now the result is known, so recompute all matrix rows and move on */
if (operation != LEV_EDIT_DELETE) {
symbol = median[pos];
row[0] = pos + 1;
for (i = 0; i < n; i++) {
const lev_wchar *stri = strings[i];
size_t *oldrow = rows[i];
size_t leni = lengths[i];
size_t k;
/* compute a row of Levenshtein matrix */
for (k = 1; k <= leni; k++) {
size_t c1 = oldrow[k] + 1;
size_t c2 = row[k - 1] + 1;
size_t c3 = oldrow[k - 1] + (symbol != stri[k - 1]);
row[k] = c2 > c3 ? c3 : c2;
if (row[k] > c1)
row[k] = c1;
}
memcpy(oldrow, row, (leni + 1)*sizeof(size_t));
}
pos++;
}
}
/* clean up */
for (i = 0; i < n; i++)
free(rows[i]);
free(rows);
free(row);
free(symlist);
/* return result */
{
lev_wchar *result = (lev_wchar*)safe_malloc(medlen, sizeof(lev_wchar));
if (!result) {
free(median);
return NULL;
}
*medlength = medlen;
memcpy(result, median, medlen*sizeof(lev_wchar));
median--;
free(median);
return result;
}
}
/* }}} */
/****************************************************************************
*
* Quick (voting) medians
*
****************************************************************************/
/* {{{ */
/* compute the sets of symbols each string contains, and the set of symbols
* in any of them (symset). meanwhile, count how many different symbols
* there are (used below for symlist).
* the symset is passed as an argument to avoid its allocation and
* deallocation when it's used in the caller too */
static lev_byte*
make_symlistset(size_t n, const size_t *lengths,
const lev_byte *strings[], size_t *symlistlen,
double *symset)
{
size_t i, j;
lev_byte *symlist;
if (!symset) {
*symlistlen = (size_t)(-1);
return NULL;
}
memset(symset, 0, 0x100*sizeof(double)); /* XXX: needs IEEE doubles?! */
*symlistlen = 0;
for (i = 0; i < n; i++) {
const lev_byte *stri = strings[i];
for (j = 0; j < lengths[i]; j++) {
int c = stri[j];
if (!symset[c]) {
(*symlistlen)++;
symset[c] = 1.0;
}
}
}
if (!*symlistlen)
return NULL;
/* create dense symbol table, so we can easily iterate over only characters
* present in the strings */
{
size_t pos = 0;
symlist = (lev_byte*)safe_malloc((*symlistlen), sizeof(lev_byte));
if (!symlist) {
*symlistlen = (size_t)(-1);
return NULL;
}
for (j = 0; j < 0x100; j++) {
if (symset[j])
symlist[pos++] = (lev_byte)j;
}
}
return symlist;
}
_LEV_STATIC_PY lev_byte*
lev_quick_median(size_t n,
const size_t *lengths,
const lev_byte *strings[],
const double *weights,
size_t *medlength)
{
size_t symlistlen, len, i, j, k;
lev_byte *symlist;
lev_byte *median; /* the resulting string */
double *symset;
double ml, wl;
/* first check whether the result would be an empty string
* and compute resulting string length */
ml = wl = 0.0;
for (i = 0; i < n; i++) {
ml += lengths[i]*weights[i];
wl += weights[i];
}
if (wl == 0.0)
return (lev_byte*)calloc(1, sizeof(lev_byte));
ml = floor(ml/wl + 0.499999);
*medlength = len = ml;
if (!len)
return (lev_byte*)calloc(1, sizeof(lev_byte));
median = (lev_byte*)safe_malloc(len, sizeof(lev_byte));
if (!median)
return NULL;
/* find the symbol set;
* now an empty symbol set is really a failure */
symset = (double*)calloc(0x100, sizeof(double));
if (!symset) {
free(median);
return NULL;
}
symlist = make_symlistset(n, lengths, strings, &symlistlen, symset);
if (!symlist) {
free(median);
free(symset);
return NULL;
}
for (j = 0; j < len; j++) {
/* clear the symbol probabilities */
if (symlistlen < 32) {
for (i = 0; i < symlistlen; i++)
symset[symlist[i]] = 0.0;
}
else
memset(symset, 0, 0x100*sizeof(double));
/* let all strings vote */
for (i = 0; i < n; i++) {
const lev_byte *stri = strings[i];
double weighti = weights[i];
size_t lengthi = lengths[i];
double start = lengthi/ml*j;
double end = start + lengthi/ml;
size_t istart = floor(start);
size_t iend = ceil(end);
/* rounding errors can overflow the buffer */
if (iend > lengthi)
iend = lengthi;
for (k = istart+1; k < iend; k++)
symset[stri[k]] += weighti;
symset[stri[istart]] += weighti*(1+istart - start);
symset[stri[iend-1]] -= weighti*(iend - end);
}
/* find the elected symbol */
k = symlist[0];
for (i = 1; i < symlistlen; i++) {
if (symset[symlist[i]] > symset[k])
k = symlist[i];
}
median[j] = k;
}
free(symset);
free(symlist);
return median;
}
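/*
* Illustrative sketch, not part of the original source (LEV_EXAMPLES is a
* hypothetical guard macro): the voting median is much cheaper than the
* greedy one. Its length is the weighted average input length (rounded),
* and each position is filled with the symbol that collects the most votes;
* for two identical inputs it simply reproduces them.
*/
#ifdef LEV_EXAMPLES
static void
example_quick_median(void)
{
const lev_byte *strings[2];
size_t lengths[2] = { 4, 4 };
double weights[2] = { 1.0, 1.0 };
size_t medlen = 0;
lev_byte *median;
strings[0] = (const lev_byte*)"spam";
strings[1] = (const lev_byte*)"spam";
median = lev_quick_median(2, lengths, strings, weights, &medlen);
if (median) {
assert(medlen == 4 && memcmp(median, "spam", 4) == 0);
free(median);
}
}
#endif /* LEV_EXAMPLES */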
/* used internally in make_usymlistset */
typedef struct _HQItem HQItem;
struct _HQItem {
lev_wchar c;
double s;
HQItem *n;
};
/* free usymlistset hash
* this is a separate function because we need it in more than one place */
static void
free_usymlistset_hash(HQItem *symmap)
{
size_t j;
for (j = 0; j < 0x100; j++) {
HQItem *p = symmap + j;
if (p->n == symmap || p->n == NULL)
continue;
p = p->n;
while (p) {
HQItem *q = p;
p = p->n;
free(q);
}
}
free(symmap);
}
/* compute the sets of symbols each string contains, and the set of symbols
* in any of them (symset). meanwhile, count how many different symbols
* there are (used below for symlist).
* the symset is passed as an argument to avoid its allocation and
* deallocation when it's used in the caller too */
static lev_wchar*
make_usymlistset(size_t n, const size_t *lengths,
const lev_wchar *strings[], size_t *symlistlen,
HQItem *symmap)
{
lev_wchar *symlist;
size_t i, j;
j = 0;
for (i = 0; i < n; i++)
j += lengths[i];
*symlistlen = 0;
if (j == 0)
return NULL;
/* this is an ugly memory-allocation-avoiding hack: most hash elements
* will probably contain no symbols or only one, so when p->n is equal
* to symmap, it means there are no symbols yet; after inserting the
* first one, p->n becomes normally NULL and then it behaves like a
* usual singly linked list */
for (i = 0; i < 0x100; i++)
symmap[i].n = symmap;
for (i = 0; i < n; i++) {
const lev_wchar *stri = strings[i];
for (j = 0; j < lengths[i]; j++) {
int c = stri[j];
int key = (c + (c >> 7)) & 0xff;
HQItem *p = symmap + key;
if (p->n == symmap) {
p->c = c;
p->n = NULL;
(*symlistlen)++;
continue;
}
while (p->c != c && p->n != NULL)
p = p->n;
if (p->c != c) {
p->n = (HQItem*)malloc(sizeof(HQItem));
if (!p->n) {
*symlistlen = (size_t)(-1);
return NULL;
}
p = p->n;
p->n = NULL;
p->c = c;
(*symlistlen)++;
}
}
}
/* create dense symbol table, so we can easily iterate over only characters
* present in the strings */
{
size_t pos = 0;
symlist = (lev_wchar*)safe_malloc((*symlistlen), sizeof(lev_wchar));
if (!symlist) {
*symlistlen = (size_t)(-1);
return NULL;
}
for (j = 0; j < 0x100; j++) {
HQItem *p = symmap + j;
while (p != NULL && p->n != symmap) {
symlist[pos++] = p->c;
p = p->n;
}
}
}
return symlist;
}
_LEV_STATIC_PY lev_wchar*
lev_u_quick_median(size_t n,
const size_t *lengths,
const lev_wchar *strings[],
const double *weights,
size_t *medlength)
{
size_t symlistlen, len, i, j, k;
lev_wchar *symlist;
lev_wchar *median; /* the resulting string */
HQItem *symmap;
double ml, wl;
/* first check whether the result would be an empty string
* and compute resulting string length */
ml = wl = 0.0;
for (i = 0; i < n; i++) {
ml += lengths[i]*weights[i];
wl += weights[i];
}
if (wl == 0.0)
return (lev_wchar*)calloc(1, sizeof(lev_wchar));
ml = floor(ml/wl + 0.499999);
*medlength = len = ml;
if (!len)
return (lev_wchar*)calloc(1, sizeof(lev_wchar));
median = (lev_wchar*)safe_malloc(len, sizeof(lev_wchar));
if (!median)
return NULL;
/* find the symbol set;
* now an empty symbol set is really a failure */
symmap = (HQItem*)safe_malloc(0x100, sizeof(HQItem));
if (!symmap) {
free(median);
return NULL;
}
symlist = make_usymlistset(n, lengths, strings, &symlistlen, symmap);
if (!symlist) {
free(median);
free_usymlistset_hash(symmap);
return NULL;
}
for (j = 0; j < len; j++) {
/* clear the symbol probabilities */
for (i = 0; i < 0x100; i++) {
HQItem *p = symmap + i;
if (p->n == symmap)
continue;
while (p) {
p->s = 0.0;
p = p->n;
}
}
/* let all strings vote */
for (i = 0; i < n; i++) {
const lev_wchar *stri = strings[i];
double weighti = weights[i];
size_t lengthi = lengths[i];
double start = lengthi/ml*j;
double end = start + lengthi/ml;
size_t istart = floor(start);
size_t iend = ceil(end);
/* rounding errors can overflow the buffer */
if (iend > lengthi)
iend = lengthi;
/* the inner part, including the complete last character */
for (k = istart+1; k < iend; k++) {
int c = stri[k];
int key = (c + (c >> 7)) & 0xff;
HQItem *p = symmap + key;
while (p->c != c)
p = p->n;
p->s += weighti;
}
/* the initial fraction */
{
int c = stri[istart];
int key = (c + (c >> 7)) & 0xff;
HQItem *p = symmap + key;
while (p->c != c)
p = p->n;
p->s += weighti*(1+istart - start);
}
/* subtract the part counted for the last character that doesn't
* actually belong here.
* this strategy also works when istart+1 == iend (i.e., everything
* happens inside one character) */
{
int c = stri[iend-1];
int key = (c + (c >> 7)) & 0xff;
HQItem *p = symmap + key;
while (p->c != c)
p = p->n;
p->s -= weighti*(iend - end);
}
}
/* find the elected symbol */
{
HQItem *max = NULL;
for (i = 0; i < 0x100; i++) {
HQItem *p = symmap + i;
if (p->n == symmap)
continue;
while (p) {
if (!max || p->s > max->s)
max = p;
p = p->n;
}
}
median[j] = max->c;
}
}
free_usymlistset_hash(symmap);
free(symlist);
return median;
}
/* }}} */
/****************************************************************************
*
* Set medians
*
****************************************************************************/
/* {{{ */
/**
* lev_set_median_index:
* @n: The size of @lengths, @strings, and @weights.
* @lengths: The lengths of @strings.
* @strings: An array of strings, that may contain NUL characters.
* @weights: The string weights (they behave exactly as multiplicities, though
* any positive value is allowed, not just integers).
*
* Finds the median string of a string set @strings.
*
* Returns: An index in @strings pointing to the set median, -1 in case of
* failure.
**/
_LEV_STATIC_PY size_t
lev_set_median_index(size_t n, const size_t *lengths,
const lev_byte *strings[],
const double *weights)
{
size_t minidx = 0;
double mindist = LEV_INFINITY;
size_t i;
long int *distances;
distances = (long int*)safe_malloc((n*(n - 1)/2), sizeof(long int));
if (!distances)
return (size_t)-1;
memset(distances, 0xff, (n*(n - 1)/2)*sizeof(long int)); /* XXX */
for (i = 0; i < n; i++) {
size_t j = 0;
double dist = 0.0;
const lev_byte *stri = strings[i];
size_t leni = lengths[i];
/* below diagonal */
while (j < i && dist < mindist) {
size_t dindex = i*(i - 1)/2 + j;
long int d;
if (distances[dindex] >= 0)
d = distances[dindex];
else {
d = lev_edit_distance(lengths[j], strings[j], leni, stri, 0);
if (d < 0) {
free(distances);
return (size_t)-1;
}
}
dist += weights[j]*d;
j++;
}
j++; /* no need to compare item with itself */
/* above diagonal */
while (j < n && dist < mindist) {
size_t dindex = j*(j - 1)/2 + i;
distances[dindex] = lev_edit_distance(lengths[j], strings[j],
leni, stri, 0);
if (distances[dindex] < 0) {
free(distances);
return (size_t)-1;
}
dist += weights[j]*distances[dindex];
j++;
}
if (dist < mindist) {
mindist = dist;
minidx = i;
}
}
free(distances);
return minidx;
}
/**
* lev_u_set_median_index:
* @n: The size of @lengths, @strings, and @weights.
* @lengths: The lengths of @strings.
* @strings: An array of strings, that may contain NUL characters.
* @weights: The string weights (they behave exactly as multiplicities, though
* any positive value is allowed, not just integers).
*
* Finds the median string of a string set @strings.
*
* Returns: An index in @strings pointing to the set median, -1 in case of
* failure.
**/
_LEV_STATIC_PY size_t
lev_u_set_median_index(size_t n, const size_t *lengths,
const lev_wchar *strings[],
const double *weights)
{
size_t minidx = 0;
double mindist = LEV_INFINITY;
size_t i;
long int *distances;
distances = (long int*)safe_malloc((n*(n - 1)/2), sizeof(long int));
if (!distances)
return (size_t)-1;
/* set all bits: mark every cached distance as not computed yet (-1) */
memset(distances, 0xff, (n*(n - 1)/2)*sizeof(long int));
for (i = 0; i < n; i++) {
size_t j = 0;
double dist = 0.0;
const lev_wchar *stri = strings[i];
size_t leni = lengths[i];
/* below diagonal */
while (j < i && dist < mindist) {
size_t dindex = i*(i - 1)/2 + j; /* triangular index of the pair (j, i) */
long int d;
if (distances[dindex] >= 0)
d = distances[dindex];
else {
d = lev_u_edit_distance(lengths[j], strings[j], leni, stri, 0);
if (d < 0) {
free(distances);
return (size_t)-1;
}
}
dist += weights[j]*d;
j++;
}
j++; /* no need to compare item with itself */
/* above diagonal */
while (j < n && dist < mindist) {
size_t dindex = j*(j - 1)/2 + i; /* triangular index of the pair (i, j) */
distances[dindex] = lev_u_edit_distance(lengths[j], strings[j],
leni, stri, 0);
if (distances[dindex] < 0) {
free(distances);
return (size_t)-1;
}
dist += weights[j]*distances[dindex];
j++;
}
if (dist < mindist) {
mindist = dist;
minidx = i;
}
}
free(distances);
return minidx;
}
/**
* lev_set_median:
* @n: The size of @lengths, @strings, and @weights.
* @lengths: The lengths of @strings.
* @strings: An array of strings that may contain NUL characters.
* @weights: The string weights (they behave exactly as multiplicities, though
* any positive value is allowed, not just integers).
* @medlength: Where the length of the median string should be stored.
*
* Finds the median string of a string set @strings, i.e. the string from the
* set that minimizes the sum of weighted edit distances to all the others.
*
* Returns: The set median as a newly allocated string, its length is stored
* in @medlength. %NULL in the case of failure.
**/
_LEV_STATIC_PY lev_byte*
lev_set_median(size_t n, const size_t *lengths,
const lev_byte *strings[],
const double *weights,
size_t *medlength)
{
size_t minidx = lev_set_median_index(n, lengths, strings, weights);
lev_byte *result;
if (minidx == (size_t)-1)
return NULL;
*medlength = lengths[minidx];
if (!lengths[minidx])
return (lev_byte*)calloc(1, sizeof(lev_byte));
result = (lev_byte*)safe_malloc(lengths[minidx], sizeof(lev_byte));
if (!result)
return NULL;
return memcpy(result, strings[minidx], lengths[minidx]*sizeof(lev_byte));
}
/**
* lev_u_set_median:
* @n: The size of @lengths, @strings, and @weights.
* @lengths: The lengths of @strings.
* @strings: An array of strings that may contain NUL characters.
* @weights: The string weights (they behave exactly as multiplicities, though
* any positive value is allowed, not just integers).
* @medlength: Where the length of the median string should be stored.
*
* Finds the median string of a string set @strings, i.e. the string from the
* set that minimizes the sum of weighted edit distances to all the others.
*
* Returns: The set median as a newly allocated string, its length is stored
* in @medlength. %NULL in the case of failure.
**/
_LEV_STATIC_PY lev_wchar*
lev_u_set_median(size_t n, const size_t *lengths,
const lev_wchar *strings[],
const double *weights,
size_t *medlength)
{
size_t minidx = lev_u_set_median_index(n, lengths, strings, weights);
lev_wchar *result;
if (minidx == (size_t)-1)
return NULL;
*medlength = lengths[minidx];
if (!lengths[minidx])
return (lev_wchar*)calloc(1, sizeof(lev_wchar));
result = (lev_wchar*)safe_malloc(lengths[minidx], sizeof(lev_wchar));
if (!result)
return NULL;
return memcpy(result, strings[minidx], lengths[minidx]*sizeof(lev_wchar));
}
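/*
 * Illustrative usage sketch (comment only, not compiled; the variable names
 * are example-only and error handling is reduced to a NULL check): picking
 * the set median of three byte strings with equal weights.
 *
 *   const lev_byte *strs[3] = { (const lev_byte*)"kitten",
 *                               (const lev_byte*)"sitting",
 *                               (const lev_byte*)"mitten" };
 *   size_t lens[3] = { 6, 7, 6 };
 *   double w[3] = { 1.0, 1.0, 1.0 };
 *   size_t medlen;
 *   lev_byte *med = lev_set_median(3, lens, strs, w, &medlen);
 *
 * On success med is a newly allocated copy of whichever input string
 * minimizes the weighted sum of edit distances to the others (medlen holds
 * its length, the buffer is not NUL-terminated) and must be free()d by the
 * caller; on failure it is NULL.
 */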
/* }}} */
/****************************************************************************
*
* Set, sequence distances
*
****************************************************************************/
/* {{{ */
/**
* lev_edit_seq_distance:
* @n1: The length of @lengths1 and @strings1.
* @lengths1: The lengths of strings in @strings1.
* @strings1: An array of strings that may contain NUL characters.
* @n2: The length of @lengths2 and @strings2.
* @lengths2: The lengths of strings in @strings2.
* @strings2: An array of strings that may contain NUL characters.
*
* Finds the distance between string sequences @strings1 and @strings2.
*
* In other words, this is a double-Levenshtein algorithm.
*
* The cost of a string replace operation is based on string similarity: it is
* zero for identical strings and 2 for completely dissimilar strings.
*
* Returns: The distance of the two sequences.
**/
_LEV_STATIC_PY double
lev_edit_seq_distance(size_t n1, const size_t *lengths1,
const lev_byte *strings1[],
size_t n2, const size_t *lengths2,
const lev_byte *strings2[])
{
size_t i;
double *row; /* we only need to keep one row of costs */
double *end;
/* strip common prefix */
while (n1 > 0 && n2 > 0
&& *lengths1 == *lengths2
&& memcmp(*strings1, *strings2,
*lengths1*sizeof(lev_byte)) == 0) {
n1--;
n2--;
strings1++;
strings2++;
lengths1++;
lengths2++;
}
/* strip common suffix */
while (n1 > 0 && n2 > 0
&& lengths1[n1-1] == lengths2[n2-1]
&& memcmp(strings1[n1-1], strings2[n2-1],
lengths1[n1-1]*sizeof(lev_byte)) == 0) {
n1--;
n2--;
}
/* catch trivial cases */
if (n1 == 0)
return (double)n2;
if (n2 == 0)
return (double)n1;
/* make the inner cycle (i.e. strings2) the longer one */
if (n1 > n2) {
size_t nx = n1;
const size_t *lx = lengths1;
const lev_byte **sx = strings1;
n1 = n2;
n2 = nx;
lengths1 = lengths2;
lengths2 = lx;
strings1 = strings2;
strings2 = sx;
}
n1++;
n2++;
/* initialize first row */
row = (double*)safe_malloc(n2, sizeof(double));
if (!row)
return -1.0;
end = row + n2 - 1;
for (i = 0; i < n2; i++)
row[i] = (double)i;
/* go through the matrix and compute the costs. yes, this is an extremely
* obfuscated version, but also extremely memory-conservative and relatively
* fast. */
for (i = 1; i < n1; i++) {
double *p = row + 1;
const lev_byte *str1 = strings1[i - 1];
const size_t len1 = lengths1[i - 1];
const lev_byte **str2p = strings2;
const size_t *len2p = lengths2;
double D = i - 1.0;
double x = i;
while (p <= end) {
size_t l = len1 + *len2p;
double q;
if (l == 0) {
/* both strings are empty: zero replace cost, but advance the inner
 * pointers so they stay in sync with the cost row */
q = D;
len2p++;
str2p++;
}
else {
size_t d = lev_edit_distance(len1, str1, *(len2p++), *(str2p++), 1);
if (d == (size_t)(-1)) {
free(row);
return -1.0;
}
q = D + 2.0/l*d;
}
x += 1.0;
if (x > q)
x = q;
D = *p;
if (x > D + 1.0)
x = D + 1.0;
*(p++) = x;
}
}
{
double q = *end;
free(row);
return q;
}
}
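/*
 * Illustrative sketch (comment only, example names): the sequence distance of
 * two short string sequences.
 *
 *   const lev_byte *a[2] = { (const lev_byte*)"abc", (const lev_byte*)"xyz" };
 *   const lev_byte *b[2] = { (const lev_byte*)"abc", (const lev_byte*)"xyw" };
 *   size_t la[2] = { 3, 3 };
 *   size_t lb[2] = { 3, 3 };
 *   double d = lev_edit_seq_distance(2, la, a, 2, lb, b);
 *
 * The identical first items are stripped as a common prefix and contribute
 * nothing; the remaining pair contributes a replace cost between 0 and 2 that
 * reflects how similar "xyz" and "xyw" are, so here 0 < d < 1.  A negative
 * return value signals an allocation failure.
 */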
/**
* lev_u_edit_seq_distance:
* @n1: The length of @lengths1 and @strings1.
* @lengths1: The lengths of strings in @strings1.
* @strings1: An array of strings that may contain NUL characters.
* @n2: The length of @lengths2 and @strings2.
* @lengths2: The lengths of strings in @strings2.
* @strings2: An array of strings that may contain NUL characters.
*
* Finds the distance between string sequences @strings1 and @strings2.
*
* In other words, this is a double-Levenshtein algorithm.
*
* The cost of a string replace operation is based on string similarity: it is
* zero for identical strings and 2 for completely dissimilar strings.
*
* Returns: The distance of the two sequences.
**/
_LEV_STATIC_PY double
lev_u_edit_seq_distance(size_t n1, const size_t *lengths1,
const lev_wchar *strings1[],
size_t n2, const size_t *lengths2,
const lev_wchar *strings2[])
{
size_t i;
double *row; /* we only need to keep one row of costs */
double *end;
/* strip common prefix */
while (n1 > 0 && n2 > 0
&& *lengths1 == *lengths2
&& memcmp(*strings1, *strings2,
*lengths1*sizeof(lev_wchar)) == 0) {
n1--;
n2--;
strings1++;
strings2++;
lengths1++;
lengths2++;
}
/* strip common suffix */
while (n1 > 0 && n2 > 0
&& lengths1[n1-1] == lengths2[n2-1]
&& memcmp(strings1[n1-1], strings2[n2-1],
lengths1[n1-1]*sizeof(lev_wchar)) == 0) {
n1--;
n2--;
}
/* catch trivial cases */
if (n1 == 0)
return (double)n2;
if (n2 == 0)
return (double)n1;
/* make the inner cycle (i.e. strings2) the longer one */
if (n1 > n2) {
size_t nx = n1;
const size_t *lx = lengths1;
const lev_wchar **sx = strings1;
n1 = n2;
n2 = nx;
lengths1 = lengths2;
lengths2 = lx;
strings1 = strings2;
strings2 = sx;
}
n1++;
n2++;
/* initialize first row */
row = (double*)safe_malloc(n2, sizeof(double));
if (!row)
return -1.0;
end = row + n2 - 1;
for (i = 0; i < n2; i++)
row[i] = (double)i;
/* go through the matrix and compute the costs. yes, this is an extremely
* obfuscated version, but also extremely memory-conservative and relatively
* fast. */
for (i = 1; i < n1; i++) {
double *p = row + 1;
const lev_wchar *str1 = strings1[i - 1];
const size_t len1 = lengths1[i - 1];
const lev_wchar **str2p = strings2;
const size_t *len2p = lengths2;
double D = i - 1.0;
double x = i;
while (p <= end) {
size_t l = len1 + *len2p;
double q;
if (l == 0) {
/* both strings are empty: zero replace cost, but advance the inner
 * pointers so they stay in sync with the cost row */
q = D;
len2p++;
str2p++;
}
else {
size_t d = lev_u_edit_distance(len1, str1, *(len2p++), *(str2p++), 1);
if (d == (size_t)(-1)) {
free(row);
return -1.0;
}
q = D + 2.0/l*d;
}
x += 1.0;
if (x > q)
x = q;
D = *p;
if (x > D + 1.0)
x = D + 1.0;
*(p++) = x;
}
}
{
double q = *end;
free(row);
return q;
}
}
/**
* lev_set_distance:
* @n1: The length of @lengths1 and @strings1.
* @lengths1: The lengths of strings in @strings1.
* @strings1: An array of strings that may contain NUL characters.
* @n2: The length of @lengths2 and @strings2.
* @lengths2: The lengths of strings in @strings2.
* @strings2: An array of strings that may contain NUL characters.
*
* Finds the distance between string sets @strings1 and @strings2.
*
* The difference from lev_edit_seq_distance() is that order doesn't matter.
* The optimal association of @strings1 and @strings2 is found first and
* the similarity is computed for that.
*
* Uses the sequential Munkers-Blackman algorithm.
*
* Returns: The distance of the two sets.
**/
_LEV_STATIC_PY double
lev_set_distance(size_t n1, const size_t *lengths1,
const lev_byte *strings1[],
size_t n2, const size_t *lengths2,
const lev_byte *strings2[])
{
double *dists; /* the (modified) distance matrix, indexed [row*n1 + col] */
double *r;
size_t i, j;
size_t *map;
double sum;
/* catch trivial cases */
if (n1 == 0)
return (double)n2;
if (n2 == 0)
return (double)n1;
/* make the number of columns (n1) smaller than the number of rows */
if (n1 > n2) {
size_t nx = n1;
const size_t *lx = lengths1;
const lev_byte **sx = strings1;
n1 = n2;
n2 = nx;
lengths1 = lengths2;
lengths2 = lx;
strings1 = strings2;
strings2 = sx;
}
/* compute distances from each to each */
r = dists = (double*)safe_malloc_3(n1, n2, sizeof(double));
if (!r)
return -1.0;
for (i = 0; i < n2; i++) {
size_t len2 = lengths2[i];
const lev_byte *str2 = strings2[i];
const size_t *len1p = lengths1;
const lev_byte **str1p = strings1;
for (j = 0; j < n1; j++) {
size_t l = len2 + *len1p;
if (l == 0) {
/* both strings are empty: distance 0, but advance the pointers so they
 * stay in sync with j */
*(r++) = 0.0;
len1p++;
str1p++;
}
else {
size_t d = lev_edit_distance(len2, str2, *(len1p++), *(str1p)++, 1);
if (d == (size_t)(-1)) {
free(dists);
return -1.0;
}
*(r++) = (double)d/l;
}
}
}
/* find the optimal mapping between the two sets */
map = munkers_blackman(n1, n2, dists);
if (!map)
return -1.0;
/* sum the set distance */
sum = n2 - n1;
for (j = 0; j < n1; j++) {
size_t l;
i = map[j];
l = lengths1[j] + lengths2[i];
if (l > 0) {
size_t d = lev_edit_distance(lengths1[j], strings1[j],
lengths2[i], strings2[i], 1);
if (d == (size_t)(-1)) {
free(map);
return -1.0;
}
sum += 2.0*d/l;
}
}
free(map);
return sum;
}
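/*
 * Illustrative sketch (comment only, example names): unlike
 * lev_edit_seq_distance(), the order of the items does not matter here, so
 * listing the same strings in a different order still gives distance zero.
 *
 *   const lev_byte *a[2] = { (const lev_byte*)"abc", (const lev_byte*)"xyz" };
 *   const lev_byte *b[2] = { (const lev_byte*)"xyz", (const lev_byte*)"abc" };
 *   size_t la[2] = { 3, 3 };
 *   size_t lb[2] = { 3, 3 };
 *   double d = lev_set_distance(2, la, a, 2, lb, b);
 *
 * The assignment step pairs "abc" with "abc" and "xyz" with "xyz", so d is
 * 0.0, while the sequence distance of the same two arrays would be positive.
 * A negative return value signals an allocation failure.
 */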
/**
* lev_u_set_distance:
* @n1: The length of @lengths1 and @strings1.
* @lengths1: The lengths of strings in @strings1.
* @strings1: An array of strings that may contain NUL characters.
* @n2: The length of @lengths2 and @strings2.
* @lengths2: The lengths of strings in @strings2.
* @strings2: An array of strings that may contain NUL characters.
*
* Finds the distance between string sets @strings1 and @strings2.
*
* The difference from lev_u_edit_seq_distance() is that order doesn't matter.
* The optimal association of @strings1 and @strings2 is found first and
* the similarity is computed for that.
*
* Uses the sequential Munkers-Blackman algorithm.
*
* Returns: The distance of the two sets.
**/
_LEV_STATIC_PY double
lev_u_set_distance(size_t n1, const size_t *lengths1,
const lev_wchar *strings1[],
size_t n2, const size_t *lengths2,
const lev_wchar *strings2[])
{
double *dists; /* the (modified) distance matrix, indexed [row*n1 + col] */
double *r;
size_t i, j;
size_t *map;
double sum;
/* catch trivial cases */
if (n1 == 0)
return (double)n2;
if (n2 == 0)
return (double)n1;
/* make the number of columns (n1) smaller than the number of rows */
if (n1 > n2) {
size_t nx = n1;
const size_t *lx = lengths1;
const lev_wchar **sx = strings1;
n1 = n2;
n2 = nx;
lengths1 = lengths2;
lengths2 = lx;
strings1 = strings2;
strings2 = sx;
}
/* compute distances from each to each */
r = dists = (double*)safe_malloc_3(n1, n2, sizeof(double));
if (!r)
return -1.0;
for (i = 0; i < n2; i++) {
size_t len2 = lengths2[i];
const lev_wchar *str2 = strings2[i];
const size_t *len1p = lengths1;
const lev_wchar **str1p = strings1;
for (j = 0; j < n1; j++) {
size_t l = len2 + *len1p;
if (l == 0) {
/* both strings are empty: distance 0, but advance the pointers so they
 * stay in sync with j */
*(r++) = 0.0;
len1p++;
str1p++;
}
else {
size_t d = lev_u_edit_distance(len2, str2, *(len1p++), *(str1p)++, 1);
if (d == (size_t)(-1)) {
free(dists);
return -1.0;
}
*(r++) = (double)d/l;
}
}
}
/* find the optimal mapping between the two sets */
map = munkers_blackman(n1, n2, dists);
if (!map)
return -1.0;
/* sum the set distance */
sum = n2 - n1;
for (j = 0; j < n1; j++) {
size_t l;
i = map[j];
l = lengths1[j] + lengths2[i];
if (l > 0) {
size_t d = lev_u_edit_distance(lengths1[j], strings1[j],
lengths2[i], strings2[i], 1);
if (d == (size_t)(-1)) {
free(map);
return -1.0;
}
sum += 2.0*d/l;
}
}
free(map);
return sum;
}
/*
 * Munkers-Blackman algorithm (solves the assignment problem on @dists).
 *
 * Takes ownership of the n1 x n2 cost matrix @dists and frees it in all
 * cases.  Returns a newly allocated array mapping each column to the row of
 * its assigned entry (to be freed by the caller), or NULL on failure.
 */
static size_t*
munkers_blackman(size_t n1, size_t n2, double *dists)
{
size_t i, j;
size_t *covc, *covr; /* 1 if column/row is covered */
/* these contain 1-based indices, so we can use zero as `none'
* zstarr: column of a z* in given row
* zstarc: row of a z* in given column
* zprimer: column of a z' in given row */
size_t *zstarr, *zstarc, *zprimer;
/* allocate memory; on failure also free dists, which this function owns */
covc = calloc(n1, sizeof(size_t));
if (!covc) {
free(dists);
return NULL;
}
zstarc = calloc(n1, sizeof(size_t));
if (!zstarc) {
free(covc);
free(dists);
return NULL;
}
covr = calloc(n2, sizeof(size_t));
if (!covr) {
free(zstarc);
free(covc);
free(dists);
return NULL;
}
zstarr = calloc(n2, sizeof(size_t));
if (!zstarr) {
free(covr);
free(zstarc);
free(covc);
free(dists);
return NULL;
}
zprimer = calloc(n2, sizeof(size_t));
if (!zprimer) {
free(zstarr);
free(covr);
free(zstarc);
free(covc);
free(dists);
return NULL;
}
/* step 0 (subtract minimal distance) and step 1 (find zeroes) */
for (j = 0; j < n1; j++) {
size_t minidx = 0;
double *col = dists + j;
double min = *col;
double *p = col + n1;
for (i = 1; i < n2; i++) {
if (min > *p) {
minidx = i;
min = *p;
}
p += n1;
}
/* subtract */
p = col;
for (i = 0; i < n2; i++) {
*p -= min;
if (*p < LEV_EPSILON)
*p = 0.0;
p += n1;
}
/* star the zero, if possible */
if (!zstarc[j] && !zstarr[minidx]) {
zstarc[j] = minidx + 1;
zstarr[minidx] = j + 1;
}
else {
/* otherwise try to find some other zero in the column */
p = col;
for (i = 0; i < n2; i++) {
if (i != minidx && *p == 0.0
&& !zstarc[j] && !zstarr[i]) {
zstarc[j] = i + 1;
zstarr[i] = j + 1;
break;
}
p += n1;
}
}
}
/* main */
while (1) {
/* step 2 (cover columns containing z*) */
{
size_t nc = 0;
for (j = 0; j < n1; j++) {
if (zstarc[j]) {
covc[j] = 1;
nc++;
}
}
if (nc == n1)
break;
}
/* step 3 (find uncovered zeroes) */
while (1) {
step_3:
/* search uncovered matrix entries */
for (j = 0; j < n1; j++) {
double *p = dists + j;
if (covc[j])
continue;
for (i = 0; i < n2; i++) {
if (!covr[i] && *p == 0.0) {
/* when a zero is found, prime it */
zprimer[i] = j + 1;
if (zstarr[i]) {
/* if there's a z* in the same row,
* uncover the column, cover the row and redo */
covr[i] = 1;
covc[zstarr[i] - 1] = 0;
goto step_3;
}
/* if there's no z*,
* we are at the end of our path and can convert z'
* to z* */
goto step_4;
}
p += n1;
}
}
/* step 5 (new zero manufacturer)
* we only get here when no uncovered zero was found at all */
{
/* find the smallest uncovered entry */
double min = LEV_INFINITY;
for (j = 0; j < n1; j++) {
double *p = dists + j;
if (covc[j])
continue;
for (i = 0; i < n2; i++) {
if (!covr[i] && min > *p) {
min = *p;
}
p += n1;
}
}
/* add it to all covered rows */
for (i = 0; i < n2; i++) {
double *p = dists + i*n1;
if (!covr[i])
continue;
for (j = 0; j < n1; j++)
*(p++) += min;
}
/* subtract it from all uncovered columns */
for (j = 0; j < n1; j++) {
double *p = dists + j;
if (covc[j])
continue;
for (i = 0; i < n2; i++) {
*p -= min;
if (*p < LEV_EPSILON)
*p = 0.0;
p += n1;
}
}
}
}
/* step 4 (increment the number of z*)
* i is the row number (we get it from step 3) */
step_4:
i++;
do {
size_t x = i;
i--;
j = zprimer[i] - 1; /* move to z' in the same row */
zstarr[i] = j + 1; /* mark it as z* in row buffer */
i = zstarc[j]; /* move to z* in the same column */
zstarc[j] = x; /* mark the z' as being new z* */
} while (i);
memset(zprimer, 0, n2*sizeof(size_t));
memset(covr, 0, n2*sizeof(size_t));
memset(covc, 0, n1*sizeof(size_t));
}
free(dists);
free(covc);
free(covr);
/*free(zstarc); this is the result */
free(zstarr);
free(zprimer);
for (j = 0; j < n1; j++)
zstarc[j]--;
return zstarc;
}
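/*
 * A small worked illustration of what munkers_blackman() computes (comment
 * only, example values): for the 2x2 cost matrix
 *
 *   row 0:  0.1  0.9
 *   row 1:  0.8  0.2
 *
 * passed as a heap-allocated dists = { 0.1, 0.9, 0.8, 0.2 } with n1 = n2 = 2,
 * the returned map satisfies map[0] == 0 and map[1] == 1, i.e. column j is
 * assigned to row map[j] so that the total cost (0.1 + 0.2 here) is minimal.
 * The matrix is consumed by the call and the caller frees the returned map.
 */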
/* }}} */
/****************************************************************************
*
* Editops and other difflib-like stuff.
*
****************************************************************************/
/* {{{ */
/**
* lev_editops_check_errors:
* @len1: The length of an eventual @ops source string.
* @len2: The length of an eventual @ops destination string.
* @n: The length of @ops.
* @ops: An array of elementary edit operations.
*
* Checks whether @ops is consistent and applicable as a partial edit from a
* string of length @len1 to a string of length @len2.
*
* Returns: Zero if @ops seems OK, a nonzero error code otherwise.
**/
_LEV_STATIC_PY int
lev_editops_check_errors(size_t len1, size_t len2,
size_t n, const LevEditOp *ops)
{
const LevEditOp *o;
size_t i;
if (!n)
return LEV_EDIT_ERR_OK;
/* check bounds */
o = ops;
for (i = n; i; i--, o++) {
if (o->type >= LEV_EDIT_LAST)
return LEV_EDIT_ERR_TYPE;
if (o->spos > len1 || o->dpos > len2)
return LEV_EDIT_ERR_OUT;
if (o->spos == len1 && o->type != LEV_EDIT_INSERT)
return LEV_EDIT_ERR_OUT;
if (o->dpos == len2 && o->type != LEV_EDIT_DELETE)
return LEV_EDIT_ERR_OUT;
}
/* check ordering */
o = ops + 1;
for (i = n - 1; i; i--, o++, ops++) {
if (o->spos < ops->spos || o->dpos < ops->dpos)
return LEV_EDIT_ERR_ORDER;
}
return LEV_EDIT_ERR_OK;
}
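/*
 * Illustrative sketch (comment only): validating an externally supplied edit
 * script before using it, with example-only variable names.
 *
 *   int err = lev_editops_check_errors(len1, len2, n, ops);
 *   if (err != LEV_EDIT_ERR_OK)
 *     return err;
 *
 * The analogous lev_opcodes_check_errors() below performs the same kind of
 * validation for difflib-style block operations.
 */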
/**
* lev_opcodes_check_errors:
* @len1: The length of an eventual @ops source string.
* @len2: The length of an eventual @ops destination string.
* @nb: The length of @bops.
* @bops: An array of difflib block edit operation codes.
*
* Checks whether @bops is consistent and applicable as an edit from a
* string of length @len1 to a string of length @len2.
*
* Returns: Zero if @bops seems OK, a nonzero error code otherwise.
**/
_LEV_STATIC_PY int
lev_opcodes_check_errors(size_t len1, size_t len2,
size_t nb, const LevOpCode *bops)
{
const LevOpCode *b;
size_t i;
if (!nb)
return 1; /* an empty opcode list is treated as invalid */
/* check completeness */
if (bops->sbeg || bops->dbeg
|| bops[nb - 1].send != len1 || bops[nb - 1].dend != len2)
return LEV_EDIT_ERR_SPAN;
/* check bounds and block consistency */
b = bops;
for (i = nb; i; i--, b++) {
if (b->send > len1 || b->dend > len2)
return LEV_EDIT_ERR_OUT;
switch (b->type) {
case LEV_EDIT_KEEP:
case LEV_EDIT_REPLACE:
if (b->dend - b->dbeg != b->send - b->sbeg || b->dend == b->dbeg)
return LEV_EDIT_ERR_BLOCK;
break;
case LEV_EDIT_INSERT:
if (b->dend - b->dbeg == 0 || b->send - b->sbeg != 0)
return LEV_EDIT_ERR_BLOCK;
break;
case LEV_EDIT_DELETE:
if (b->send - b->sbeg == 0 || b->dend - b->dbeg != 0)
return LEV_EDIT_ERR_BLOCK;
break;
default:
return LEV_EDIT_ERR_TYPE;
break;
}
}
/* check ordering */
b = bops + 1;
for (i = nb - 1; i; i--, b++, bops++) {
if (b->sbeg != bops->send || b->dbeg != bops->dend)
return LEV_EDIT_ERR_ORDER;
}
return LEV_EDIT_ERR_OK;
}
/**
* lev_editops_invert:
* @n: The length of @ops.
* @ops: An array of elementary edit operations.
*
* Inverts the sense of @ops. It is modified in place.
*
* In other words, @ops becomes a valid partial edit for the original source
* and destination strings with their roles exchanged.
**/
_LEV_STATIC_PY void
lev_editops_invert(size_t n, LevEditOp *ops)
{
size_t i;
for (i = n; i; i--, ops++) {
size_t z;
z = ops->dpos;
ops->dpos = ops->spos;
ops->spos = z;
/* swap INSERT <-> DELETE; this relies on the LevEditType encoding where
 * these two are the only codes with bit 1 set and they differ only in
 * bit 0, while KEEP and REPLACE invert to themselves */
if (ops->type & 2)
ops->type ^= 1;
}
}
/**
* lev_editops_apply:
* @len1: The length of @string1.
* @string1: A string of length @len1, may contain NUL characters.
* @len2: The length of @string2.
* @string2: A string of length @len2, may contain NUL characters.
* @n: The size of @ops.
* @ops: An array of elementary edit operations.
* @len: Where the size of the resulting string should be stored.
*
* Applies a partial edit @ops from @string1 to @string2.
*
* NB: @ops is not checked for applicability.
*
* Returns: The result of the partial edit as a newly allocated string, its
* length is stored in @len.
**/
_LEV_STATIC_PY lev_byte*
lev_editops_apply(size_t len1, const lev_byte *string1,
__attribute__((unused)) size_t len2, const lev_byte *string2,
size_t n, const LevEditOp *ops,
size_t *len)
{
lev_byte *dst, *dpos; /* destination string */
const lev_byte *spos; /* source string position */
size_t i, j;
/* this looks too complex for such a simple task, but note ops is not
* a complete edit sequence, we have to be able to apply anything anywhere */
dpos = dst = (lev_byte*)safe_malloc((n + len1), sizeof(lev_byte));
if (!dst) {
*len = (size_t)(-1);
return NULL;
}
spos = string1;
for (i = n; i; i--, ops++) {
/* XXX: this is fine with gcc internal memcpy, but when memcpy is
* actually a function, it may be pretty slow */
j = ops->spos - (spos - string1) + (ops->type == LEV_EDIT_KEEP);
if (j) {
memcpy(dpos, spos, j*sizeof(lev_byte));
spos += j;
dpos += j;
}
switch (ops->type) {
case LEV_EDIT_DELETE:
spos++;
break;
case LEV_EDIT_REPLACE:
spos++;
/* fall through */
case LEV_EDIT_INSERT:
*(dpos++) = string2[ops->dpos];
break;
default:
break;
}
}
j = len1 - (spos - string1);
if (j) {
memcpy(dpos, spos, j*sizeof(lev_byte));
spos += j;
dpos += j;
}
*len = dpos - dst;
/* possible realloc failure is detected with *len != 0 */
return realloc(dst, *len*sizeof(lev_byte));
}
/**
* lev_u_editops_apply:
* @len1: The length of @string1.
* @string1: A string of length @len1, may contain NUL characters.
* @len2: The length of @string2.
* @string2: A string of length @len2, may contain NUL characters.
* @n: The size of @ops.
* @ops: An array of elementary edit operations.
* @len: Where the size of the resulting string should be stored.
*
* Applies a partial edit @ops from @string1 to @string2.
*
* NB: @ops is not checked for applicability.
*
* Returns: The result of the partial edit as a newly allocated string, its
* length is stored in @len.
**/
_LEV_STATIC_PY lev_wchar*
lev_u_editops_apply(size_t len1, const lev_wchar *string1,
__attribute__((unused)) size_t len2,
const lev_wchar *string2,
size_t n, const LevEditOp *ops,
size_t *len)
{
lev_wchar *dst, *dpos; /* destination string */
const lev_wchar *spos; /* source string position */
size_t i, j;
/* this looks too complex for such a simple task, but note ops is not
* a complete edit sequence, we have to be able to apply anything anywhere */
dpos = dst = (lev_wchar*)safe_malloc((n + len1), sizeof(lev_wchar));
if (!dst) {
*len = (size_t)(-1);
return NULL;
}
spos = string1;
for (i = n; i; i--, ops++) {
/* XXX: this is fine with gcc internal memcpy, but when memcpy is
* actually a function, it may be pretty slow */
j = ops->spos - (spos - string1) + (ops->type == LEV_EDIT_KEEP);
if (j) {
memcpy(dpos, spos, j*sizeof(lev_wchar));
spos += j;
dpos += j;
}
switch (ops->type) {
case LEV_EDIT_DELETE:
spos++;
break;
case LEV_EDIT_REPLACE:
spos++;
/* fall through */
case LEV_EDIT_INSERT:
*(dpos++) = string2[ops->dpos];
break;
default:
break;
}
}
j = len1 - (spos - string1);
if (j) {
memcpy(dpos, spos, j*sizeof(lev_wchar));
spos += j;
dpos += j;
}
*len = dpos - dst;
/* possible realloc failure is detected with *len != 0 */
return realloc(dst, *len*sizeof(lev_wchar));
}
/**
* editops_from_cost_matrix:
* @len1: The length of @string1.
* @string1: A string of length @len1, may contain NUL characters.
* @off1: The offset where the matrix starts from the start of @string1.
* @len2: The length of @string2.
* @string2: A string of length @len2, may contain NUL characters.
* @off2: The offset where the matrix starts from the start of @string2.
* @matrix: The cost matrix.
* @n: Where the number of edit operations should be stored.
*
* Reconstructs the optimal edit sequence from the cost matrix @matrix.
*
* The matrix is freed.
*
* Returns: The optimal edit sequence, as a newly allocated array of
* elementary edit operations, its length is stored in @n.
**/
static LevEditOp*
editops_from_cost_matrix(size_t len1, const lev_byte *string1, size_t off1,
size_t len2, const lev_byte *string2, size_t off2,
size_t *matrix, size_t *n)
{
size_t *p;
size_t i, j, pos;
LevEditOp *ops;
int dir = 0;
pos = *n = matrix[len1*len2 - 1];
if (!*n) {
free(matrix);
return NULL;
}
ops = (LevEditOp*)safe_malloc((*n), sizeof(LevEditOp));
if (!ops) {
free(matrix);
*n = (size_t)(-1);
return NULL;
}
i = len1 - 1;
j = len2 - 1;
p = matrix + len1*len2 - 1;
while (i || j) {
/* prefer continuing in the same direction */
if (dir < 0 && j && *p == *(p - 1) + 1) {
pos--;
ops[pos].type = LEV_EDIT_INSERT;
ops[pos].spos = i + off1;
ops[pos].dpos = --j + off2;
p--;
continue;
}
if (dir > 0 && i && *p == *(p - len2) + 1) {
pos--;
ops[pos].type = LEV_EDIT_DELETE;
ops[pos].spos = --i + off1;
ops[pos].dpos = j + off2;
p -= len2;
continue;
}
if (i && j && *p == *(p - len2 - 1)
&& string1[i - 1] == string2[j - 1]) {
/* don't be stupid like difflib, don't store LEV_EDIT_KEEP */
i--;
j--;
p -= len2 + 1;
dir = 0;
continue;
}
if (i && j && *p == *(p - len2 - 1) + 1) {
pos--;
ops[pos].type = LEV_EDIT_REPLACE;
ops[pos].spos = --i + off1;
ops[pos].dpos = --j + off2;
p -= len2 + 1;
dir = 0;
continue;
}
/* we can't turn directly from -1 to 1; in that case it would be better
* to go diagonally, but check it (dir == 0) */
if (dir == 0 && j && *p == *(p - 1) + 1) {
pos--;
ops[pos].type = LEV_EDIT_INSERT;
ops[pos].spos = i + off1;
ops[pos].dpos = --j + off2;
p--;
dir = -1;
continue;
}
if (dir == 0 && i && *p == *(p - len2) + 1) {
pos--;
ops[pos].type = LEV_EDIT_DELETE;
ops[pos].spos = --i + off1;
ops[pos].dpos = j + off2;
p -= len2;
dir = 1;
continue;
}
/* coredump right now, later might be too late ;-) */
assert("lost in the cost matrix" == NULL);
}
free(matrix);
return ops;
}
/**
* lev_editops_find:
* @len1: The length of @string1.
* @string1: A string of length @len1, may contain NUL characters.
* @len2: The length of @string2.
* @string2: A string of length @len2, may contain NUL characters.
* @n: Where the number of edit operations should be stored.
*
* Find an optimal edit sequence from @string1 to @string2.
*
* When there's more than one optimal sequence, one is arbitrarily (though
* deterministically) chosen.
*
* Returns: The optimal edit sequence, as a newly allocated array of
* elementary edit operations, its length is stored in @n.
* It is normalized, i.e., keep operations are not included.
**/
_LEV_STATIC_PY LevEditOp*
lev_editops_find(size_t len1, const lev_byte *string1,
size_t len2, const lev_byte *string2,
size_t *n)
{
size_t len1o, len2o;
size_t i;
size_t *matrix; /* cost matrix */
/* strip common prefix */
len1o = 0;
while (len1 > 0 && len2 > 0 && *string1 == *string2) {
len1--;
len2--;
string1++;
string2++;
len1o++;
}
len2o = len1o;
/* strip common suffix */
while (len1 > 0 && len2 > 0 && string1[len1-1] == string2[len2-1]) {
len1--;
len2--;
}
len1++;
len2++;
/* initialize first row and column */
matrix = (size_t*)safe_malloc_3(len1, len2, sizeof(size_t));
if (!matrix) {
*n = (size_t)(-1);
return NULL;
}
for (i = 0; i < len2; i++)
matrix[i] = i;
for (i = 1; i < len1; i++)
matrix[len2*i] = i;
/* find the costs and fill the matrix */
for (i = 1; i < len1; i++) {
size_t *prev = matrix + (i - 1)*len2;
size_t *p = matrix + i*len2;
size_t *end = p + len2 - 1;
const lev_byte char1 = string1[i - 1];
const lev_byte *char2p = string2;
size_t x = i;
p++;
while (p <= end) {
size_t c3 = *(prev++) + (char1 != *(char2p++));
x++;
if (x > c3)
x = c3;
c3 = *prev + 1;
if (x > c3)
x = c3;
*(p++) = x;
}
}
/* find the way back */
return editops_from_cost_matrix(len1, string1, len1o,
len2, string2, len2o,
matrix, n);
}
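/*
 * Illustrative sketch (comment only, example names): computing an edit script
 * and replaying it with lev_editops_apply() defined above; error handling is
 * reduced to NULL checks.
 *
 *   const lev_byte *s1 = (const lev_byte*)"kitten";
 *   const lev_byte *s2 = (const lev_byte*)"sitting";
 *   size_t n, rlen;
 *   LevEditOp *ops = lev_editops_find(6, s1, 7, s2, &n);
 *   lev_byte *r = ops ? lev_editops_apply(6, s1, 7, s2, n, ops, &rlen) : NULL;
 *
 * On success r is a malloc()ed buffer of rlen == 7 bytes holding "sitting"
 * (not NUL-terminated); since the script is normalized, n equals the
 * Levenshtein distance of the two strings (3 for this pair).  Both r and ops
 * must be freed by the caller.
 */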
/**
* ueditops_from_cost_matrix:
* @len1: The length of @string1.
* @string1: A string of length @len1, may contain NUL characters.
* @o1: The offset where the matrix starts from the start of @string1.
* @len2: The length of @string2.
* @string2: A string of length @len2, may contain NUL characters.
* @o2: The offset where the matrix starts from the start of @string2.
* @matrix: The cost matrix.
* @n: Where the number of edit operations should be stored.
*
* Reconstructs the optimal edit sequence from the cost matrix @matrix.
*
* The matrix is freed.
*
* Returns: The optimal edit sequence, as a newly allocated array of
* elementary edit operations, its length is stored in @n.
**/
static LevEditOp*
ueditops_from_cost_matrix(size_t len1, const lev_wchar *string1, size_t o1,
size_t len2, const lev_wchar *string2, size_t o2,
size_t *matrix, size_t *n)
{
size_t *p;
size_t i, j, pos;
LevEditOp *ops;
int dir = 0;
pos = *n = matrix[len1*len2 - 1];
if (!*n) {
free(matrix);
return NULL;
}
ops = (LevEditOp*)safe_malloc((*n), sizeof(LevEditOp));
if (!ops) {
free(matrix);
*n = (size_t)(-1);
return NULL;
}
i = len1 - 1;
j = len2 - 1;
p = matrix + len1*len2 - 1;
while (i || j) {
/* prefer continuing in the same direction */
if (dir < 0 && j && *p == *(p - 1) + 1) {
pos--;
ops[pos].type = LEV_EDIT_INSERT;
ops[pos].spos = i + o1;
ops[pos].dpos = --j + o2;
p--;
continue;
}
if (dir > 0 && i && *p == *(p - len2) + 1) {
pos--;
ops[pos].type = LEV_EDIT_DELETE;
ops[pos].spos = --i + o1;
ops[pos].dpos = j + o2;
p -= len2;
continue;
}
if (i && j && *p == *(p - len2 - 1)
&& string1[i - 1] == string2[j - 1]) {
/* don't be stupid like difflib, don't store LEV_EDIT_KEEP */
i--;
j--;
p -= len2 + 1;
dir = 0;
continue;
}
if (i && j && *p == *(p - len2 - 1) + 1) {
pos--;
ops[pos].type = LEV_EDIT_REPLACE;
ops[pos].spos = --i + o1;
ops[pos].dpos = --j + o2;
p -= len2 + 1;
dir = 0;
continue;
}
/* we can't turn directly from -1 to 1; in that case it would be better
* to go diagonally, but check it (dir == 0) */
if (dir == 0 && j && *p == *(p - 1) + 1) {
pos--;
ops[pos].type = LEV_EDIT_INSERT;
ops[pos].spos = i + o1;
ops[pos].dpos = --j + o2;
p--;
dir = -1;
continue;
}
if (dir == 0 && i && *p == *(p - len2) + 1) {
pos--;
ops[pos].type = LEV_EDIT_DELETE;
ops[pos].spos = --i + o1;
ops[pos].dpos = j + o2;
p -= len2;
dir = 1;
continue;
}
/* coredump right now, later might be too late ;-) */
assert("lost in the cost matrix" == NULL);
}
free(matrix);
return ops;
}
/**
* lev_u_editops_find:
* @len1: The length of @string1.
* @string1: A string of length @len1, may contain NUL characters.
* @len2: The length of @string2.
* @string2: A string of length @len2, may contain NUL characters.
* @n: Where the number of edit operations should be stored.
*
* Find an optimal edit sequence from @string1 to @string2.
*
* When there's more than one optimal sequence, one is arbitrarily (though
* deterministically) chosen.
*
* Returns: The optimal edit sequence, as a newly allocated array of
* elementary edit operations, its length is stored in @n.
* It is normalized, i.e., keep operations are not included.
**/
_LEV_STATIC_PY LevEditOp*
lev_u_editops_find(size_t len1, const lev_wchar *string1,
size_t len2, const lev_wchar *string2,
size_t *n)
{
size_t len1o, len2o;
size_t i;
size_t *matrix; /* cost matrix */
/* strip common prefix */
len1o = 0;
while (len1 > 0 && len2 > 0 && *string1 == *string2) {
len1--;
len2--;
string1++;
string2++;
len1o++;
}
len2o = len1o;
/* strip common suffix */
while (len1 > 0 && len2 > 0 && string1[len1-1] == string2[len2-1]) {
len1--;
len2--;
}
len1++;
len2++;
/* initialize first row and column */
matrix = (size_t*)safe_malloc_3(len1, len2, sizeof(size_t));
if (!matrix) {
*n = (size_t)(-1);
return NULL;
}
for (i = 0; i < len2; i++)
matrix[i] = i;
for (i = 1; i < len1; i++)
matrix[len2*i] = i;
/* find the costs and fill the matrix */
for (i = 1; i < len1; i++) {
size_t *prev = matrix + (i - 1)*len2;
size_t *p = matrix + i*len2;
size_t *end = p + len2 - 1;
const lev_wchar char1 = string1[i - 1];
const lev_wchar *char2p = string2;
size_t x = i;
p++;
while (p <= end) {
size_t c3 = *(prev++) + (char1 != *(char2p++));
x++;
if (x > c3)
x = c3;
c3 = *prev + 1;
if (x > c3)
x = c3;
*(p++) = x;
}
}
/* find the way back */
return ueditops_from_cost_matrix(len1, string1, len1o,
len2, string2, len2o,
matrix, n);
}
/**
* lev_opcodes_to_editops:
* @nb: The length of @bops.
* @bops: An array of difflib block edit operation codes.
* @n: Where the number of edit operations should be stored.
* @keepkeep: If nonzero, keep operations will be included. Otherwise the
* result will be normalized, i.e. without any keep operations.
*
* Converts difflib block operation codes to elementary edit operations.
*
* Returns: The converted edit operations, as a newly allocated array; its
* size is stored in @n.
**/
_LEV_STATIC_PY LevEditOp*
lev_opcodes_to_editops(size_t nb, const LevOpCode *bops,
size_t *n, int keepkeep)
{
size_t i;
const LevOpCode *b;
LevEditOp *ops, *o;
/* compute the number of atomic operations */
*n = 0;
if (!nb)
return NULL;
b = bops;
if (keepkeep) {
for (i = nb; i; i--, b++) {
size_t sd = b->send - b->sbeg;
size_t dd = b->dend - b->dbeg;
*n += (sd > dd ? sd : dd);
}
}
else {
for (i = nb; i; i--, b++) {
size_t sd = b->send - b->sbeg;
size_t dd = b->dend - b->dbeg;
*n += (b->type != LEV_EDIT_KEEP ? (sd > dd ? sd : dd) : 0);
}
}
/* convert */
o = ops = (LevEditOp*)safe_malloc((*n), sizeof(LevEditOp));
if (!ops) {
*n = (size_t)(-1);
return NULL;
}
b = bops;
for (i = nb; i; i--, b++) {
size_t j;
switch (b->type) {
case LEV_EDIT_KEEP:
if (keepkeep) {
for (j = 0; j < b->send - b->sbeg; j++, o++) {
o->type = LEV_EDIT_KEEP;
o->spos = b->sbeg + j;
o->dpos = b->dbeg + j;
}
}
break;
case LEV_EDIT_REPLACE:
for (j = 0; j < b->send - b->sbeg; j++, o++) {
o->type = LEV_EDIT_REPLACE;
o->spos = b->sbeg + j;
o->dpos = b->dbeg + j;
}
break;
case LEV_EDIT_DELETE:
for (j = 0; j < b->send - b->sbeg; j++, o++) {
o->type = LEV_EDIT_DELETE;
o->spos = b->sbeg + j;
o->dpos = b->dbeg;
}
break;
case LEV_EDIT_INSERT:
for (j = 0; j < b->dend - b->dbeg; j++, o++) {
o->type = LEV_EDIT_INSERT;
o->spos = b->sbeg;
o->dpos = b->dbeg + j;
}
break;
default:
break;
}
}
assert((size_t)(o - ops) == *n);
return ops;
}
/**
* lev_editops_to_opcodes:
* @n: The size of @ops.
* @ops: An array of elementary edit operations.
* @nb: Where the number of difflib block operation codes should be stored.
* @len1: The length of the source string.
* @len2: The length of the destination string.
*
* Converts elementary edit operations to difflib block operation codes.
*
* Note the string lengths are necessary since difflib doesn't allow omitting
* keep operations.
*
* Returns: The converted block operation codes, as a newly allocated array;
* its length is stored in @nb.
**/
_LEV_STATIC_PY LevOpCode*
lev_editops_to_opcodes(size_t n, const LevEditOp *ops, size_t *nb,
size_t len1, size_t len2)
{
size_t nbl, i, spos, dpos;
const LevEditOp *o;
LevOpCode *bops, *b;
LevEditType type;
/* compute the number of blocks */
nbl = 0;
o = ops;
spos = dpos = 0;
type = LEV_EDIT_KEEP;
for (i = n; i; ) {
/* simply pretend there are no keep blocks */
while (o->type == LEV_EDIT_KEEP && --i)
o++;
if (!i)
break;
if (spos < o->spos || dpos < o->dpos) {
nbl++;
spos = o->spos;
dpos = o->dpos;
}
nbl++;
type = o->type;
switch (type) {
case LEV_EDIT_REPLACE:
do {
spos++;
dpos++;
i--;
o++;
} while (i && o->type == type && spos == o->spos && dpos == o->dpos);
break;
case LEV_EDIT_DELETE:
do {
spos++;
i--;
o++;
} while (i && o->type == type && spos == o->spos && dpos == o->dpos);
break;
case LEV_EDIT_INSERT:
do {
dpos++;
i--;
o++;
} while (i && o->type == type && spos == o->spos && dpos == o->dpos);
break;
default:
break;
}
}
if (spos < len1 || dpos < len2)
nbl++;
/* convert */
b = bops = (LevOpCode*)safe_malloc(nbl, sizeof(LevOpCode));
if (!bops) {
*nb = (size_t)(-1);
return NULL;
}
o = ops;
spos = dpos = 0;
type = LEV_EDIT_KEEP;
for (i = n; i; ) {
/* simply pretend there are no keep blocks */
while (o->type == LEV_EDIT_KEEP && --i)
o++;
if (!i)
break;
b->sbeg = spos;
b->dbeg = dpos;
if (spos < o->spos || dpos < o->dpos) {
b->type = LEV_EDIT_KEEP;
spos = b->send = o->spos;
dpos = b->dend = o->dpos;
b++;
b->sbeg = spos;
b->dbeg = dpos;
}
type = o->type;
switch (type) {
case LEV_EDIT_REPLACE:
do {
spos++;
dpos++;
i--;
o++;
} while (i && o->type == type && spos == o->spos && dpos == o->dpos);
break;
case LEV_EDIT_DELETE:
do {
spos++;
i--;
o++;
} while (i && o->type == type && spos == o->spos && dpos == o->dpos);
break;
case LEV_EDIT_INSERT:
do {
dpos++;
i--;
o++;
} while (i && o->type == type && spos == o->spos && dpos == o->dpos);
break;
default:
break;
}
b->type = type;
b->send = spos;
b->dend = dpos;
b++;
}
if (spos < len1 || dpos < len2) {
assert(len1 - spos == len2 - dpos);
b->type = LEV_EDIT_KEEP;
b->sbeg = spos;
b->dbeg = dpos;
b->send = len1;
b->dend = len2;
b++;
}
assert((size_t)(b - bops) == nbl);
*nb = nbl;
return bops;
}
/**
* lev_opcodes_apply:
* @len1: The length of the source string.
* @string1: A string of length @len1, may contain NUL characters.
* @len2: The length of the destination string.
* @string2: A string of length @len2, may contain NUL characters.
* @nb: The length of @bops.
* @bops: An array of difflib block edit operation codes.
* @len: Where the size of the resulting string should be stored.
*
* Applies a sequence of difflib block operations to a string.
*
* NB: @bops is not checked for applicability.
*
* Returns: The result of the edit as a newly allocated string, its length
* is stored in @len.
**/
_LEV_STATIC_PY lev_byte*
lev_opcodes_apply(size_t len1, const lev_byte *string1,
size_t len2, const lev_byte *string2,
size_t nb, const LevOpCode *bops,
size_t *len)
{
lev_byte *dst, *dpos; /* destination string */
const lev_byte *spos; /* source string position */
size_t i;
/* this looks too complex for such a simple task, but note ops is not
* a complete edit sequence, we have to be able to apply anything anywhere */
dpos = dst = (lev_byte*)safe_malloc((len1 + len2), sizeof(lev_byte));
if (!dst) {
*len = (size_t)(-1);
return NULL;
}
spos = string1;
for (i = nb; i; i--, bops++) {
switch (bops->type) {
case LEV_EDIT_INSERT:
case LEV_EDIT_REPLACE:
memcpy(dpos, string2 + bops->dbeg,
(bops->dend - bops->dbeg)*sizeof(lev_byte));
break;
case LEV_EDIT_KEEP:
memcpy(dpos, string1 + bops->sbeg,
(bops->send - bops->sbeg)*sizeof(lev_byte));
break;
default:
break;
}
spos += bops->send - bops->sbeg;
dpos += bops->dend - bops->dbeg;
}
*len = dpos - dst;
/* possible realloc failure is detected with *len != 0 */
return realloc(dst, *len*sizeof(lev_byte));
}
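/*
 * Illustrative sketch (comment only), continuing the "kitten" -> "sitting"
 * example above: turning a normalized edit script into difflib-style opcodes
 * that also cover the kept spans, then replaying them.
 *
 *   size_t nb, rlen;
 *   LevOpCode *bops = lev_editops_to_opcodes(n, ops, &nb, 6, 7);
 *   lev_byte *r = bops ? lev_opcodes_apply(6, s1, 7, s2, nb, bops, &rlen)
 *                      : NULL;
 *
 * The block codes span the whole of both strings (keep blocks included), so
 * applying them reproduces s2 just like applying the elementary operations;
 * lev_opcodes_to_editops() converts in the opposite direction.
 */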
/**
* lev_u_opcodes_apply:
* @len1: The length of the source string.
* @string1: A string of length @len1, may contain NUL characters.
* @len2: The length of the destination string.
* @string2: A string of length @len2, may contain NUL characters.
* @nb: The length of @bops.
* @bops: An array of difflib block edit operation codes.
* @len: Where the size of the resulting string should be stored.
*
* Applies a sequence of difflib block operations to a string.
*
* NB: @bops is not checked for applicability.
*
* Returns: The result of the edit as a newly allocated string, its length
* is stored in @len.
**/
_LEV_STATIC_PY lev_wchar*
lev_u_opcodes_apply(size_t len1, const lev_wchar *string1,
size_t len2, const lev_wchar *string2,
size_t nb, const LevOpCode *bops,
size_t *len)
{
lev_wchar *dst, *dpos; /* destination string */
const lev_wchar *spos; /* source string position */
size_t i;
/* this looks too complex for such a simple task, but note ops is not
* a complete edit sequence, we have to be able to apply anything anywhere */
dpos = dst = (lev_wchar*)safe_malloc((len1 + len2), sizeof(lev_wchar));
if (!dst) {
*len = (size_t)(-1);
return NULL;
}
spos = string1;
for (i = nb; i; i--, bops++) {
switch (bops->type) {
case LEV_EDIT_INSERT:
case LEV_EDIT_REPLACE:
memcpy(dpos, string2 + bops->dbeg,
(bops->dend - bops->dbeg)*sizeof(lev_wchar));
break;
case LEV_EDIT_KEEP:
memcpy(dpos, string1 + bops->sbeg,
(bops->send - bops->sbeg)*sizeof(lev_wchar));
break;
default:
break;
}
spos += bops->send - bops->sbeg;
dpos += bops->dend - bops->dbeg;
}
*len = dpos - dst;
/* possible realloc failure is detected with *len != 0 */
return realloc(dst, *len*sizeof(lev_wchar));
}
/**
* lev_opcodes_invert:
* @nb: The length of @bops.
* @bops: An array of difflib block edit operation codes.
*
* Inverts the sense of @bops. It is modified in place.
*
* In other words, @bops becomes a valid edit for the original source
* and destination strings with their roles exchanged.
**/
_LEV_STATIC_PY void
lev_opcodes_invert(size_t nb, LevOpCode *bops)
{
size_t i;
for (i = nb; i; i--, bops++) {
size_t z;
z = bops->dbeg;
bops->dbeg = bops->sbeg;
bops->sbeg = z;
z = bops->dend;
bops->dend = bops->send;
bops->send = z;
/* swap INSERT <-> DELETE, as in lev_editops_invert() */
if (bops->type & 2)
bops->type ^= 1;
}
}
/**
* lev_editops_matching_blocks:
* @len1: The length of the source string.
* @len2: The length of the destination string.
* @n: The size of @ops.
* @ops: An array of elementary edit operations.
* @nmblocks: Where the number of matching blocks should be stored.
*
* Computes the matching blocks corresponding to an optimal edit @ops.
*
* Returns: The matching blocks as a newly allocated array, its length is
* stored in @nmblocks.
**/
_LEV_STATIC_PY LevMatchingBlock*
lev_editops_matching_blocks(size_t len1,
size_t len2,
size_t n,
const LevEditOp *ops,
size_t *nmblocks)
{
size_t nmb, i, spos, dpos;
LevEditType type;
const LevEditOp *o;
LevMatchingBlock *mblocks, *mb;
/* compute the number of matching blocks */
nmb = 0;
o = ops;
spos = dpos = 0;
type = LEV_EDIT_KEEP;
for (i = n; i; ) {
/* simply pretend there are no keep blocks */
while (o->type == LEV_EDIT_KEEP && --i)
o++;
if (!i)
break;
if (spos < o->spos || dpos < o->dpos) {
nmb++;
spos = o->spos;
dpos = o->dpos;
}
type = o->type;
switch (type) {
case LEV_EDIT_REPLACE:
do {
spos++;
dpos++;
i--;
o++;
} while (i && o->type == type && spos == o->spos && dpos == o->dpos);
break;
case LEV_EDIT_DELETE:
do {
spos++;
i--;
o++;
} while (i && o->type == type && spos == o->spos && dpos == o->dpos);
break;
case LEV_EDIT_INSERT:
do {
dpos++;
i--;
o++;
} while (i && o->type == type && spos == o->spos && dpos == o->dpos);
break;
default:
break;
}
}
if (spos < len1 || dpos < len2)
nmb++;
/* fill the info */
mb = mblocks = (LevMatchingBlock*)safe_malloc(nmb, sizeof(LevMatchingBlock));
if (!mblocks) {
*nmblocks = (size_t)(-1);
return NULL;
}
o = ops;
spos = dpos = 0;
type = LEV_EDIT_KEEP;
for (i = n; i; ) {
/* simply pretend there are no keep blocks */
while (o->type == LEV_EDIT_KEEP && --i)
o++;
if (!i)
break;
if (spos < o->spos || dpos < o->dpos) {
mb->spos = spos;
mb->dpos = dpos;
mb->len = o->spos - spos;
spos = o->spos;
dpos = o->dpos;
mb++;
}
type = o->type;
switch (type) {
case LEV_EDIT_REPLACE:
do {
spos++;
dpos++;
i--;
o++;
} while (i && o->type == type && spos == o->spos && dpos == o->dpos);
break;
case LEV_EDIT_DELETE:
do {
spos++;
i--;
o++;
} while (i && o->type == type && spos == o->spos && dpos == o->dpos);
break;
case LEV_EDIT_INSERT:
do {
dpos++;
i--;
o++;
} while (i && o->type == type && spos == o->spos && dpos == o->dpos);
break;
default:
break;
}
}
if (spos < len1 || dpos < len2) {
assert(len1 - spos == len2 - dpos);
mb->spos = spos;
mb->dpos = dpos;
mb->len = len1 - spos;
mb++;
}
assert((size_t)(mb - mblocks) == nmb);
*nmblocks = nmb;
return mblocks;
}
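/*
 * Illustrative sketch (comment only), again for the "kitten" -> "sitting"
 * script from the earlier example: the matching blocks are the runs the
 * optimal edit leaves untouched.
 *
 *   size_t nmb;
 *   LevMatchingBlock *mb = lev_editops_matching_blocks(6, 7, n, ops, &nmb);
 *
 * This yields two blocks, "itt" (spos 1, dpos 1, len 3) and "n" (spos 5,
 * dpos 5, len 1); unlike difflib, no trailing zero-length sentinel block is
 * appended.
 */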
/**
* lev_opcodes_matching_blocks:
* @len1: The length of the source string.
* @len2: The length of the destination string.
* @nb: The size of @bops.
* @bops: An array of difflib block edit operation codes.
* @nmblocks: Where the number of matching blocks should be stored.
*
* Computes the matching blocks corresponding to an optimal edit @bops.
*
* Returns: The matching blocks as a newly allocated array, its length is
* stored in @nmblocks.
**/
_LEV_STATIC_PY LevMatchingBlock*
lev_opcodes_matching_blocks(size_t len1,
__attribute__((unused)) size_t len2,
size_t nb,
const LevOpCode *bops,
size_t *nmblocks)
{
size_t nmb, i;
const LevOpCode *b;
LevMatchingBlock *mblocks, *mb;
/* compute the number of matching blocks */
nmb = 0;
b = bops;
for (i = nb; i; i--, b++) {
if (b->type == LEV_EDIT_KEEP) {
nmb++;
/* adjacent KEEP blocks -- we never produce them, but... */
while (i && b->type == LEV_EDIT_KEEP) {
i--;
b++;
}
if (!i)
break;
}
}
/* convert */
mb = mblocks = (LevMatchingBlock*)safe_malloc(nmb, sizeof(LevMatchingBlock));
if (!mblocks) {
*nmblocks = (size_t)(-1);
return NULL;
}
b = bops;
for (i = nb; i; i--, b++) {
if (b->type == LEV_EDIT_KEEP) {
mb->spos = b->sbeg;
mb->dpos = b->dbeg;
/* adjacent KEEP blocks -- we never produce them, but... */
while (i && b->type == LEV_EDIT_KEEP) {
i--;
b++;
}
if (!i) {
mb->len = len1 - mb->spos;
mb++;
break;
}
mb->len = b->sbeg - mb->spos;
mb++;
}
}
assert((size_t)(mb - mblocks) == nmb);
*nmblocks = nmb;
return mblocks;
}
/**
* lev_editops_total_cost:
* @n: The size of @ops.
* @ops: An array of elementary edit operations.
*
* Computes the total cost of operations in @ops.
*
* The costs of elementary operations are all 1.
*
* Returns: The total cost.
**/
_LEV_STATIC_PY size_t
lev_editops_total_cost(size_t n,
const LevEditOp *ops)
{
size_t i, sum = 0;
for (i = n; i; i--, ops++)
sum += !!ops->type;
return sum;
}
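/*
 * Note (comment only): keep operations carry the zero type code and are the
 * only ones the expression !!ops->type above does not count, so for a
 * normalized script such as the one returned by lev_editops_find() the total
 * cost is simply the number of operations and equals the Levenshtein
 * distance of the two strings.
 *
 *   size_t cost = lev_editops_total_cost(n, ops);
 *
 * Here cost == n; only scripts that still contain keep operations (e.g.
 * those produced by lev_opcodes_to_editops() with keepkeep set) cost less
 * than their length.
 */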
/**
* lev_editops_normalize:
* @n: The size of @ops.
* @ops: An array of elementary edit operations.
* @nnorm: Where to store the length of the normalized array.
*
* Normalizes a list of edit operations to contain no keep operations.
*
* Note it returns %NULL for an empty array.
*
* Returns: A newly allocated array of normalized edit operations, its length
* is stored to @nnorm.
**/
_LEV_STATIC_PY LevEditOp*
lev_editops_normalize(size_t n,
const LevEditOp *ops,
size_t *nnorm)
{
size_t nx, i;
const LevEditOp *o;
LevEditOp *opsnorm, *on;
if (!n || !ops) {
*nnorm = 0;
return NULL;
}
nx = 0;
o = ops;
for (i = n; i; i--, o++)
nx += (o->type == LEV_EDIT_KEEP);
*nnorm = n - nx;
if (!*nnorm)
return NULL;
opsnorm = on = (LevEditOp*)safe_malloc((n - nx), sizeof(LevEditOp));
if (!opsnorm) {
*nnorm = (size_t)(-1);
return NULL;
}
o = ops;
for (i = n; i; i--, o++) {
if (o->type == LEV_EDIT_KEEP)
continue;
memcpy(on++, o, sizeof(LevEditOp));
}
return opsnorm;
}
/**
* lev_opcodes_total_cost:
* @nb: The size of @bops.
* @bops: An array of difflib block operation codes.
*
* Computes the total cost of operations in @bops.
*
* The costs of elementary operations are all 1.
*
* Returns: The total cost.
**/
_LEV_STATIC_PY size_t
lev_opcodes_total_cost(size_t nb,
const LevOpCode *bops)
{
size_t i, sum = 0;
for (i = nb; i; i--, bops++) {
switch (bops->type) {
case LEV_EDIT_REPLACE:
case LEV_EDIT_DELETE:
sum += (bops->send - bops->sbeg);
break;
case LEV_EDIT_INSERT:
sum += (bops->dend - bops->dbeg);
break;
default:
break;
}
}
return sum;
}
/**
* lev_editops_subtract:
* @n: The size of @ops.
* @ops: An array of elementary edit operations.
* @ns: The size of @sub.
* @sub: A subsequence (ordered subset) of @ops
* @nrem: Where to store the length of the remainder array.
*
* Subtracts a subsequence of elementary edit operations from a sequence.
*
* The remainder is a sequence that, applied to result of application of @sub,
* gives the same final result as application of @ops to original string.
*
* Returns: A newly allocated array of normalized edit operations, its length
* is stored to @nrem. It is always normalized, i.e., without any
* keep operations. On failure, %NULL is returned.
**/
_LEV_STATIC_PY LevEditOp*
lev_editops_subtract(size_t n,
const LevEditOp *ops,
size_t ns,
const LevEditOp *sub,
size_t *nrem)
{
static const int shifts[] = { 0, 0, 1, -1 };
LevEditOp *rem;
size_t i, j, nr, nn;
int shift;
/* compute remainder size */
*nrem = (size_t)(-1);
nr = nn = 0;
for (i = 0; i < n; i++) {
if (ops[i].type != LEV_EDIT_KEEP)
nr++;
}
for (i = 0; i < ns; i++) {
if (sub[i].type != LEV_EDIT_KEEP)
nn++;
}
if (nn > nr)
return NULL;
nr -= nn;
/* subtract */
/* we could simply return NULL when nr == 0, but then it would be possible
* to subtract *any* sequence of the right length to get an empty sequence
* -- clearly incorrect; so we have to scan the list to check */
rem = nr ? (LevEditOp*)safe_malloc(nr, sizeof(LevEditOp)) : NULL;
if (nr && !rem)
return NULL;
j = nn = shift = 0;
for (i = 0; i < ns; i++) {
while (j < n
&& (ops[j].spos != sub[i].spos
|| ops[j].dpos != sub[i].dpos
|| ops[j].type != sub[i].type)) {
if (ops[j].type != LEV_EDIT_KEEP) {
rem[nn] = ops[j];
rem[nn].spos += shift;
nn++;
}
j++;
}
if (j == n) {
free(rem);
return NULL;
}
shift += shifts[sub[i].type];
j++;
}
while (j < n) {
if (ops[j].type != LEV_EDIT_KEEP) {
rem[nn] = ops[j];
rem[nn].spos += shift;
nn++;
}
j++;
}
assert(nn == nr);
*nrem = nr;
return rem;
}
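/*
 * Illustrative sketch (comment only, example names): splitting an edit script
 * in two and applying it in two steps, assuming ops/n for "kitten" ->
 * "sitting" as in the earlier sketches and some 0 < k < n.
 *
 *   size_t nrem, len_mid, len_out;
 *   LevEditOp *rem = lev_editops_subtract(n, ops, k, ops, &nrem);
 *   lev_byte *mid = lev_editops_apply(6, s1, 7, s2, k, ops, &len_mid);
 *   lev_byte *out = (rem && mid)
 *     ? lev_editops_apply(len_mid, mid, 7, s2, nrem, rem, &len_out)
 *     : NULL;
 *
 * The first k operations form a valid subsequence of the whole script;
 * applying them and then applying the remainder to the intermediate string
 * gives the same final result as applying the whole script at once.
 */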
/* }}} */
/****************************************************************************
*
* Weighted mean strings
*
****************************************************************************/
/*
_LEV_STATIC_PY lev_byte*
lev_weighted_average(size_t len1,
const lev_byte* string1,
size_t len2,
const lev_byte* string2,
size_t n,
const LevEditOp *ops,
LevAveragingType avtype,
size_t total_cost,
double alpha,
size_t *len)
{
return NULL;
}
*/