/*
 * Levenshtein.h -- type definitions and function declarations for the
 * lev_* string-comparison routines.
 */
#ifndef LEVENSHTEIN_H
#define LEVENSHTEIN_H

/*
 * Make size_t available.  #ifndef only sees macros, so the include is
 * skipped solely when size_t has been #defined as a macro beforehand.
 */
#ifndef size_t
# include <stdlib.h>
#endif

/*
 * Prefix placed in front of every function declaration in this header.
 * It expands to nothing unless it was defined before inclusion.
 * NOTE(review): presumably a hook for giving the functions a storage class
 * (e.g. static) when everything is built as one unit -- confirm with the
 * build.  The name is in the namespace the C standard reserves for the
 * implementation; it is kept because includers may already define it.
 */
#ifndef _LEV_STATIC_PY
# define _LEV_STATIC_PY
#endif

/*
 * Element type used by the lev_u_* declarations.  Defaults to wchar_t;
 * define lev_wchar before including this header to substitute another type.
 * The inner #ifndef only detects a macro named wchar_t, not the typedef.
 */
#ifndef lev_wchar
# ifndef wchar_t
#  include <wchar.h>
# endif
# define lev_wchar wchar_t
#endif

/* Element type used by the byte-string (non lev_u_*) declarations. */
typedef unsigned char lev_byte;

/*
 * Kind of a single edit operation; used as the `type` field of LevEditOp
 * and LevOpCode.  The first four values are fixed explicitly.
 */
typedef enum {
  LEV_EDIT_KEEP = 0,
  LEV_EDIT_REPLACE = 1,
  LEV_EDIT_INSERT = 2,
  LEV_EDIT_DELETE = 3,
  LEV_EDIT_LAST  /* == 4; presumably a count/sentinel, not an operation */
} LevEditType;

/*
 * Error codes for edit-operation sequences; LEV_EDIT_ERR_OK is 0 and the
 * remaining values count up from it.
 * NOTE(review): presumably the values returned by the *_check_errors
 * functions (which are declared as returning int) -- confirm.
 */
typedef enum {
  LEV_EDIT_ERR_OK = 0,
  LEV_EDIT_ERR_TYPE,   /* == 1 */
  LEV_EDIT_ERR_OUT,    /* == 2 */
  LEV_EDIT_ERR_ORDER,  /* == 3 */
  LEV_EDIT_ERR_BLOCK,  /* == 4 */
  LEV_EDIT_ERR_SPAN,   /* == 5 */
  LEV_EDIT_ERR_LAST    /* == 6; presumably a count/sentinel */
} LevEditOpError;

/*
 * Averaging-mode selector; values count up from LEV_AVG_HEAD == 0.
 * NOTE(review): no declaration in this header takes this type, so its
 * consumer is not visible here -- confirm where it is used.
 */
typedef enum {
  LEV_AVG_HEAD = 0,
  LEV_AVG_TAIL,    /* == 1 */
  LEV_AVG_SPREAD,  /* == 2 */
  LEV_AVG_BLOCK,   /* == 3 */
  LEV_AVG_RANDOM,  /* == 4 */
  LEV_AVG_LAST     /* == 5; presumably a count/sentinel */
} LevAveragingType;

typedef struct { |
|
LevEditType type; |
|
size_t spos; |
|
size_t dpos; |
|
} LevEditOp; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct { |
|
LevEditType type; |
|
size_t sbeg, send; |
|
size_t dbeg, dend; |
|
} LevOpCode; |
|
|
|
|
|
/*
 * Two positions and a length; the element type returned by the
 * *_matching_blocks functions.
 * NOTE(review): presumably describes `len` equal elements starting at
 * spos in the source and dpos in the destination -- confirm.
 */
typedef struct {
  size_t spos;
  size_t dpos;
  size_t len;
} LevMatchingBlock;

_LEV_STATIC_PY |
|
size_t |
|
lev_edit_distance(size_t len1, |
|
const lev_byte *string1, |
|
size_t len2, |
|
const lev_byte *string2, |
|
int xcost); |
|
|
|
_LEV_STATIC_PY |
|
size_t |
|
lev_u_edit_distance(size_t len1, |
|
const lev_wchar *string1, |
|
size_t len2, |
|
const lev_wchar *string2, |
|
int xcost); |
|
|
|
_LEV_STATIC_PY |
|
size_t |
|
lev_hamming_distance(size_t len, |
|
const lev_byte *string1, |
|
const lev_byte *string2); |
|
|
|
_LEV_STATIC_PY |
|
size_t |
|
lev_u_hamming_distance(size_t len, |
|
const lev_wchar *string1, |
|
const lev_wchar *string2); |
|
|
|
_LEV_STATIC_PY |
|
double |
|
lev_jaro_ratio(size_t len1, |
|
const lev_byte *string1, |
|
size_t len2, |
|
const lev_byte *string2); |
|
|
|
_LEV_STATIC_PY |
|
double |
|
lev_u_jaro_ratio(size_t len1, |
|
const lev_wchar *string1, |
|
size_t len2, |
|
const lev_wchar *string2); |
|
|
|
_LEV_STATIC_PY |
|
double |
|
lev_jaro_winkler_ratio(size_t len1, const lev_byte *string1, |
|
size_t len2, const lev_byte *string2, |
|
double pfweight); |
|
|
|
_LEV_STATIC_PY |
|
double |
|
lev_u_jaro_winkler_ratio(size_t len1, const lev_wchar *string1, |
|
size_t len2, const lev_wchar *string2, |
|
double pfweight); |
|
|
|
_LEV_STATIC_PY |
|
lev_byte* |
|
lev_greedy_median(size_t n, |
|
const size_t *lengths, |
|
const lev_byte *strings[], |
|
const double *weights, |
|
size_t *medlength); |
|
|
|
_LEV_STATIC_PY |
|
lev_wchar* |
|
lev_u_greedy_median(size_t n, |
|
const size_t *lengths, |
|
const lev_wchar *strings[], |
|
const double *weights, |
|
size_t *medlength); |
|
|
|
_LEV_STATIC_PY |
|
lev_byte* |
|
lev_median_improve(size_t len, const lev_byte *s, |
|
size_t n, const size_t *lengths, |
|
const lev_byte *strings[], |
|
const double *weights, |
|
size_t *medlength); |
|
|
|
_LEV_STATIC_PY |
|
lev_wchar* |
|
lev_u_median_improve(size_t len, const lev_wchar *s, |
|
size_t n, const size_t *lengths, |
|
const lev_wchar *strings[], |
|
const double *weights, |
|
size_t *medlength); |
|
|
|
_LEV_STATIC_PY |
|
lev_byte* |
|
lev_quick_median(size_t n, |
|
const size_t *lengths, |
|
const lev_byte *strings[], |
|
const double *weights, |
|
size_t *medlength); |
|
|
|
_LEV_STATIC_PY |
|
lev_wchar* |
|
lev_u_quick_median(size_t n, |
|
const size_t *lengths, |
|
const lev_wchar *strings[], |
|
const double *weights, |
|
size_t *medlength); |
|
|
|
_LEV_STATIC_PY |
|
lev_byte* |
|
lev_set_median(size_t n, |
|
const size_t *lengths, |
|
const lev_byte *strings[], |
|
const double *weights, |
|
size_t *medlength); |
|
|
|
_LEV_STATIC_PY |
|
size_t |
|
lev_set_median_index(size_t n, const size_t *lengths, |
|
const lev_byte *strings[], |
|
const double *weights); |
|
|
|
_LEV_STATIC_PY |
|
lev_wchar* |
|
lev_u_set_median(size_t n, |
|
const size_t *lengths, |
|
const lev_wchar *strings[], |
|
const double *weights, |
|
size_t *medlength); |
|
|
|
_LEV_STATIC_PY |
|
size_t |
|
lev_u_set_median_index(size_t n, const size_t *lengths, |
|
const lev_wchar *strings[], |
|
const double *weights); |
|
|
|
_LEV_STATIC_PY |
|
double |
|
lev_edit_seq_distance(size_t n1, |
|
const size_t *lengths1, |
|
const lev_byte *strings1[], |
|
size_t n2, |
|
const size_t *lengths2, |
|
const lev_byte *strings2[]); |
|
|
|
_LEV_STATIC_PY |
|
double |
|
lev_u_edit_seq_distance(size_t n1, |
|
const size_t *lengths1, |
|
const lev_wchar *strings1[], |
|
size_t n2, |
|
const size_t *lengths2, |
|
const lev_wchar *strings2[]); |
|
|
|
_LEV_STATIC_PY |
|
double |
|
lev_set_distance(size_t n1, |
|
const size_t *lengths1, |
|
const lev_byte *strings1[], |
|
size_t n2, |
|
const size_t *lengths2, |
|
const lev_byte *strings2[]); |
|
|
|
_LEV_STATIC_PY |
|
double |
|
lev_u_set_distance(size_t n1, |
|
const size_t *lengths1, |
|
const lev_wchar *strings1[], |
|
size_t n2, |
|
const size_t *lengths2, |
|
const lev_wchar *strings2[]); |
|
|
|
_LEV_STATIC_PY |
|
int |
|
lev_editops_check_errors(size_t len1, |
|
size_t len2, |
|
size_t n, |
|
const LevEditOp *ops); |
|
|
|
_LEV_STATIC_PY |
|
int |
|
lev_opcodes_check_errors(size_t len1, |
|
size_t len2, |
|
size_t nb, |
|
const LevOpCode *bops); |
|
|
|
_LEV_STATIC_PY |
|
void |
|
lev_editops_invert(size_t n, |
|
LevEditOp *ops); |
|
|
|
_LEV_STATIC_PY |
|
void |
|
lev_opcodes_invert(size_t nb, |
|
LevOpCode *bops); |
|
|
|
_LEV_STATIC_PY |
|
LevMatchingBlock* |
|
lev_editops_matching_blocks(size_t len1, |
|
size_t len2, |
|
size_t n, |
|
const LevEditOp *ops, |
|
size_t *nmblocks); |
|
|
|
_LEV_STATIC_PY |
|
LevMatchingBlock* |
|
lev_opcodes_matching_blocks(size_t len1, |
|
size_t len2, |
|
size_t nb, |
|
const LevOpCode *bops, |
|
size_t *nmblocks); |
|
|
|
_LEV_STATIC_PY |
|
lev_byte* |
|
lev_editops_apply(size_t len1, |
|
const lev_byte* string1, |
|
size_t len2, |
|
const lev_byte* string2, |
|
size_t n, |
|
const LevEditOp *ops, |
|
size_t *len); |
|
|
|
_LEV_STATIC_PY |
|
lev_wchar* |
|
lev_u_editops_apply(size_t len1, |
|
const lev_wchar* string1, |
|
size_t len2, |
|
const lev_wchar* string2, |
|
size_t n, |
|
const LevEditOp *ops, |
|
size_t *len); |
|
|
|
_LEV_STATIC_PY |
|
lev_byte* |
|
lev_opcodes_apply(size_t len1, |
|
const lev_byte* string1, |
|
size_t len2, |
|
const lev_byte* string2, |
|
size_t nb, |
|
const LevOpCode *bops, |
|
size_t *len); |
|
|
|
_LEV_STATIC_PY |
|
lev_wchar* |
|
lev_u_opcodes_apply(size_t len1, |
|
const lev_wchar* string1, |
|
size_t len2, |
|
const lev_wchar* string2, |
|
size_t nb, |
|
const LevOpCode *bops, |
|
size_t *len); |
|
|
|
_LEV_STATIC_PY |
|
LevEditOp* |
|
lev_editops_find(size_t len1, |
|
const lev_byte *string1, |
|
size_t len2, |
|
const lev_byte *string2, |
|
size_t *n); |
|
|
|
_LEV_STATIC_PY |
|
LevEditOp* |
|
lev_u_editops_find(size_t len1, |
|
const lev_wchar *string1, |
|
size_t len2, |
|
const lev_wchar *string2, |
|
size_t *n); |
|
|
|
_LEV_STATIC_PY |
|
LevEditOp* |
|
lev_opcodes_to_editops(size_t nb, |
|
const LevOpCode *bops, |
|
size_t *n, |
|
int keepkeep); |
|
|
|
_LEV_STATIC_PY |
|
LevOpCode* |
|
lev_editops_to_opcodes(size_t n, |
|
const LevEditOp *ops, |
|
size_t *nb, |
|
size_t len1, |
|
size_t len2); |
|
|
|
_LEV_STATIC_PY |
|
size_t |
|
lev_editops_total_cost(size_t n, |
|
const LevEditOp *ops); |
|
|
|
_LEV_STATIC_PY |
|
size_t |
|
lev_opcodes_total_cost(size_t nb, |
|
const LevOpCode *bops); |
|
|
|
_LEV_STATIC_PY |
|
LevEditOp* |
|
lev_editops_normalize(size_t n, |
|
const LevEditOp *ops, |
|
size_t *nnorm); |
|
|
|
/*
 * lev_editops_subtract -- takes two read-only LevEditOp arrays (ops with n
 * entries, sub with ns entries) and a size_t out-parameter nrem; returns a
 * LevEditOp pointer.
 * NOTE(review): presumably returns a newly allocated array of the
 * operations left after removing `sub` from `ops`, with its count in
 * *nrem -- confirm preconditions on `sub`, ownership and failure value.
 */
_LEV_STATIC_PY
LevEditOp *
lev_editops_subtract(size_t n,
                     const LevEditOp *ops,
                     size_t ns,
                     const LevEditOp *sub,
                     size_t *nrem);

_LEV_STATIC_PY |
|
void |
|
lev_init_rng(unsigned long int seed); |
|
|
|
#endif /* LEVENSHTEIN_H */