/**
 * Matches every character in the Unicode "Punctuation" general category
 * (`\p{P}`): ASCII punctuation such as `.`, `,`, `-`, `_`, `(`, `)` as well as
 * non-ASCII punctuation such as `¿` or `«`.
 *
 * Symbols (`\p{S}`) such as `+`, `$`, `^` and `|` are NOT punctuation and are
 * therefore left untouched by this pattern.
 *
 * The pattern carries the `g` flag so that `String.prototype.replace` removes
 * all occurrences. Because a global RegExp is stateful (`lastIndex`), do not
 * use this shared instance with `.test()` / `.exec()`.
 */
const PUNCTUATION_REGEX = /\p{P}/gu;
/**
 * Strips diacritical marks from a string.
 *
 * The string is first decomposed with the requested Unicode normalization
 * form so that accented letters become "base letter + combining mark", then
 * every code point in the Combining Diacritical Marks block
 * (U+0300–U+036F) is removed.
 *
 * Note that the result stays in decomposed form; it is not recomposed.
 *
 * @param s - Text to process.
 * @param form - Decomposition to apply: `"NFD"` (canonical, the default) or
 *   `"NFKD"` (compatibility, which additionally expands characters such as
 *   ligatures).
 * @returns The decomposed text with combining marks in U+0300–U+036F removed.
 */
function removeDiacritics(s: string, form: "NFD" | "NFKD" = "NFD"): string {
  return s.normalize(form).replace(/[\u0300-\u036f]/g, "");
}
/**
 * Builds the list of normalized tokens under which `value` can be searched.
 *
 * Two kinds of tokens are produced, all with diacritics and punctuation
 * removed and lowercased:
 *  - one token per whitespace-separated word of `value`;
 *  - one "full" token consisting of the whole value with all whitespace
 *    removed (so `"Foo Bar"` is also findable as `"foobar"`).
 *
 * Duplicates are dropped while keeping first-seen order: word tokens in
 * their original order, followed by the full token when it is new.
 *
 * @param value - Raw text to index (e.g. a title).
 * @returns Unique, non-empty tokens; an empty array when `value` contains no
 *   token characters (empty, whitespace-only or punctuation-only input).
 */
export function generateSearchTokens(value: string): string[] {
  // Strip diacritics once; both token kinds start from the same string.
  const stripped = removeDiacritics(value);

  const wordTokens = stripped
    .split(/\s+/)
    .map((word) => word.replace(PUNCTUATION_REGEX, "").toLowerCase())
    // Leading/trailing whitespace and punctuation-only words yield "".
    .filter((word) => word.length > 0);

  // Deliberately NOT derived by joining `wordTokens`: lowercasing is applied
  // after the whitespace is removed, and case mapping can depend on context
  // (e.g. Greek final sigma), so the two results may differ.
  const fullTitleToken = stripped
    .replace(PUNCTUATION_REGEX, "")
    .replace(/\s+/g, "")
    .toLowerCase();

  // A Set preserves insertion order, so the full token comes last if new.
  const tokens = new Set(wordTokens);
  if (fullTitleToken.length > 0) {
    tokens.add(fullTitleToken);
  }
  return [...tokens];
}
/**
 * Escapes every regular-expression metacharacter in `s` so the result can be
 * embedded in a `RegExp` source and match `s` literally.
 *
 * @param s - Arbitrary text.
 * @returns `s` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a
 *   backslash.
 */
function escapeForRegExp(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string
}
/**
 * Turns a user query into one prefix-matching regular expression per word.
 *
 * Each whitespace-separated word of `query` is normalized the same way as
 * the word tokens of `generateSearchTokens` (diacritics removed, punctuation
 * removed, lowercased) and then compiled to a `RegExp` anchored at the start
 * of the input, so it matches any string that begins with that word.
 *
 * The expressions are case-sensitive and carry no flags; they are meant to
 * be tested against already-lowercased tokens.
 *
 * @param query - Raw query text.
 * @returns One `RegExp` per non-empty normalized word, in query order; an
 *   empty array when the query contains no token characters.
 */
export function generateQueryTokens(query: string): RegExp[] {
  return (
    removeDiacritics(query)
      .split(/\s+/)
      .map((word) => word.replace(PUNCTUATION_REGEX, "").toLowerCase())
      // Leading/trailing whitespace and punctuation-only words yield "".
      .filter((word) => word.length > 0)
      // Escaping is still required after punctuation stripping: characters
      // such as `+`, `$`, `^` and `|` are Unicode symbols, not punctuation,
      // and would otherwise be interpreted as regex syntax.
      .map((token) => new RegExp(`^${escapeForRegExp(token)}`))
  );
}