File size: 1,025 Bytes
10dbbd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/**
 * Matches every Unicode punctuation character (general category `P`).
 *
 * The `u` flag enables the `\p{…}` property escape and the `g` flag makes
 * `String.prototype.replace` act on all occurrences rather than just the first.
 * Because the regex is global it carries `lastIndex` state, so it should only
 * be used with methods that reset it (such as `replace`), not `test`/`exec`.
 */
const PUNCTUATION_REGEX = /\p{P}/gu;

/**
 * Strips combining diacritical marks from a string.
 *
 * The text is first decomposed with `String.prototype.normalize`, which turns
 * an accented character into its base character followed by combining marks.
 * Every character in the range U+0300–U+036F is then deleted. The result is
 * left in decomposed form; it is not recomposed afterwards.
 *
 * @param s - Text to process.
 * @param form - Decomposition form handed to `normalize`; defaults to `"NFD"`.
 * @returns The decomposed text without those combining marks.
 */
function removeDiacritics(s: string, form: "NFD" | "NFKD" = "NFD"): string {
	const decomposed = s.normalize(form);
	return decomposed.replace(/[\u0300-\u036f]/g, "");
}

/**
 * Builds the de-duplicated list of normalized tokens for a value.
 *
 * The value is stripped of diacritics, then:
 * - it is split on runs of whitespace, and each word has its punctuation
 *   removed and is lower-cased; words that end up empty are dropped;
 * - one extra token is produced from the whole value with punctuation and
 *   all whitespace removed, lower-cased (skipped when empty).
 *
 * Tokens are returned in first-seen order: the per-word tokens first, then the
 * whole-value token unless it duplicates one of them.
 *
 * @param value - Text to tokenize.
 * @returns Unique tokens; an empty array when nothing survives normalization.
 */
export function generateSearchTokens(value: string): string[] {
	const plainText = removeDiacritics(value);

	// A Set keeps insertion order while discarding repeated tokens.
	const tokens = new Set<string>();
	for (const rawWord of plainText.split(/\s+/)) {
		const word = rawWord.replace(PUNCTUATION_REGEX, "").toLowerCase();
		if (word.length > 0) {
			tokens.add(word);
		}
	}

	// Whole value collapsed into a single token.
	const collapsed = plainText
		.replace(PUNCTUATION_REGEX, "")
		.replace(/\s+/g, "")
		.toLowerCase();
	if (collapsed.length > 0) {
		tokens.add(collapsed);
	}

	return Array.from(tokens);
}

/**
 * Escapes regular-expression metacharacters so that the text can be embedded
 * in a pattern and matched literally.
 *
 * @param s - Raw text.
 * @returns `s` with a backslash inserted before each of the characters
 *   `.`, `*`, `+`, `?`, `^`, `$`, `{`, `}`, `(`, `)`, `|`, `[`, `]` and `\`.
 */
function escapeForRegExp(s: string): string {
	const metaCharacters = /[.*+?^${}()|[\]\\]/g;
	// Prefix each matched metacharacter with a single backslash.
	return s.replace(metaCharacters, (matched) => "\\" + matched);
}

/**
 * Turns a query string into one start-anchored regular expression per word.
 *
 * The query is stripped of diacritics and split on runs of whitespace. Each
 * word has its punctuation removed and is lower-cased; words that end up empty
 * are skipped. Every remaining word is regex-escaped and compiled as
 * `^<word>` with no flags, so it matches case-sensitively against strings that
 * begin with that word.
 *
 * @param query - Raw query text.
 * @returns Patterns in the order the words appear in the query (duplicates
 *   are kept); an empty array when no word survives normalization.
 */
export function generateQueryTokens(query: string): RegExp[] {
	const patterns: RegExp[] = [];
	for (const rawWord of removeDiacritics(query).split(/\s+/)) {
		const token = rawWord.replace(PUNCTUATION_REGEX, "").toLowerCase();
		if (token.length === 0) {
			continue;
		}
		// Symbols such as "+" or "$" are not punctuation and can survive the
		// stripping above, so the token still has to be escaped.
		patterns.push(new RegExp(`^${escapeForRegExp(token)}`));
	}
	return patterns;
}