chat-ui / src /lib /utils /searchTokens.ts
Mishig
[Assistants] Filter on names (#841)
10dbbd6 unverified
raw
history blame
1.03 kB
const PUNCTUATION_REGEX = /\p{P}/gu;
function removeDiacritics(s: string, form: "NFD" | "NFKD" = "NFD"): string {
return s.normalize(form).replace(/[\u0300-\u036f]/g, "");
}
export function generateSearchTokens(value: string): string[] {
const fullTitleToken = removeDiacritics(value)
.replace(PUNCTUATION_REGEX, "")
.replaceAll(/\s+/g, "")
.toLowerCase();
return [
...new Set([
...removeDiacritics(value)
.split(/\s+/)
.map((word) => word.replace(PUNCTUATION_REGEX, "").toLowerCase())
.filter((word) => word.length),
...(fullTitleToken.length ? [fullTitleToken] : []),
]),
];
}
function escapeForRegExp(s: string): string {
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string
}
export function generateQueryTokens(query: string): RegExp[] {
return removeDiacritics(query)
.split(/\s+/)
.map((word) => word.replace(PUNCTUATION_REGEX, "").toLowerCase())
.filter((word) => word.length)
.map((token) => new RegExp(`^${escapeForRegExp(token)}`));
}