Spaces:
Running
Running
File size: 1,446 Bytes
3a01622 41f8b74 3a01622 d5559df 3a01622 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import { dot } from "@xenova/transformers";
import type { EmbeddingBackendModel } from "$lib/server/embeddingModels";
import type { Embedding } from "$lib/server/embeddingEndpoints/embeddingEndpoints";
// see here: https://github.com/nmslib/hnswlib/blob/359b2ba87358224963986f709e593d799064ace6/README.md?plain=1#L34
function innerProduct(embeddingA: Embedding, embeddingB: Embedding) {
return 1.0 - dot(embeddingA, embeddingB);
}
export async function findSimilarSentences(
embeddingModel: EmbeddingBackendModel,
query: string,
sentences: string[],
{ topK = 5 }: { topK: number }
): Promise<Embedding> {
const inputs = [
`${embeddingModel.preQuery}${query}`,
...sentences.map((sentence) => `${embeddingModel.prePassage}${sentence}`),
];
const embeddingEndpoint = await embeddingModel.getEndpoint();
const output = await embeddingEndpoint({ inputs });
const queryEmbedding: Embedding = output[0];
const sentencesEmbeddings: Embedding[] = output.slice(1, inputs.length - 1);
const distancesFromQuery: { distance: number; index: number }[] = [...sentencesEmbeddings].map(
(sentenceEmbedding: Embedding, index: number) => {
return {
distance: innerProduct(queryEmbedding, sentenceEmbedding),
index,
};
}
);
distancesFromQuery.sort((a, b) => {
return a.distance - b.distance;
});
// Return the indexes of the closest topK sentences
return distancesFromQuery.slice(0, topK).map((item) => item.index);
}
|