nsarrazin HF staff committed on
Commit
aa07e29
1 Parent(s): 7ddda31

Revert "Revert "Update embedding model for WebSearch (#437)""

Browse files

This reverts commit 1061bc237c37ada9537197b10aeee43e08fc8f4e.

src/lib/server/websearch/sentenceSimilarity.ts CHANGED
@@ -6,16 +6,14 @@ function innerProduct(tensor1: Tensor, tensor2: Tensor) {
6
  return 1.0 - dot(tensor1.data, tensor2.data);
7
  }
8
 
9
- const extractor = await pipeline("feature-extraction", "Xenova/e5-small-v2");
10
 
11
  export async function findSimilarSentences(
12
  query: string,
13
  sentences: string[],
14
  { topK = 5 }: { topK: number }
15
  ) {
16
- // this preprocessing step is suggested for e5-small-v2 model
17
- // see more: https://huggingface.co/intfloat/e5-small-v2/blob/main/README.md?code=true#L2631
18
- const input = [`query: ${query}`, ...sentences.map((s) => `passage: ${s}`)];
19
  const output: Tensor = await extractor(input, { pooling: "mean", normalize: true });
20
 
21
  const queryTensor: Tensor = output[0];
 
6
  return 1.0 - dot(tensor1.data, tensor2.data);
7
  }
8
 
9
+ const extractor = await pipeline("feature-extraction", "Xenova/gte-small");
10
 
11
  export async function findSimilarSentences(
12
  query: string,
13
  sentences: string[],
14
  { topK = 5 }: { topK: number }
15
  ) {
16
+ const input = [query, ...sentences];
 
 
17
  const output: Tensor = await extractor(input, { pooling: "mean", normalize: true });
18
 
19
  const queryTensor: Tensor = output[0];