Spaces:
Paused
Paused
Revert "Revert "Update embedding model for WebSearch (#437)""
Browse files
This reverts commit 1061bc237c37ada9537197b10aeee43e08fc8f4e.
src/lib/server/websearch/sentenceSimilarity.ts
CHANGED
|
@@ -6,16 +6,14 @@ function innerProduct(tensor1: Tensor, tensor2: Tensor) {
|
|
| 6 |
return 1.0 - dot(tensor1.data, tensor2.data);
|
| 7 |
}
|
| 8 |
|
| 9 |
-
const extractor = await pipeline("feature-extraction", "Xenova/e5-small-v2");
|
| 10 |
|
| 11 |
export async function findSimilarSentences(
|
| 12 |
query: string,
|
| 13 |
sentences: string[],
|
| 14 |
{ topK = 5 }: { topK: number }
|
| 15 |
) {
|
| 16 |
-
|
| 17 |
-
// see more: https://huggingface.co/intfloat/e5-small-v2/blob/main/README.md?code=true#L2631
|
| 18 |
-
const input = [`query: ${query}`, ...sentences.map((s) => `passage: ${s}`)];
|
| 19 |
const output: Tensor = await extractor(input, { pooling: "mean", normalize: true });
|
| 20 |
|
| 21 |
const queryTensor: Tensor = output[0];
|
|
|
|
| 6 |
return 1.0 - dot(tensor1.data, tensor2.data);
|
| 7 |
}
|
| 8 |
|
| 9 |
+
const extractor = await pipeline("feature-extraction", "Xenova/gte-small");
|
| 10 |
|
| 11 |
export async function findSimilarSentences(
|
| 12 |
query: string,
|
| 13 |
sentences: string[],
|
| 14 |
{ topK = 5 }: { topK: number }
|
| 15 |
) {
|
| 16 |
+
const input = [query, ...sentences];
|
|
|
|
|
|
|
| 17 |
const output: Tensor = await extractor(input, { pooling: "mean", normalize: true });
|
| 18 |
|
| 19 |
const queryTensor: Tensor = output[0];
|