Spaces:
				
			
			
	
			
			
		Paused
		
	
	
	
			
			
	
	
	
	
		
		
		Paused
		
	Revert "Revert "Update embedding model for WebSearch (#437)""
Browse files
This reverts commit 1061bc237c37ada9537197b10aeee43e08fc8f4e.
    	
        src/lib/server/websearch/sentenceSimilarity.ts
    CHANGED
    
    | @@ -6,16 +6,14 @@ function innerProduct(tensor1: Tensor, tensor2: Tensor) { | |
| 6 | 
             
            	return 1.0 - dot(tensor1.data, tensor2.data);
         | 
| 7 | 
             
            }
         | 
| 8 |  | 
| 9 | 
            -
            const extractor = await pipeline("feature-extraction", "Xenova/e5-small-v2");
         | 
| 10 |  | 
| 11 | 
             
            export async function findSimilarSentences(
         | 
| 12 | 
             
            	query: string,
         | 
| 13 | 
             
            	sentences: string[],
         | 
| 14 | 
             
            	{ topK = 5 }: { topK: number }
         | 
| 15 | 
             
            ) {
         | 
| 16 | 
            -
            	 | 
| 17 | 
            -
            	// see more: https://huggingface.co/intfloat/e5-small-v2/blob/main/README.md?code=true#L2631
         | 
| 18 | 
            -
            	const input = [`query: ${query}`, ...sentences.map((s) => `passage: ${s}`)];
         | 
| 19 | 
             
            	const output: Tensor = await extractor(input, { pooling: "mean", normalize: true });
         | 
| 20 |  | 
| 21 | 
             
            	const queryTensor: Tensor = output[0];
         | 
|  | |
| 6 | 
             
            	return 1.0 - dot(tensor1.data, tensor2.data);
         | 
| 7 | 
             
            }
         | 
| 8 |  | 
| 9 | 
            +
            const extractor = await pipeline("feature-extraction", "Xenova/gte-small");
         | 
| 10 |  | 
| 11 | 
             
            export async function findSimilarSentences(
         | 
| 12 | 
             
            	query: string,
         | 
| 13 | 
             
            	sentences: string[],
         | 
| 14 | 
             
            	{ topK = 5 }: { topK: number }
         | 
| 15 | 
             
            ) {
         | 
| 16 | 
            +
            	const input = [query, ...sentences];
         | 
|  | |
|  | |
| 17 | 
             
            	const output: Tensor = await extractor(input, { pooling: "mean", normalize: true });
         | 
| 18 |  | 
| 19 | 
             
            	const queryTensor: Tensor = output[0];
         | 
 
			
