import { describe, it, expect } from "vitest";
import { cosineSimilarity, vectorSearch } from "./vectorSearch";
import type { EmbeddedChunk } from "../types";
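// ---------------------------------------------------------------------------
// Test helpers
// ---------------------------------------------------------------------------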
/**
 * Builds an `EmbeddedChunk` fixture for the tests in this file.
 *
 * Only the embedding and the identity fields vary between fixtures:
 * `startChar` and `title` are fixed placeholders, and `text` is derived
 * from `chunkIndex` and `docId` so each fixture is recognisable in failure
 * output.
 *
 * @param embedding - Vector components; copied into a new `Float32Array`,
 *   so values are rounded to single precision.
 * @param docId - Identifier stored on the chunk. Defaults to `"doc1"`.
 * @param chunkIndex - Index stored on the chunk. Defaults to `0`.
 * @returns A freshly allocated chunk object.
 */
function makeEmbeddedChunk(
  embedding: number[],
  docId = "doc1",
  chunkIndex = 0,
): EmbeddedChunk {
  return {
    docId,
    chunkIndex,
    text: `chunk ${chunkIndex} of ${docId}`,
    startChar: 0,
    title: "Test",
    embedding: new Float32Array(embedding),
  };
}
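// ---------------------------------------------------------------------------
// cosineSimilarity
// ---------------------------------------------------------------------------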
// Checks cosineSimilarity against reference points (1, -1, 0), two algebraic
// properties (symmetry, scale invariance), and one hand-computed value.
describe("cosineSimilarity", () => {
  it("returns 1 for identical vectors", () => {
    const v = new Float32Array([1, 2, 3]);
    expect(cosineSimilarity(v, v)).toBeCloseTo(1.0, 5);
  });

  it("returns -1 for opposite vectors", () => {
    const a = new Float32Array([1, 0, 0]);
    const b = new Float32Array([-1, 0, 0]);
    expect(cosineSimilarity(a, b)).toBeCloseTo(-1.0, 5);
  });

  it("returns 0 for orthogonal vectors", () => {
    const a = new Float32Array([1, 0, 0]);
    const b = new Float32Array([0, 1, 0]);
    expect(cosineSimilarity(a, b)).toBeCloseTo(0.0, 5);
  });

  it("is symmetric", () => {
    const a = new Float32Array([1, 2, 3]);
    const b = new Float32Array([4, 5, 6]);
    // Swapping the arguments must not change the result.
    expect(cosineSimilarity(a, b)).toBeCloseTo(cosineSimilarity(b, a), 10);
  });

  it("is scale-invariant", () => {
    const a = new Float32Array([1, 2, 3]);
    // b = 2 * a: same direction, different magnitude.
    const b = new Float32Array([2, 4, 6]);
    expect(cosineSimilarity(a, b)).toBeCloseTo(1.0, 5);
  });

  it("computes correct value for known vectors", () => {
    // a . b = 3*4 + 4*3 = 24; |a| = |b| = 5
    // cos = 24 / (5 * 5) = 0.96
    const a = new Float32Array([3, 4]);
    const b = new Float32Array([4, 3]);
    expect(cosineSimilarity(a, b)).toBeCloseTo(0.96, 2);
  });
});
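// ---------------------------------------------------------------------------
// vectorSearch
// ---------------------------------------------------------------------------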
// Checks ranking, result tagging and topK handling of vectorSearch against a
// small fixed corpus of 3-dimensional embeddings.
describe("vectorSearch", () => {
  // Unit vector along the first axis; every fixture is compared against it.
  const query = new Float32Array([1, 0, 0]);

  // The fixtures are listed out of rank order, so the ordering tests only
  // pass if vectorSearch actually sorts. Cosine of each against `query`:
  //   orthogonal 0, identical 1, similar ~0.707, opposite -1,
  //   very-similar ~0.994.
  const chunks: EmbeddedChunk[] = [
    makeEmbeddedChunk([0, 1, 0], "orthogonal", 0),
    makeEmbeddedChunk([1, 0, 0], "identical", 0),
    makeEmbeddedChunk([0.7, 0.7, 0], "similar", 0),
    makeEmbeddedChunk([-1, 0, 0], "opposite", 0),
    makeEmbeddedChunk([0.9, 0.1, 0], "very-similar", 0),
  ];

  it("returns results sorted by descending score", () => {
    const results = vectorSearch(query, chunks);
    // Guard against a vacuous pass: the loop below asserts nothing unless
    // at least two results come back.
    expect(results.length).toBeGreaterThan(1);
    for (let i = 1; i < results.length; i++) {
      expect(results[i].score).toBeLessThanOrEqual(results[i - 1].score);
    }
  });

  it("ranks identical vector highest", () => {
    const results = vectorSearch(query, chunks);
    expect(results[0].chunk.docId).toBe("identical");
  });

  it("ranks opposite vector lowest", () => {
    const results = vectorSearch(query, chunks);
    expect(results[results.length - 1].chunk.docId).toBe("opposite");
  });

  it("all results have source 'vector'", () => {
    const results = vectorSearch(query, chunks);
    // Guard against a vacuous pass on an empty result list.
    expect(results.length).toBeGreaterThan(0);
    for (const r of results) {
      expect(r.source).toBe("vector");
    }
  });

  it("respects topK parameter", () => {
    const results = vectorSearch(query, chunks, 2);
    expect(results.length).toBe(2);
    expect(results[0].chunk.docId).toBe("identical");
    expect(results[1].chunk.docId).toBe("very-similar");
  });

  it("returns all when topK exceeds chunk count", () => {
    const results = vectorSearch(query, chunks, 100);
    expect(results.length).toBe(chunks.length);
  });

  it("handles empty chunks array", () => {
    const results = vectorSearch(query, []);
    expect(results).toEqual([]);
  });

  it("handles single chunk", () => {
    const single = [makeEmbeddedChunk([0.5, 0.5, 0], "only", 0)];
    const results = vectorSearch(query, single);
    expect(results.length).toBe(1);
    expect(results[0].source).toBe("vector");
    expect(results[0].score).toBeGreaterThan(0);
  });
});