Spaces:
Paused
Paused
| import type { DatabaseSync } from "node:sqlite"; | |
| import { truncateUtf16Safe } from "../utils.js"; | |
| import { cosineSimilarity, parseEmbedding } from "./internal.js"; | |
/**
 * Packs an embedding into a Buffer of 32-bit floats.
 *
 * The values are first copied into a fresh Float32Array (so each number is
 * narrowed to single precision), and the Buffer is created over that array's
 * backing storage.
 */
const vectorToBlob = (embedding: number[]): Buffer => {
  const packed = Float32Array.from(embedding);
  return Buffer.from(packed.buffer);
};
/**
 * Value of a chunk's `source` field. Source filters passed to the search
 * helpers bind parameters of this type.
 */
export type SearchSource = string;

/** One hit produced by the search helpers in this module. */
export type SearchRowResult = {
  /** Identifier of the matched chunk. */
  id: string;
  /** Path recorded for the matched chunk. */
  path: string;
  /** Value of the chunk's `start_line`. */
  startLine: number;
  /** Value of the chunk's `end_line`. */
  endLine: number;
  /** Relevance score assigned by the search that produced this hit. */
  score: number;
  /** Chunk text after truncation to the caller's `snippetMaxChars`. */
  snippet: string;
  /** Value of the chunk's `source` field. */
  source: SearchSource;
};
/**
 * Returns up to `limit` chunks ranked by similarity to `queryVec`.
 *
 * - An empty query vector or a non-positive `limit` yields an empty list
 *   without touching the database.
 * - When `ensureVectorReady` (called with the query's dimension count)
 *   resolves to `true`, ranking is done in SQL: `vectorTable` is joined to
 *   `chunks`, rows are ordered by `vec_distance_cosine` ascending, and each
 *   score is reported as `1 - distance`.
 * - Otherwise every chunk for the model is loaded through `listChunks`,
 *   scored in process with `cosineSimilarity`, entries with a non-finite
 *   score are dropped, and the highest scores are kept.
 *
 * Snippets are the chunk text truncated with `truncateUtf16Safe` to
 * `snippetMaxChars`.
 */
export async function searchVector(params: {
  db: DatabaseSync;
  vectorTable: string;
  providerModel: string;
  queryVec: number[];
  limit: number;
  snippetMaxChars: number;
  ensureVectorReady: (dimensions: number) => Promise<boolean>;
  sourceFilterVec: { sql: string; params: SearchSource[] };
  sourceFilterChunks: { sql: string; params: SearchSource[] };
}): Promise<SearchRowResult[]> {
  if (params.queryVec.length === 0 || params.limit <= 0) {
    return [];
  }

  const vectorReady = await params.ensureVectorReady(params.queryVec.length);

  if (!vectorReady) {
    // Fallback: score every stored chunk for this model in process.
    const stored = listChunks({
      db: params.db,
      providerModel: params.providerModel,
      sourceFilter: params.sourceFilterChunks,
    });
    const ranked = stored.flatMap((chunk) => {
      const score = cosineSimilarity(params.queryVec, chunk.embedding);
      // Entries whose score is NaN or infinite are excluded from the ranking.
      return Number.isFinite(score) ? [{ chunk, score }] : [];
    });
    // `ranked` is a fresh array, so sorting it in place affects nothing else.
    ranked.sort((left, right) => right.score - left.score);
    return ranked.slice(0, params.limit).map(({ chunk, score }) => ({
      id: chunk.id,
      path: chunk.path,
      startLine: chunk.startLine,
      endLine: chunk.endLine,
      score,
      snippet: truncateUtf16Safe(chunk.text, params.snippetMaxChars),
      source: chunk.source,
    }));
  }

  type VectorRow = {
    id: string;
    path: string;
    start_line: number;
    end_line: number;
    text: string;
    source: SearchSource;
    dist: number;
  };

  // The table name and the filter fragment are spliced into the statement text;
  // the query blob, model, filter values and limit are bound as parameters.
  const sql = [
    "SELECT c.id, c.path, c.start_line, c.end_line, c.text,",
    " c.source,",
    " vec_distance_cosine(v.embedding, ?) AS dist",
    ` FROM ${params.vectorTable} v`,
    " JOIN chunks c ON c.id = v.id",
    ` WHERE c.model = ?${params.sourceFilterVec.sql}`,
    " ORDER BY dist ASC",
    " LIMIT ?",
  ].join("\n");

  const hits = params.db
    .prepare(sql)
    .all(
      vectorToBlob(params.queryVec),
      params.providerModel,
      ...params.sourceFilterVec.params,
      params.limit,
    ) as VectorRow[];

  return hits.map(({ id, path, start_line, end_line, text, source, dist }) => ({
    id,
    path,
    startLine: start_line,
    endLine: end_line,
    // Convert the distance into a score where larger means closer.
    score: 1 - dist,
    snippet: truncateUtf16Safe(text, params.snippetMaxChars),
    source,
  }));
}
/**
 * Loads every row of `chunks` stored for `providerModel`, with the caller's
 * source filter appended to the WHERE clause.
 *
 * The stored `embedding` column is read as a string and converted with
 * `parseEmbedding`; the snake_case line columns are returned as
 * `startLine` / `endLine`.
 */
export function listChunks(params: {
  db: DatabaseSync;
  providerModel: string;
  sourceFilter: { sql: string; params: SearchSource[] };
}): Array<{
  id: string;
  path: string;
  startLine: number;
  endLine: number;
  text: string;
  embedding: number[];
  source: SearchSource;
}> {
  type StoredChunkRow = {
    id: string;
    path: string;
    start_line: number;
    end_line: number;
    text: string;
    embedding: string;
    source: SearchSource;
  };

  // The filter fragment is spliced into the statement text; its values are
  // bound after the model parameter.
  const sql = [
    "SELECT id, path, start_line, end_line, text, embedding, source",
    " FROM chunks",
    ` WHERE model = ?${params.sourceFilter.sql}`,
  ].join("\n");
  const statement = params.db.prepare(sql);
  const stored = statement.all(
    params.providerModel,
    ...params.sourceFilter.params,
  ) as StoredChunkRow[];

  const chunks: Array<{
    id: string;
    path: string;
    startLine: number;
    endLine: number;
    text: string;
    embedding: number[];
    source: SearchSource;
  }> = [];
  for (const row of stored) {
    chunks.push({
      id: row.id,
      path: row.path,
      startLine: row.start_line,
      endLine: row.end_line,
      text: row.text,
      embedding: parseEmbedding(row.embedding),
      source: row.source,
    });
  }
  return chunks;
}
/**
 * Runs a full-text MATCH query against `ftsTable` and returns up to `limit`
 * hits for `providerModel`.
 *
 * - A non-positive `limit` yields an empty list.
 * - `buildFtsQuery` turns the raw query into the MATCH expression; a falsy
 *   result (null or empty string) yields an empty list without querying.
 * - Rows are ordered by the table's `bm25(...)` rank ascending. Each rank is
 *   converted with `bm25RankToScore`, and the converted value is reported as
 *   both `score` and `textScore`.
 *
 * Snippets are the row text truncated with `truncateUtf16Safe` to
 * `snippetMaxChars`.
 */
export async function searchKeyword(params: {
  db: DatabaseSync;
  ftsTable: string;
  providerModel: string;
  query: string;
  limit: number;
  snippetMaxChars: number;
  sourceFilter: { sql: string; params: SearchSource[] };
  buildFtsQuery: (raw: string) => string | null;
  bm25RankToScore: (rank: number) => number;
}): Promise<Array<SearchRowResult & { textScore: number }>> {
  if (params.limit <= 0) {
    return [];
  }

  const matchExpr = params.buildFtsQuery(params.query);
  if (!matchExpr) {
    return [];
  }

  type KeywordRow = {
    id: string;
    path: string;
    source: SearchSource;
    start_line: number;
    end_line: number;
    text: string;
    rank: number;
  };

  // The table name and the filter fragment are spliced into the statement text;
  // the MATCH expression, model, filter values and limit are bound as parameters.
  const sql = [
    "SELECT id, path, source, start_line, end_line, text,",
    ` bm25(${params.ftsTable}) AS rank`,
    ` FROM ${params.ftsTable}`,
    ` WHERE ${params.ftsTable} MATCH ? AND model = ?${params.sourceFilter.sql}`,
    " ORDER BY rank ASC",
    " LIMIT ?",
  ].join("\n");

  const statement = params.db.prepare(sql);
  const matches = statement.all(
    matchExpr,
    params.providerModel,
    ...params.sourceFilter.params,
    params.limit,
  ) as KeywordRow[];

  return matches.map(({ id, path, source, start_line, end_line, text, rank }) => {
    const textScore = params.bm25RankToScore(rank);
    return {
      id,
      path,
      startLine: start_line,
      endLine: end_line,
      score: textScore,
      textScore,
      snippet: truncateUtf16Safe(text, params.snippetMaxChars),
      source,
    };
  });
}