|
import Database from 'better-sqlite3'; |
|
import path from 'path'; |
|
import fs from 'fs'; |
|
import { |
|
type Document, |
|
type InsertDocument, |
|
type SearchQuery, |
|
type InsertSearchQuery, |
|
type SearchResult, |
|
type InsertSearchResult, |
|
type Citation, |
|
type InsertCitation, |
|
type SearchRequest, |
|
type SearchResponse, |
|
type DocumentWithContext |
|
} from "@shared/schema"; |
|
import { IStorage } from './storage'; |
|
|
|
export class SQLiteStorage implements IStorage { |
|
private db: Database.Database; |
|
|
|
constructor(dbPath?: string) { |
|
|
|
const defaultPath = process.env.NODE_ENV === 'production' |
|
? '/tmp/knowledgebridge.db' |
|
: './data/knowledgebridge.db'; |
|
|
|
const finalPath = dbPath || defaultPath; |
|
|
|
|
|
const dir = path.dirname(finalPath); |
|
try { |
|
if (!fs.existsSync(dir)) { |
|
fs.mkdirSync(dir, { recursive: true }); |
|
} |
|
} catch (error) { |
|
console.warn(`Failed to create database directory at ${dir}:`, error); |
|
} |
|
|
|
this.db = new Database(finalPath); |
|
this.initializeTables(); |
|
} |
|
|
|
private initializeTables() { |
|
|
|
this.db.pragma('foreign_keys = ON'); |
|
|
|
|
|
this.db.exec(` |
|
CREATE TABLE IF NOT EXISTS documents ( |
|
id INTEGER PRIMARY KEY AUTOINCREMENT, |
|
title TEXT NOT NULL, |
|
content TEXT NOT NULL, |
|
source TEXT NOT NULL, |
|
source_type TEXT NOT NULL, |
|
url TEXT, |
|
metadata TEXT, -- JSON string |
|
embedding TEXT, -- JSON string |
|
file_path TEXT, |
|
file_name TEXT, |
|
file_size INTEGER, |
|
mime_type TEXT, |
|
processing_status TEXT NOT NULL DEFAULT 'pending', |
|
modal_task_id TEXT, |
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP, |
|
processed_at DATETIME |
|
) |
|
`); |
|
|
|
|
|
this.db.exec(` |
|
CREATE TABLE IF NOT EXISTS search_queries ( |
|
id INTEGER PRIMARY KEY AUTOINCREMENT, |
|
query TEXT NOT NULL, |
|
search_type TEXT NOT NULL DEFAULT 'semantic', |
|
filters TEXT, -- JSON string |
|
results_count INTEGER DEFAULT 0, |
|
search_time REAL, |
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP |
|
) |
|
`); |
|
|
|
|
|
this.db.exec(` |
|
CREATE TABLE IF NOT EXISTS search_results ( |
|
id INTEGER PRIMARY KEY AUTOINCREMENT, |
|
query_id INTEGER NOT NULL, |
|
document_id INTEGER NOT NULL, |
|
relevance_score REAL NOT NULL, |
|
snippet TEXT NOT NULL, |
|
rank INTEGER NOT NULL, |
|
FOREIGN KEY (query_id) REFERENCES search_queries(id), |
|
FOREIGN KEY (document_id) REFERENCES documents(id) |
|
) |
|
`); |
|
|
|
|
|
this.db.exec(` |
|
CREATE TABLE IF NOT EXISTS citations ( |
|
id INTEGER PRIMARY KEY AUTOINCREMENT, |
|
document_id INTEGER NOT NULL, |
|
citation_text TEXT NOT NULL, |
|
page_number INTEGER, |
|
section TEXT, |
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP, |
|
FOREIGN KEY (document_id) REFERENCES documents(id) |
|
) |
|
`); |
|
|
|
|
|
this.db.exec(` |
|
CREATE INDEX IF NOT EXISTS idx_documents_source_type ON documents(source_type); |
|
CREATE INDEX IF NOT EXISTS idx_documents_processing_status ON documents(processing_status); |
|
CREATE INDEX IF NOT EXISTS idx_search_results_query_id ON search_results(query_id); |
|
CREATE INDEX IF NOT EXISTS idx_search_results_document_id ON search_results(document_id); |
|
CREATE INDEX IF NOT EXISTS idx_citations_document_id ON citations(document_id); |
|
`); |
|
} |
|
|
|
async getDocument(id: number): Promise<Document | undefined> { |
|
const stmt = this.db.prepare('SELECT * FROM documents WHERE id = ?'); |
|
const row = stmt.get(id) as any; |
|
return row ? this.mapDocumentRow(row) : undefined; |
|
} |
|
|
|
async getDocuments(limit = 50, offset = 0): Promise<Document[]> { |
|
const stmt = this.db.prepare('SELECT * FROM documents ORDER BY created_at DESC LIMIT ? OFFSET ?'); |
|
const rows = stmt.all(limit, offset) as any[]; |
|
return rows.map(row => this.mapDocumentRow(row)); |
|
} |
|
|
|
async createDocument(insertDocument: InsertDocument): Promise<Document> { |
|
const stmt = this.db.prepare(` |
|
INSERT INTO documents ( |
|
title, content, source, source_type, url, metadata, embedding, |
|
file_path, file_name, file_size, mime_type, processing_status, modal_task_id |
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) |
|
`); |
|
|
|
const result = stmt.run( |
|
insertDocument.title, |
|
insertDocument.content, |
|
insertDocument.source, |
|
insertDocument.sourceType, |
|
insertDocument.url || null, |
|
insertDocument.metadata ? JSON.stringify(insertDocument.metadata) : null, |
|
insertDocument.embedding || null, |
|
(insertDocument as any).filePath || null, |
|
(insertDocument as any).fileName || null, |
|
(insertDocument as any).fileSize || null, |
|
(insertDocument as any).mimeType || null, |
|
(insertDocument as any).processingStatus || 'pending', |
|
(insertDocument as any).modalTaskId || null |
|
); |
|
|
|
const created = await this.getDocument(result.lastInsertRowid as number); |
|
if (!created) throw new Error('Failed to create document'); |
|
return created; |
|
} |
|
|
|
async updateDocument(id: number, updateData: Partial<InsertDocument & { processingStatus?: string; modalTaskId?: string; processedAt?: Date }>): Promise<Document | undefined> { |
|
const existing = await this.getDocument(id); |
|
if (!existing) return undefined; |
|
|
|
const fields: string[] = []; |
|
const values: any[] = []; |
|
|
|
Object.entries(updateData).forEach(([key, value]) => { |
|
if (value !== undefined) { |
|
switch (key) { |
|
case 'sourceType': |
|
fields.push('source_type = ?'); |
|
break; |
|
case 'processingStatus': |
|
fields.push('processing_status = ?'); |
|
break; |
|
case 'modalTaskId': |
|
fields.push('modal_task_id = ?'); |
|
break; |
|
case 'filePath': |
|
fields.push('file_path = ?'); |
|
break; |
|
case 'fileName': |
|
fields.push('file_name = ?'); |
|
break; |
|
case 'fileSize': |
|
fields.push('file_size = ?'); |
|
break; |
|
case 'mimeType': |
|
fields.push('mime_type = ?'); |
|
break; |
|
case 'processedAt': |
|
fields.push('processed_at = ?'); |
|
value = value instanceof Date ? value.toISOString() : value; |
|
break; |
|
case 'metadata': |
|
fields.push('metadata = ?'); |
|
value = value ? JSON.stringify(value) : null; |
|
break; |
|
default: |
|
fields.push(`${key} = ?`); |
|
} |
|
values.push(value); |
|
} |
|
}); |
|
|
|
if (fields.length === 0) return existing; |
|
|
|
values.push(id); |
|
const stmt = this.db.prepare(`UPDATE documents SET ${fields.join(', ')} WHERE id = ?`); |
|
stmt.run(...values); |
|
|
|
return await this.getDocument(id); |
|
} |
|
|
|
async deleteDocument(id: number): Promise<boolean> { |
|
const stmt = this.db.prepare('DELETE FROM documents WHERE id = ?'); |
|
const result = stmt.run(id); |
|
return result.changes > 0; |
|
} |
|
|
|
async searchDocuments(request: SearchRequest): Promise<SearchResponse> { |
|
const startTime = Date.now(); |
|
|
|
let sql = ` |
|
SELECT * FROM documents |
|
WHERE (title LIKE ? OR content LIKE ?) |
|
`; |
|
const params: any[] = [`%${request.query}%`, `%${request.query}%`]; |
|
|
|
|
|
if (request.filters?.sourceTypes?.length) { |
|
const placeholders = request.filters.sourceTypes.map(() => '?').join(','); |
|
sql += ` AND source_type IN (${placeholders})`; |
|
params.push(...request.filters.sourceTypes); |
|
} |
|
|
|
sql += ` ORDER BY |
|
CASE |
|
WHEN title LIKE ? THEN 1 |
|
WHEN content LIKE ? THEN 2 |
|
ELSE 3 |
|
END, |
|
created_at DESC |
|
LIMIT ? OFFSET ? |
|
`; |
|
|
|
params.push(`%${request.query}%`, `%${request.query}%`, request.limit, request.offset); |
|
|
|
const stmt = this.db.prepare(sql); |
|
const rows = stmt.all(...params) as any[]; |
|
|
|
const results = rows.map((row, index) => { |
|
const doc = this.mapDocumentRow(row); |
|
return { |
|
...doc, |
|
relevanceScore: this.calculateRelevanceScore(doc, request.query), |
|
snippet: this.extractSnippet(doc.content, request.query), |
|
rank: index + 1 |
|
}; |
|
}); |
|
|
|
const searchTime = (Date.now() - startTime) / 1000; |
|
|
|
|
|
const searchQuery = await this.createSearchQuery({ |
|
query: request.query, |
|
searchType: request.searchType, |
|
filters: request.filters, |
|
resultsCount: results.length, |
|
searchTime |
|
}); |
|
|
|
|
|
for (const doc of results) { |
|
await this.createSearchResult({ |
|
queryId: searchQuery.id, |
|
documentId: doc.id, |
|
relevanceScore: doc.relevanceScore, |
|
snippet: doc.snippet, |
|
rank: doc.rank |
|
}); |
|
} |
|
|
|
return { |
|
results, |
|
totalCount: results.length, |
|
searchTime, |
|
query: request.query, |
|
queryId: searchQuery.id |
|
}; |
|
} |
|
|
|
private calculateRelevanceScore(doc: Document, query: string): number { |
|
const queryLower = query.toLowerCase(); |
|
const titleLower = doc.title.toLowerCase(); |
|
const contentLower = doc.content.toLowerCase(); |
|
|
|
let score = 0; |
|
|
|
|
|
if (titleLower === queryLower) score += 1.0; |
|
else if (titleLower.includes(queryLower)) score += 0.8; |
|
|
|
|
|
if (contentLower.includes(queryLower)) score += 0.3; |
|
|
|
|
|
const queryWords = queryLower.split(' '); |
|
queryWords.forEach(word => { |
|
if (titleLower.includes(word)) score += 0.2; |
|
if (contentLower.includes(word)) score += 0.1; |
|
}); |
|
|
|
return Math.min(score, 1.0); |
|
} |
|
|
|
private extractSnippet(content: string, query: string, maxLength = 200): string { |
|
const queryLower = query.toLowerCase(); |
|
const contentLower = content.toLowerCase(); |
|
|
|
const index = contentLower.indexOf(queryLower); |
|
if (index === -1) { |
|
return content.substring(0, maxLength) + (content.length > maxLength ? '...' : ''); |
|
} |
|
|
|
const start = Math.max(0, index - 50); |
|
const end = Math.min(content.length, index + queryLower.length + 150); |
|
|
|
let snippet = content.substring(start, end); |
|
if (start > 0) snippet = '...' + snippet; |
|
if (end < content.length) snippet = snippet + '...'; |
|
|
|
return snippet; |
|
} |
|
|
|
async getDocumentsBySourceType(sourceType: string): Promise<Document[]> { |
|
const stmt = this.db.prepare('SELECT * FROM documents WHERE source_type = ? ORDER BY created_at DESC'); |
|
const rows = stmt.all(sourceType) as any[]; |
|
return rows.map(row => this.mapDocumentRow(row)); |
|
} |
|
|
|
async getDocumentsByProcessingStatus(status: string): Promise<Document[]> { |
|
const stmt = this.db.prepare('SELECT * FROM documents WHERE processing_status = ? ORDER BY created_at DESC'); |
|
const rows = stmt.all(status) as any[]; |
|
return rows.map(row => this.mapDocumentRow(row)); |
|
} |
|
|
|
async createSearchQuery(insertQuery: InsertSearchQuery): Promise<SearchQuery> { |
|
const stmt = this.db.prepare(` |
|
INSERT INTO search_queries (query, search_type, filters, results_count, search_time) |
|
VALUES (?, ?, ?, ?, ?) |
|
`); |
|
|
|
const result = stmt.run( |
|
insertQuery.query, |
|
insertQuery.searchType || 'semantic', |
|
insertQuery.filters ? JSON.stringify(insertQuery.filters) : null, |
|
insertQuery.resultsCount || null, |
|
insertQuery.searchTime || null |
|
); |
|
|
|
const created = this.db.prepare('SELECT * FROM search_queries WHERE id = ?').get(result.lastInsertRowid) as any; |
|
return this.mapSearchQueryRow(created); |
|
} |
|
|
|
async getSearchQueries(limit = 50): Promise<SearchQuery[]> { |
|
const stmt = this.db.prepare('SELECT * FROM search_queries ORDER BY created_at DESC LIMIT ?'); |
|
const rows = stmt.all(limit) as any[]; |
|
return rows.map(row => this.mapSearchQueryRow(row)); |
|
} |
|
|
|
async createSearchResult(insertResult: InsertSearchResult): Promise<SearchResult> { |
|
const stmt = this.db.prepare(` |
|
INSERT INTO search_results (query_id, document_id, relevance_score, snippet, rank) |
|
VALUES (?, ?, ?, ?, ?) |
|
`); |
|
|
|
const result = stmt.run( |
|
insertResult.queryId, |
|
insertResult.documentId, |
|
insertResult.relevanceScore, |
|
insertResult.snippet, |
|
insertResult.rank |
|
); |
|
|
|
const created = this.db.prepare('SELECT * FROM search_results WHERE id = ?').get(result.lastInsertRowid) as any; |
|
return this.mapSearchResultRow(created); |
|
} |
|
|
|
async getSearchResults(queryId: number): Promise<SearchResult[]> { |
|
const stmt = this.db.prepare('SELECT * FROM search_results WHERE query_id = ? ORDER BY rank'); |
|
const rows = stmt.all(queryId) as any[]; |
|
return rows.map(row => this.mapSearchResultRow(row)); |
|
} |
|
|
|
async createCitation(insertCitation: InsertCitation): Promise<Citation> { |
|
const stmt = this.db.prepare(` |
|
INSERT INTO citations (document_id, citation_text, page_number, section) |
|
VALUES (?, ?, ?, ?) |
|
`); |
|
|
|
const result = stmt.run( |
|
insertCitation.documentId, |
|
insertCitation.citationText, |
|
insertCitation.pageNumber || null, |
|
insertCitation.section || null |
|
); |
|
|
|
const created = this.db.prepare('SELECT * FROM citations WHERE id = ?').get(result.lastInsertRowid) as any; |
|
return this.mapCitationRow(created); |
|
} |
|
|
|
async getCitationsByDocument(documentId: number): Promise<Citation[]> { |
|
const stmt = this.db.prepare('SELECT * FROM citations WHERE document_id = ? ORDER BY created_at DESC'); |
|
const rows = stmt.all(documentId) as any[]; |
|
return rows.map(row => this.mapCitationRow(row)); |
|
} |
|
|
|
async deleteCitation(id: number): Promise<boolean> { |
|
const stmt = this.db.prepare('DELETE FROM citations WHERE id = ?'); |
|
const result = stmt.run(id); |
|
return result.changes > 0; |
|
} |
|
|
|
private mapDocumentRow(row: any): Document { |
|
return { |
|
id: row.id, |
|
title: row.title, |
|
content: row.content, |
|
source: row.source, |
|
sourceType: row.source_type, |
|
url: row.url, |
|
metadata: row.metadata ? JSON.parse(row.metadata) : null, |
|
embedding: row.embedding, |
|
createdAt: new Date(row.created_at), |
|
filePath: row.file_path, |
|
fileName: row.file_name, |
|
fileSize: row.file_size, |
|
mimeType: row.mime_type, |
|
processingStatus: row.processing_status, |
|
modalTaskId: row.modal_task_id, |
|
processedAt: row.processed_at ? new Date(row.processed_at) : null, |
|
} as Document; |
|
} |
|
|
|
private mapSearchQueryRow(row: any): SearchQuery { |
|
return { |
|
id: row.id, |
|
query: row.query, |
|
searchType: row.search_type, |
|
filters: row.filters ? JSON.parse(row.filters) : null, |
|
resultsCount: row.results_count, |
|
searchTime: row.search_time, |
|
createdAt: new Date(row.created_at) |
|
}; |
|
} |
|
|
|
private mapSearchResultRow(row: any): SearchResult { |
|
return { |
|
id: row.id, |
|
queryId: row.query_id, |
|
documentId: row.document_id, |
|
relevanceScore: row.relevance_score, |
|
snippet: row.snippet, |
|
rank: row.rank |
|
}; |
|
} |
|
|
|
private mapCitationRow(row: any): Citation { |
|
return { |
|
id: row.id, |
|
documentId: row.document_id, |
|
citationText: row.citation_text, |
|
pageNumber: row.page_number, |
|
section: row.section, |
|
createdAt: new Date(row.created_at) |
|
}; |
|
} |
|
|
|
close() { |
|
this.db.close(); |
|
} |
|
} |