| |
| |
| |
| |
|
|
| import * as d3 from 'd3'; |
| import type { TextAnalysisAPI } from '../api/GLTR_API'; |
| import { isSemanticFromCache } from '../api/GLTR_API'; |
| import type { AppStateManager } from '../utils/appStateManager'; |
| import type { VisualizationUpdater } from '../utils/visualizationUpdater'; |
| import type { GLTR_Text_Box } from '../vis/GLTR_Text_Box'; |
| import { SEMANTIC_CHUNK_BYTES } from '../constants'; |
| import { getSemanticMatchThreshold } from '../utils/semanticThresholdManager'; |
| import { getDigitsMergeEnabled } from '../utils/digitsMergeManager'; |
| import { |
| getAttentionRawScore, |
| mergeAttentionTokensFullyForRendering, |
| normalizeTokenScores, |
| splitTextToChunks, |
| } from '../utils/semanticUtils'; |
| import type { signalFitResult } from '../utils/signalThresholdDetector'; |
|
|
/**
 * Injected dependencies for {@link SemanticSearchController}.
 *
 * Bundles all UI, state, and API collaborators so the controller itself
 * stays free of direct construction and easy to test.
 */
export interface SemanticSearchControllerDeps {
  /** Current semantic search query (empty string means "nothing to search"). */
  getQuery: () => string;
  /** Full text under analysis; empty string triggers an error toast. */
  getText: () => string;
  /** Optional analysis submode forwarded verbatim to the API. */
  getSubmode: () => string | undefined;
  /** When true, the text is split into chunks and analyzed incrementally. */
  isChunkedMode: () => boolean;
  /** Backend client used for `analyzeSemantic` calls. */
  api: TextAnalysisAPI;
  /** Global app state; used to flag that a semantic search is in progress. */
  appStateManager: AppStateManager;
  /** Renders semantic responses and histogram state into the visualization. */
  visualizationUpdater: VisualizationUpdater;
  /** Text box component; receives the plain text and loading-state toggles. */
  lmf: GLTR_Text_Box;
  /** Shows a transient notification of the given severity. */
  showToast: (message: string, type: 'success' | 'error') => void;
  /** Shows the semantic-specific error UI; message is optional. */
  showSemanticError: (message?: string) => void;
  /** Hook invoked once per search, right before any async work starts. */
  onSearchStart: (query: string) => void;
  /** Hook invoked on successful completion with the final match degree. */
  finishSemanticSearch: (query: string, matchDegree: number | null, fromCache: boolean) => void;
  /** Translates a UI string key to the current locale. */
  tr: (key: string) => string;
  /** Extracts a human-readable message from an unknown error value. */
  extractErrorMessage: (err: unknown, fallback: string) => string;
}
|
|
| export class SemanticSearchController { |
| private deps: SemanticSearchControllerDeps; |
| private abortController: AbortController | null = null; |
|
|
| constructor(deps: SemanticSearchControllerDeps) { |
| this.deps = deps; |
| } |
|
|
| abort(): void { |
| this.abortController?.abort(); |
| } |
|
|
| run(): void { |
| void this.runSemanticSearchBase(async ({ query, text, submode, signal }) => { |
| if (this.deps.isChunkedMode()) { |
| await this.runChunked({ query, text, submode, signal }); |
| } else { |
| await this.runWhole({ query, text, submode, signal }); |
| } |
| }); |
| } |
|
|
| private async runSemanticSearchBase( |
| execute: (params: { query: string; text: string; submode: string | undefined; signal: AbortSignal }) => Promise<void> |
| ): Promise<void> { |
| const query = this.deps.getQuery(); |
| if (!query) return; |
| const text = this.deps.getText(); |
| if (!text) { |
| this.deps.showToast(this.deps.tr('Please enter text first'), 'error'); |
| return; |
| } |
| this.abortController = new AbortController(); |
| const signal = this.abortController.signal; |
| this.deps.onSearchStart(query); |
| try { |
| this.deps.appStateManager.setSemanticSearching(true); |
| d3.select('#semantic_match_degree').style('display', 'none'); |
| d3.select('#semantic_search_loader').style('visibility', 'visible'); |
| d3.select('#all_result').style('opacity', 1).style('display', null); |
| this.deps.lmf.setTextOnly(text); |
| this.deps.visualizationUpdater.updateHistogramVisibilityForPending('semantic', text, this.deps.isChunkedMode()); |
| await execute({ query, text, submode: this.deps.getSubmode(), signal }); |
| } catch (err) { |
| if (err instanceof Error && err.name === 'AbortError') { |
| this.deps.lmf.hideLoading(); |
| this.deps.visualizationUpdater.rerenderHistograms(); |
| return; |
| } |
| this.deps.showToast( |
| this.deps.extractErrorMessage(err, this.deps.tr('Semantic analysis failed')), |
| 'error' |
| ); |
| this.deps.lmf.hideLoading(); |
| this.deps.visualizationUpdater.rerenderHistograms(); |
| } finally { |
| this.abortController = null; |
| this.deps.appStateManager.setSemanticSearching(false); |
| d3.select('#semantic_search_loader').style('visibility', 'hidden'); |
| } |
| } |
|
|
| private async runWhole(params: { query: string; text: string; submode: string | undefined; signal: AbortSignal }): Promise<void> { |
| const { query, text, submode, signal } = params; |
| const onProgress = (step: number, totalSteps: number, stage: string, percentage?: number) => { |
| const progressText = percentage !== undefined && percentage !== null |
| ? `Step ${step}/${totalSteps}:\t ${stage} ${percentage}%` |
| : `Step ${step}/${totalSteps}:\t ${stage}`; |
| d3.select('#semantic_progress').text(progressText).style('display', 'inline-block'); |
| }; |
| const res = await this.deps.api.analyzeSemantic(query, text, { onProgress, submode, debug_info: true, signal }); |
| if (res?.success && res?.token_attention) { |
| this.deps.visualizationUpdater.handleSemanticResponse(res, text); |
| const md = res?.full_match_degree; |
| this.deps.finishSemanticSearch(query, md != null && typeof md === 'number' ? md : null, isSemanticFromCache(res)); |
| } else { |
| this.deps.showSemanticError(res?.message); |
| } |
| } |
|
|
| private async runChunked(params: { query: string; text: string; submode: string | undefined; signal: AbortSignal }): Promise<void> { |
| const { query, text, submode, signal } = params; |
| const chunks = splitTextToChunks(text, SEMANTIC_CHUNK_BYTES); |
| if (chunks.length === 0) { |
| this.deps.visualizationUpdater.handleSemanticResponse({ token_attention: [] }, text, undefined); |
| this.deps.finishSemanticSearch(query, null, true); |
| return; |
| } |
| |
| const allChunkProcessedTokens: Array<{ |
| offset: [number, number]; |
| raw: string; |
| score: number; |
| rawScore?: number; |
| }> = []; |
| const chunkInfos: Array<{ startOffset: number; endOffset: number; chunkIndex: number; chunkMatchDegree: number; thresholdResult?: signalFitResult }> = []; |
| let maxMatchDegree = 0; |
| let allFromCache = true; |
| let aborted = false; |
| let lastChunkFromCache = false; |
|
|
| for (let i = 0; i < chunks.length; i++) { |
| if (signal.aborted) break; |
| d3.select('#semantic_progress').text(`Chunk ${i + 1}/${chunks.length}`).style('display', 'inline-block'); |
| const res = await this.deps.api.analyzeSemantic(query, chunks[i].text, { submode, signal }); |
| |
| |
| |
| |
| |
| if (signal.aborted) { |
| aborted = true; |
| break; |
| } |
| if (!res?.success) { |
| this.deps.showSemanticError(res?.message); |
| aborted = true; |
| break; |
| } |
| lastChunkFromCache = isSemanticFromCache(res); |
| if (!lastChunkFromCache) allFromCache = false; |
| const matchDegree = res.full_match_degree ?? 0; |
| maxMatchDegree = Math.max(maxMatchDegree, matchDegree); |
| const matched = matchDegree >= getSemanticMatchThreshold(); |
| const merged = mergeAttentionTokensFullyForRendering(res.token_attention ?? [], chunks[i].text, { |
| digitMerge: getDigitsMergeEnabled(), |
| }); |
| const normalized = normalizeTokenScores(merged); |
| const tokens = matched |
| ? normalized |
| : normalized.map((t) => ({ ...t, rawScore: getAttentionRawScore(t), score: 0 })); |
|
|
| chunkInfos.push({ |
| startOffset: chunks[i].startOffset, |
| endOffset: chunks[i].startOffset + chunks[i].text.length, |
| chunkIndex: i, |
| chunkMatchDegree: matchDegree, |
| }); |
| const tokensOffsetAdjusted = tokens.map(t => ({ |
| ...t, |
| offset: [t.offset[0] + chunks[i].startOffset, t.offset[1] + chunks[i].startOffset] as [number, number], |
| })); |
| allChunkProcessedTokens.push(...tokensOffsetAdjusted); |
| if (!lastChunkFromCache) { |
| if (!this.deps.visualizationUpdater.handleSemanticResponse( |
| { token_attention: allChunkProcessedTokens, chunkInfos, debug_info: undefined }, |
| text, |
| undefined |
| )) { |
| aborted = true; |
| this.deps.showSemanticError(); |
| break; |
| } |
| } |
| } |
|
|
| if (!aborted) { |
| if (lastChunkFromCache) { |
| this.deps.visualizationUpdater.handleSemanticResponse( |
| { token_attention: allChunkProcessedTokens, chunkInfos, debug_info: undefined }, |
| text, |
| undefined |
| ); |
| } |
| this.deps.finishSemanticSearch(query, maxMatchDegree, allFromCache); |
| } |
| } |
| } |
|
|