<!-- smart-web-crawler / index.html · repository page header: gewei20's picture · "Update index.html" · 8773530 verified. Kept as a comment so no text precedes the doctype. -->
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>交互式 Markdown 知识库处理器 (最终版)</title>
<!-- Third-party assets loaded from public CDNs: Tailwind (utility classes used throughout the markup),
     Font Awesome (the "fas fa-*" icons) and the Inter web font referenced by the body rule below. -->
<script src="https://cdn.tailwindcss.com"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700;800&display=swap" rel="stylesheet">
<!-- marked provides marked.parse(), which the inline script uses to render the summary Markdown as HTML. -->
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<style>
/* Page-wide font and the background gradient applied to <body>. */
body { font-family: 'Inter', sans-serif; }
.gradient-bg { background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); }
/* Status indicator: a 10px circle whose colour class (red / yellow / green) is switched by the inline script;
   the yellow state additionally pulses. */
.status-dot { width: 10px; height: 10px; border-radius: 50%; }
.status-dot.red { background-color: #ef4444; }
.status-dot.yellow { background-color: #f59e0b; animation: pulse 2s infinite; }
.status-dot.green { background-color: #22c55e; }
@keyframes pulse { 0%, 100% { opacity: 1; } 50% { opacity: .5; } }
/* Typography for rendered Markdown; the script adds the "prose" class to the summary card. */
.prose h1, .prose h2, .prose h3 { font-weight: 700; }
.prose p { margin-bottom: 1em; line-height: 1.6; }
.prose ul { list-style-type: disc; margin-left: 1.5em; }
.prose code { background-color: #e5e7eb; padding: 0.2em 0.4em; border-radius: 3px; font-size: 85%; }
.prose pre > code { background-color: transparent; padding: 0; }
/* Disclosure widgets: hide the native marker and flip the chevron icon while the <details> is open. */
details > summary { list-style: none; cursor: pointer; }
details > summary::-webkit-details-marker { display: none; }
details[open] summary .fa-chevron-down { transform: rotate(180deg); }
</style>
</head>
<body class="gradient-bg min-h-screen text-gray-800">
<div class="container mx-auto px-4 py-8">
<!-- Page banner: product title and one-line tagline. -->
<header class="text-center mb-12">
  <h1 class="text-4xl md:text-5xl font-bold mb-4"><span class="highlight-text relative inline-block z-10">Markdown</span> 知识库处理器</h1>
  <p class="text-xl text-gray-600 max-w-3xl mx-auto">✨ Gemini 增强版:将静态文档转变为可对话、会总结的智能知识库。</p>
</header>
<section class="mb-16 bg-white p-6 sm:p-8 rounded-2xl shadow-lg border border-gray-200">
<!-- Console heading plus the server-status indicator; the inline script rewrites #status-dot's class and #status-text's text. -->
<div class="flex justify-between items-center mb-6">
  <!-- The icon is decorative, so it is hidden from assistive technology. -->
  <h2 class="text-3xl font-bold"><i class="fas fa-bolt text-blue-500 mr-2" aria-hidden="true"></i>知识库控制台</h2>
  <div id="status-container" class="flex items-center space-x-2">
    <!-- The dot conveys state by colour only; the adjacent text carries the same information. -->
    <div id="status-dot" class="status-dot red" aria-hidden="true"></div>
    <span id="status-text" class="text-gray-600 font-medium">服务未连接</span>
  </div>
</div>
<!-- API key panel: the key typed here is stored by the inline script and sent as the X-API-Key header. -->
<div class="bg-yellow-50 border border-yellow-200 p-6 rounded-lg mb-8">
  <h3 class="font-bold text-xl mb-4 text-yellow-800"><i class="fas fa-key mr-2" aria-hidden="true"></i>API 密钥配置</h3>
  <p class="text-gray-700 mb-4">请输入您的应用专属 API 密钥以授权访问。</p>
  <!-- A placeholder is not a label; this visually hidden label gives the field an accessible name without changing the layout. -->
  <label for="apiKeyInput" class="sr-only">API 密钥</label>
  <div class="flex flex-col sm:flex-row gap-4">
    <input type="password" id="apiKeyInput" class="w-full px-4 py-2 border-2 border-gray-300 rounded-lg focus:ring-2 focus:ring-yellow-500" placeholder="在此输入您的 API 密钥">
    <button type="button" id="saveApiKeyButton" class="bg-yellow-500 hover:bg-yellow-600 text-white font-bold py-2 px-6 rounded-lg transition-colors shadow flex-shrink-0">
      <i class="fas fa-save mr-2" aria-hidden="true"></i>保存密钥
    </button>
  </div>
  <!-- Feedback line filled in by the script after save/load; exposed as a status region so the result is announced. -->
  <p id="apiKeyMessage" class="text-sm text-gray-600 mt-3 h-5" role="status"></p>
</div>
<!-- Step 1: build panel. Every id below is read by the inline script when it posts to the /build endpoint. -->
<div class="bg-gray-50 p-6 rounded-lg mb-8 border">
  <h3 class="font-bold text-xl mb-2">1. 构建知识库</h3>
  <p class="text-gray-600 mb-4">输入 Markdown 文件夹的本地绝对路径。</p>
  <!-- Visually hidden label: the placeholder alone does not give the field an accessible name. -->
  <label for="folderPathInput" class="sr-only">Markdown 文件夹路径</label>
  <div class="flex flex-col sm:flex-row gap-4">
    <input type="text" id="folderPathInput" class="w-full px-4 py-2 border-2 border-gray-300 rounded-lg focus:ring-2 focus:ring-indigo-500" placeholder="例如: C:\Users\YourName\Documents\Notes">
    <!-- Starts disabled; the script enables it once the status poll succeeds. The disabled: utilities make that state visible. -->
    <button type="button" id="buildButton" class="bg-indigo-600 hover:bg-indigo-700 text-white font-bold py-2 px-6 rounded-lg transition-colors shadow flex-shrink-0 disabled:opacity-50 disabled:cursor-not-allowed" disabled>
      <i class="fas fa-hammer mr-2"></i>开始构建
    </button>
  </div>
  <div class="mt-2">
    <input id="clearExistingCheckbox" type="checkbox" class="h-4 w-4 text-indigo-600 border-gray-300 rounded focus:ring-indigo-500">
    <label for="clearExistingCheckbox" class="ml-2 text-sm text-gray-700">在构建前清空现有知识库</label>
  </div>
  <!-- Build feedback written by the script; a status region so the outcome is announced. -->
  <p id="build-message" class="text-sm text-gray-500 mt-3 h-5" role="status"></p>
  <details class="mt-4">
    <summary class="font-medium text-indigo-600">
      高级构建设置 <i class="fas fa-chevron-down ml-1 text-sm transition-transform" aria-hidden="true"></i>
    </summary>
    <!-- min attributes keep the native number inputs from accepting values that make no sense for a build. -->
    <div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4 pt-4 border-t mt-2">
      <div><label for="chunkSizeInput" class="block text-sm font-medium text-gray-700">块大小</label><input type="number" id="chunkSizeInput" value="4096" min="1" class="mt-1 block w-full p-2 border border-gray-300 rounded-md"></div>
      <div><label for="overlapInput" class="block text-sm font-medium text-gray-700">重叠大小</label><input type="number" id="overlapInput" value="400" min="0" class="mt-1 block w-full p-2 border border-gray-300 rounded-md"></div>
      <div><label for="maxFilesInput" class="block text-sm font-medium text-gray-700">最大文件数</label><input type="number" id="maxFilesInput" value="500" min="1" class="mt-1 block w-full p-2 border border-gray-300 rounded-md"></div>
      <!-- "border" added so the select is outlined like the three inputs beside it. -->
      <div><label for="sampleModeInput" class="block text-sm font-medium text-gray-700">采样模式</label><select id="sampleModeInput" class="mt-1 block w-full p-2 border border-gray-300 rounded-md"><option value="largest">最大的</option><option value="random">随机</option><option value="recent">最新的</option></select></div>
    </div>
  </details>
</div>
<!-- Step 2: search panel. The input and button start disabled; the script enables them when the status poll reports a built knowledge base. -->
<div class="bg-gray-50 p-6 rounded-lg mb-8 border">
  <h3 class="font-bold text-xl mb-2">2. 搜索知识库</h3>
  <div class="relative">
    <!-- Visually hidden label: the placeholder alone does not name the field for assistive technology. -->
    <label for="searchInput" class="sr-only">搜索问题</label>
    <input type="text" id="searchInput" class="w-full pl-4 pr-12 py-3 border-2 border-gray-300 rounded-lg disabled:bg-gray-100 disabled:cursor-not-allowed" placeholder="输入问题开始搜索..." disabled>
    <!-- Icon-only button: aria-label supplies the accessible name, and the icon itself is hidden from AT. -->
    <button type="button" id="searchButton" class="absolute inset-y-0 right-0 px-4 text-gray-600 disabled:opacity-50 disabled:cursor-not-allowed" aria-label="搜索" disabled><i class="fas fa-search text-xl" aria-hidden="true"></i></button>
  </div>
  <details class="mt-4">
    <summary class="font-medium text-blue-600">
      搜索设置 <i class="fas fa-chevron-down ml-1 text-sm transition-transform" aria-hidden="true"></i>
    </summary>
    <div class="flex items-center gap-8 pt-4 border-t mt-2">
      <div><label for="topKInput" class="block text-sm font-medium text-gray-700">返回结果数</label><input type="number" id="topKInput" value="5" min="1" class="mt-1 block w-full p-2 border border-gray-300 rounded-md"></div>
    </div>
  </details>
</div>
<!-- AI summary area: hidden until a search returns results; the script fills #summaryResultCard. -->
<div id="summarySection" class="hidden">
  <div class="flex justify-between items-center mb-4">
    <h3 class="font-bold text-xl">✨ AI 智能总结</h3>
    <button id="summarizeButton" class="bg-gradient-to-r from-purple-500 to-blue-500 text-white font-bold py-2 px-4 rounded-lg"><i class="fas fa-magic-wand-sparkles mr-2"></i>生成智能总结</button>
  </div>
  <div id="summaryResultCard" class="bg-blue-50 border-l-4 border-blue-400 p-4 rounded-r-lg"></div>
</div>
<!-- Spinner shown by the script while a search request is in flight. -->
<div id="loadingIndicator" class="hidden text-center mt-8">
  <i class="fas fa-spinner fa-spin text-3xl text-blue-500"></i><p class="mt-2">正在检索...</p>
</div>
<!-- Result cards (or an empty/error notice) are rendered into this grid by the script. -->
<div id="searchResults" class="mt-8 grid grid-cols-1 md:grid-cols-2 gap-6"></div>
</section>
</div>
<script>
document.addEventListener('DOMContentLoaded', () => {
// Base URL of the backend that every fetch call in this script targets.
const API_BASE_URL = 'http://127.0.0.1:5000';

// Shorthand for document.getElementById.
const el = (id) => document.getElementById(id);

// Status indicator.
const statusDot = el('status-dot');
const statusText = el('status-text');

// API key panel.
const apiKeyInput = el('apiKeyInput');
const saveApiKeyButton = el('saveApiKeyButton');
const apiKeyMessage = el('apiKeyMessage');

// Build panel, including the advanced settings.
const folderPathInput = el('folderPathInput');
const buildButton = el('buildButton');
const buildMessage = el('build-message');
const clearExistingCheckbox = el('clearExistingCheckbox');
const chunkSizeInput = el('chunkSizeInput');
const overlapInput = el('overlapInput');
const maxFilesInput = el('maxFilesInput');
const sampleModeInput = el('sampleModeInput');

// Search panel.
const searchInput = el('searchInput');
const searchButton = el('searchButton');
const topKInput = el('topKInput');

// Summary, loading spinner and result grid.
const summarySection = el('summarySection');
const summarizeButton = el('summarizeButton');
const summaryResultCard = el('summaryResultCard');
const loadingIndicator = el('loadingIndicator');
const searchResultsContainer = el('searchResults');

// Results of the most recent search; sent back to the server when a summary is requested.
let lastSearchResults = [];
// Handle of the status-polling timer, so the poller can cancel itself.
let statusInterval;
/**
 * Store the trimmed contents of the API key field in localStorage and show a
 * confirmation, or show a validation error when the field is blank.
 * The feedback line is cleared three seconds later.
 */
const saveApiKey = () => {
  const trimmedKey = apiKeyInput.value.trim();
  const hasKey = Boolean(trimmedKey);
  if (hasKey) {
    localStorage.setItem('knowledgeBaseApiKey', trimmedKey);
  }
  apiKeyMessage.textContent = hasKey ? '密钥已保存到浏览器。' : '请输入有效的密钥。';
  apiKeyMessage.style.color = hasKey ? 'green' : 'red';
  setTimeout(() => { apiKeyMessage.textContent = ''; }, 3000);
};
/**
 * Pre-fill the API key field from localStorage, if a key was saved earlier,
 * and show a notice that disappears after three seconds.
 */
const loadApiKey = () => {
  const storedKey = localStorage.getItem('knowledgeBaseApiKey');
  if (!storedKey) return;
  apiKeyInput.value = storedKey;
  apiKeyMessage.textContent = '已从本地加载密钥。';
  setTimeout(() => { apiKeyMessage.textContent = ''; }, 3000);
};
/**
 * Build the request headers for an authenticated API call.
 *
 * @param {boolean} [isGetRequest=false] When truthy, the JSON Content-Type header is omitted.
 * @returns {Object} Headers object; contains X-API-Key only when a key is stored in localStorage.
 */
const getAuthHeaders = (isGetRequest = false) => {
  const storedKey = localStorage.getItem('knowledgeBaseApiKey');
  const headers = isGetRequest ? {} : { 'Content-Type': 'application/json' };
  if (!storedKey) {
    // No key saved: the headers are still returned, just without X-API-Key.
    console.warn("API Key not found in localStorage.");
    return headers;
  }
  headers['X-API-Key'] = storedKey;
  return headers;
};
/**
 * Fetch `/status` from the backend and reflect the answer in the UI:
 * status text, dot colour, and the enabled state of the search and build controls.
 * On any failure the controls are disabled and the polling timer is cancelled.
 */
const updateStatus = async () => {
  try {
    const response = await fetch(`${API_BASE_URL}/status`);
    if (!response.ok) {
      throw new Error('Network response was not ok');
    }
    const { message, is_built: built, is_building: building } = await response.json();
    statusText.textContent = message;
    statusDot.className = 'status-dot';

    // Search needs a finished build; building is blocked only while a build is running.
    const searchLocked = !built || Boolean(building);
    searchInput.disabled = searchLocked;
    searchButton.disabled = searchLocked;
    buildButton.disabled = Boolean(building);

    buildButton.innerHTML = building
      ? '<i class="fas fa-spinner fa-spin mr-2"></i>构建中...'
      : '<i class="fas fa-hammer mr-2"></i>开始构建';
    statusDot.classList.add(building ? 'yellow' : (built ? 'green' : 'red'));
  } catch (error) {
    statusText.textContent = '服务连接失败';
    statusDot.className = 'status-dot red';
    for (const control of [searchInput, searchButton, buildButton]) {
      control.disabled = true;
    }
    // NOTE(review): polling stops after the first failure, so the page does not
    // recover by itself once the server comes back; a reload is needed.
    if (statusInterval) clearInterval(statusInterval);
  }
};
/**
 * Read the build form, POST its values to `/build`, and show the server's
 * reply (or the error) in the build message line. Refreshes the status
 * indicator after a successful request.
 */
const handleBuild = async () => {
  const folderPath = folderPathInput.value.trim();
  if (!folderPath) { buildMessage.textContent = '错误:文件夹路径不能为空。'; return; }

  // Parse a numeric field, falling back to the default only when the value is
  // missing, not a number, or below the allowed minimum. Unlike `parseInt(..) || d`
  // this keeps a legitimate 0 (e.g. "no overlap") instead of replacing it.
  const readInt = (input, fallback, min) => {
    const parsed = parseInt(input.value, 10);
    return Number.isNaN(parsed) || parsed < min ? fallback : parsed;
  };

  const buildParams = {
    folder_path: folderPath,
    clear_existing: clearExistingCheckbox.checked,
    chunk_size: readInt(chunkSizeInput, 4096, 1),
    overlap: readInt(overlapInput, 400, 0),
    max_files: readInt(maxFilesInput, 500, 1),
    sample_mode: sampleModeInput.value,
  };
  buildMessage.textContent = '已发送构建请求...';
  try {
    const response = await fetch(`${API_BASE_URL}/build`, {
      method: 'POST',
      headers: getAuthHeaders(),
      body: JSON.stringify(buildParams),
    });
    // Same check as the search and summarize handlers: report a rejected key
    // before trying to parse the body as JSON.
    if (response.status === 403) throw new Error('授权失败。请检查 API 密钥是否正确。');
    const result = await response.json();
    if (!response.ok) throw new Error(result.error || '构建请求失败');
    buildMessage.textContent = result.message;
    updateStatus();
  } catch (error) { buildMessage.textContent = `错误: ${error.message}`; }
};
/**
 * Run a search for the text in the search box: clear the previous results and
 * summary, query `/search` with the chosen top_k, render the hits, and reveal
 * the summary section when at least one result came back.
 */
const performSearch = async () => {
  const query = searchInput.value.trim();
  if (!query) return;
  // Drop the previous search so a failed request cannot leave stale results behind.
  lastSearchResults = [];
  summarySection.classList.add('hidden');
  summaryResultCard.innerHTML = '';
  searchResultsContainer.innerHTML = '';
  loadingIndicator.classList.remove('hidden');
  const searchUrl = new URL(`${API_BASE_URL}/search`);
  searchUrl.searchParams.append('query', query);
  searchUrl.searchParams.append('top_k', topKInput.value || 5);
  try {
    const response = await fetch(searchUrl, { method: 'GET', headers: getAuthHeaders(true) });
    if (response.status === 403) throw new Error('授权失败。请检查 API 密钥是否正确。');
    const results = await response.json();
    if (!response.ok) throw new Error(results.error || '搜索失败');
    lastSearchResults = results;
    displayResults(results, query);
    if (results.length > 0) {
      summarySection.classList.remove('hidden');
    }
  } catch (error) {
    // The message can contain text supplied by the server, so it is inserted
    // as text rather than interpolated into markup.
    const errorLine = document.createElement('p');
    errorLine.className = 'text-center text-red-500 md:col-span-2';
    errorLine.textContent = `搜索出错: ${error.message}`;
    searchResultsContainer.replaceChildren(errorLine);
  } finally {
    loadingIndicator.classList.add('hidden');
  }
};
/**
 * Render search hits as cards inside the results grid, or a "nothing found"
 * notice when the list is empty.
 *
 * @param {Array<Object>} results Hits; each may carry `distance`, `content` and `metadata.{file_name, source}`.
 * @param {string} query The search text, echoed in the empty-result notice.
 */
const displayResults = (results, query) => {
  // Escape every character that is significant in HTML text or in a
  // double-quoted attribute, so file names, paths, content and the query are
  // always shown literally and can never inject markup.
  const escapeHtml = (value) => String(value ?? '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  if (!results || results.length === 0) {
    searchResultsContainer.innerHTML = `<p class="text-center text-gray-500 md:col-span-2">未找到与 "${escapeHtml(query)}" 相关的结果。</p>`;
    return;
  }
  searchResultsContainer.innerHTML = results.map(result => {
    // A missing distance is treated as the worst value (2.0), which maps to similarity 0.
    const distance = typeof result.distance === 'number' ? result.distance : 2.0;
    const similarity = Math.max(0, 1 - distance / 2); // Normalize score to be more intuitive
    const fileName = result.metadata?.file_name || '未知文件';
    const sourcePath = result.metadata?.source || fileName;
    return `
<div class="bg-white border border-gray-200 rounded-lg p-4 transition-all hover:shadow-md">
<div class="flex justify-between items-center mb-3">
<h4 class="font-bold text-blue-700 truncate pr-4" title="${escapeHtml(sourcePath)}">${escapeHtml(fileName)}</h4>
<span class="text-xs font-medium bg-blue-100 text-blue-800 py-1 px-2 rounded-full flex-shrink-0">相似度: ${similarity.toFixed(4)}</span>
</div>
<p class="text-gray-600 text-sm break-words">${escapeHtml(result.content)}</p>
</div>`;
  }).join('');
};
/**
 * Ask `/summarize` for a summary of the last search results and render the
 * returned Markdown in the summary card. The button shows a spinner while the
 * request runs and is re-enabled afterwards, whatever the outcome.
 */
const handleSummarize = async () => {
  if (lastSearchResults.length === 0) return;
  summarizeButton.disabled = true;
  summarizeButton.innerHTML = '<i class="fas fa-spinner fa-spin mr-2"></i>AI 正在思考...';
  summaryResultCard.innerHTML = '<p class="text-gray-600">请稍候,正在为您生成总结...</p>';
  try {
    const response = await fetch(`${API_BASE_URL}/summarize`, {
      method: 'POST',
      headers: getAuthHeaders(),
      body: JSON.stringify({ query: searchInput.value, results: lastSearchResults }),
    });
    if (response.status === 403) throw new Error('授权失败。请检查 API 密钥。');
    const data = await response.json();
    if (!response.ok) throw new Error(data.error || '总结生成失败');
    // A success reply without a summary string is reported as a failure
    // instead of being handed to the Markdown parser.
    if (typeof data.summary !== 'string') throw new Error('总结生成失败');
    // NOTE(review): marked does not sanitize its output, and the summary is
    // generated from document content, so raw HTML in it is inserted as-is.
    // Pass the result through an HTML sanitizer before assigning it if the
    // indexed documents are not fully trusted.
    summaryResultCard.innerHTML = marked.parse(data.summary);
    summaryResultCard.classList.add('prose');
  } catch (error) {
    // The message can contain server-supplied text; insert it as text, not markup.
    const errorLine = document.createElement('p');
    errorLine.className = 'text-red-500';
    errorLine.textContent = `生成总结时出错: ${error.message}`;
    summaryResultCard.replaceChildren(errorLine);
  } finally {
    summarizeButton.disabled = false;
    summarizeButton.innerHTML = '<i class="fas fa-magic-wand-sparkles mr-2"></i>重新生成总结';
  }
};
// Wire the controls to their handlers.
saveApiKeyButton.addEventListener('click', saveApiKey);
buildButton.addEventListener('click', handleBuild);
searchButton.addEventListener('click', performSearch);
// Search on Enter. keydown plus the composition checks ensures that the Enter
// press which merely confirms an IME candidate (common with Chinese input)
// does not start a search; keyCode 229 covers browsers that report the
// composition only that way. Auto-repeat from a held key is ignored.
searchInput.addEventListener('keydown', e => {
  if (e.key !== 'Enter' || e.isComposing || e.keyCode === 229 || e.repeat) return;
  performSearch();
});
summarizeButton.addEventListener('click', handleSummarize);

// Initial state: restore a saved key, fetch the server status once, then poll every five seconds.
loadApiKey();
updateStatus();
statusInterval = setInterval(updateStatus, 5000);
});
</script>
</body>
</html>