ArindamRoy23 commited on
Commit
d17e1be
·
unverified ·
2 Parent(s): e3ae7ea fa65a80

Merge branch 'HKUDS:main' into main

Browse files
Files changed (46) hide show
  1. lightrag/__init__.py +1 -1
  2. lightrag/api/lightrag_server.py +21 -21
  3. lightrag/api/routers/document_routes.py +88 -45
  4. lightrag/api/routers/ollama_api.py +1 -1
  5. lightrag/api/utils_api.py +14 -1
  6. lightrag/base.py +24 -0
  7. lightrag/kg/chroma_impl.py +64 -0
  8. lightrag/kg/faiss_impl.py +43 -0
  9. lightrag/kg/json_doc_status_impl.py +34 -17
  10. lightrag/kg/json_kv_impl.py +58 -16
  11. lightrag/kg/milvus_impl.py +54 -0
  12. lightrag/kg/mongo_impl.py +53 -0
  13. lightrag/kg/nano_vector_db_impl.py +30 -0
  14. lightrag/kg/neo4j_impl.py +616 -355
  15. lightrag/kg/oracle_impl.py +74 -0
  16. lightrag/kg/postgres_impl.py +54 -0
  17. lightrag/kg/shared_storage.py +189 -36
  18. lightrag/kg/tidb_impl.py +94 -0
  19. lightrag/lightrag.py +77 -203
  20. lightrag/llm/azure_openai.py +2 -0
  21. lightrag/operate.py +220 -62
  22. lightrag/prompt.py +1 -1
  23. lightrag/utils.py +91 -14
  24. lightrag_webui/bun.lock +10 -0
  25. lightrag_webui/package.json +2 -0
  26. lightrag_webui/src/components/ThemeToggle.tsx +4 -2
  27. lightrag_webui/src/components/documents/ClearDocumentsDialog.tsx +10 -8
  28. lightrag_webui/src/components/documents/UploadDocumentsDialog.tsx +12 -10
  29. lightrag_webui/src/components/graph/FullScreenControl.tsx +4 -2
  30. lightrag_webui/src/components/graph/GraphLabels.tsx +6 -4
  31. lightrag_webui/src/components/graph/GraphSearch.tsx +4 -2
  32. lightrag_webui/src/components/graph/LayoutsControl.tsx +6 -3
  33. lightrag_webui/src/components/graph/PropertiesView.tsx +15 -12
  34. lightrag_webui/src/components/graph/Settings.tsx +18 -16
  35. lightrag_webui/src/components/graph/StatusCard.tsx +20 -18
  36. lightrag_webui/src/components/graph/StatusIndicator.tsx +3 -1
  37. lightrag_webui/src/components/graph/ZoomControl.tsx +5 -3
  38. lightrag_webui/src/components/retrieval/ChatMessage.tsx +5 -2
  39. lightrag_webui/src/components/retrieval/QuerySettings.tsx +43 -41
  40. lightrag_webui/src/features/DocumentManager.tsx +24 -22
  41. lightrag_webui/src/features/RetrievalTesting.tsx +8 -6
  42. lightrag_webui/src/features/SiteHeader.tsx +8 -5
  43. lightrag_webui/src/i18n.js +21 -0
  44. lightrag_webui/src/locales/en.json +234 -0
  45. lightrag_webui/src/locales/zh.json +235 -0
  46. lightrag_webui/src/main.tsx +2 -0
lightrag/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
  from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
2
 
3
- __version__ = "1.2.4"
4
  __author__ = "Zirui Guo"
5
  __url__ = "https://github.com/HKUDS/LightRAG"
 
1
  from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
2
 
3
+ __version__ = "1.2.5"
4
  __author__ = "Zirui Guo"
5
  __url__ = "https://github.com/HKUDS/LightRAG"
lightrag/api/lightrag_server.py CHANGED
@@ -50,9 +50,6 @@ from .auth import auth_handler
50
  # This update allows the user to put a different.env file for each lightrag folder
51
  load_dotenv(".env", override=True)
52
 
53
- # Read entity extraction cache config
54
- enable_llm_cache = os.getenv("ENABLE_LLM_CACHE_FOR_EXTRACT", "false").lower() == "true"
55
-
56
  # Initialize config parser
57
  config = configparser.ConfigParser()
58
  config.read("config.ini")
@@ -144,23 +141,25 @@ def create_app(args):
144
  try:
145
  # Initialize database connections
146
  await rag.initialize_storages()
147
- await initialize_pipeline_status()
148
 
149
- # Auto scan documents if enabled
150
- if args.auto_scan_at_startup:
151
- # Check if a task is already running (with lock protection)
152
- pipeline_status = await get_namespace_data("pipeline_status")
153
- should_start_task = False
154
- async with get_pipeline_status_lock():
155
- if not pipeline_status.get("busy", False):
156
- should_start_task = True
157
- # Only start the task if no other task is running
158
- if should_start_task:
159
- # Create background task
160
- task = asyncio.create_task(run_scanning_process(rag, doc_manager))
161
- app.state.background_tasks.add(task)
162
- task.add_done_callback(app.state.background_tasks.discard)
163
- logger.info("Auto scan task started at startup.")
 
 
 
164
 
165
  ASCIIColors.green("\nServer is ready to accept connections! 🚀\n")
166
 
@@ -326,7 +325,7 @@ def create_app(args):
326
  vector_db_storage_cls_kwargs={
327
  "cosine_better_than_threshold": args.cosine_threshold
328
  },
329
- enable_llm_cache_for_entity_extract=enable_llm_cache, # Read from environment variable
330
  embedding_cache_config={
331
  "enabled": True,
332
  "similarity_threshold": 0.95,
@@ -355,7 +354,7 @@ def create_app(args):
355
  vector_db_storage_cls_kwargs={
356
  "cosine_better_than_threshold": args.cosine_threshold
357
  },
358
- enable_llm_cache_for_entity_extract=enable_llm_cache, # Read from environment variable
359
  embedding_cache_config={
360
  "enabled": True,
361
  "similarity_threshold": 0.95,
@@ -419,6 +418,7 @@ def create_app(args):
419
  "doc_status_storage": args.doc_status_storage,
420
  "graph_storage": args.graph_storage,
421
  "vector_storage": args.vector_storage,
 
422
  },
423
  "update_status": update_status,
424
  }
 
50
  # This update allows the user to put a different.env file for each lightrag folder
51
  load_dotenv(".env", override=True)
52
 
 
 
 
53
  # Initialize config parser
54
  config = configparser.ConfigParser()
55
  config.read("config.ini")
 
141
  try:
142
  # Initialize database connections
143
  await rag.initialize_storages()
 
144
 
145
+ await initialize_pipeline_status()
146
+ pipeline_status = await get_namespace_data("pipeline_status")
147
+
148
+ should_start_autoscan = False
149
+ async with get_pipeline_status_lock():
150
+ # Auto scan documents if enabled
151
+ if args.auto_scan_at_startup:
152
+ if not pipeline_status.get("autoscanned", False):
153
+ pipeline_status["autoscanned"] = True
154
+ should_start_autoscan = True
155
+
156
+ # Only run auto scan when no other process started it first
157
+ if should_start_autoscan:
158
+ # Create background task
159
+ task = asyncio.create_task(run_scanning_process(rag, doc_manager))
160
+ app.state.background_tasks.add(task)
161
+ task.add_done_callback(app.state.background_tasks.discard)
162
+ logger.info(f"Process {os.getpid()} auto scan task started at startup.")
163
 
164
  ASCIIColors.green("\nServer is ready to accept connections! 🚀\n")
165
 
 
325
  vector_db_storage_cls_kwargs={
326
  "cosine_better_than_threshold": args.cosine_threshold
327
  },
328
+ enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
329
  embedding_cache_config={
330
  "enabled": True,
331
  "similarity_threshold": 0.95,
 
354
  vector_db_storage_cls_kwargs={
355
  "cosine_better_than_threshold": args.cosine_threshold
356
  },
357
+ enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
358
  embedding_cache_config={
359
  "enabled": True,
360
  "similarity_threshold": 0.95,
 
418
  "doc_status_storage": args.doc_status_storage,
419
  "graph_storage": args.graph_storage,
420
  "vector_storage": args.vector_storage,
421
+ "enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
422
  },
423
  "update_status": update_status,
424
  }
lightrag/api/routers/document_routes.py CHANGED
@@ -16,7 +16,11 @@ from pydantic import BaseModel, Field, field_validator
16
 
17
  from lightrag import LightRAG
18
  from lightrag.base import DocProcessingStatus, DocStatus
19
- from ..utils_api import get_api_key_dependency, get_auth_dependency
 
 
 
 
20
 
21
  router = APIRouter(
22
  prefix="/documents",
@@ -240,54 +244,93 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
240
  )
241
  return False
242
  case ".pdf":
243
- if not pm.is_installed("pypdf2"): # type: ignore
244
- pm.install("pypdf2")
245
- from PyPDF2 import PdfReader # type: ignore
246
- from io import BytesIO
247
-
248
- pdf_file = BytesIO(file)
249
- reader = PdfReader(pdf_file)
250
- for page in reader.pages:
251
- content += page.extract_text() + "\n"
 
 
 
 
 
 
 
 
 
252
  case ".docx":
253
- if not pm.is_installed("python-docx"): # type: ignore
254
- pm.install("docx")
255
- from docx import Document # type: ignore
256
- from io import BytesIO
257
-
258
- docx_file = BytesIO(file)
259
- doc = Document(docx_file)
260
- content = "\n".join([paragraph.text for paragraph in doc.paragraphs])
 
 
 
 
 
 
 
 
 
 
 
261
  case ".pptx":
262
- if not pm.is_installed("python-pptx"): # type: ignore
263
- pm.install("pptx")
264
- from pptx import Presentation # type: ignore
265
- from io import BytesIO
266
-
267
- pptx_file = BytesIO(file)
268
- prs = Presentation(pptx_file)
269
- for slide in prs.slides:
270
- for shape in slide.shapes:
271
- if hasattr(shape, "text"):
272
- content += shape.text + "\n"
 
 
 
 
 
 
 
 
 
273
  case ".xlsx":
274
- if not pm.is_installed("openpyxl"): # type: ignore
275
- pm.install("openpyxl")
276
- from openpyxl import load_workbook # type: ignore
277
- from io import BytesIO
278
-
279
- xlsx_file = BytesIO(file)
280
- wb = load_workbook(xlsx_file)
281
- for sheet in wb:
282
- content += f"Sheet: {sheet.title}\n"
283
- for row in sheet.iter_rows(values_only=True):
284
- content += (
285
- "\t".join(
286
- str(cell) if cell is not None else "" for cell in row
 
 
 
 
 
 
 
 
 
 
 
 
287
  )
288
- + "\n"
289
- )
290
- content += "\n"
291
  case _:
292
  logger.error(
293
  f"Unsupported file type: {file_path.name} (extension {ext})"
 
16
 
17
  from lightrag import LightRAG
18
  from lightrag.base import DocProcessingStatus, DocStatus
19
+ from lightrag.api.utils_api import (
20
+ get_api_key_dependency,
21
+ global_args,
22
+ get_auth_dependency,
23
+ )
24
 
25
  router = APIRouter(
26
  prefix="/documents",
 
244
  )
245
  return False
246
  case ".pdf":
247
+ if global_args["main_args"].document_loading_engine == "DOCLING":
248
+ if not pm.is_installed("docling"): # type: ignore
249
+ pm.install("docling")
250
+ from docling.document_converter import DocumentConverter
251
+
252
+ converter = DocumentConverter()
253
+ result = converter.convert(file_path)
254
+ content = result.document.export_to_markdown()
255
+ else:
256
+ if not pm.is_installed("pypdf2"): # type: ignore
257
+ pm.install("pypdf2")
258
+ from PyPDF2 import PdfReader # type: ignore
259
+ from io import BytesIO
260
+
261
+ pdf_file = BytesIO(file)
262
+ reader = PdfReader(pdf_file)
263
+ for page in reader.pages:
264
+ content += page.extract_text() + "\n"
265
  case ".docx":
266
+ if global_args["main_args"].document_loading_engine == "DOCLING":
267
+ if not pm.is_installed("docling"): # type: ignore
268
+ pm.install("docling")
269
+ from docling.document_converter import DocumentConverter
270
+
271
+ converter = DocumentConverter()
272
+ result = converter.convert(file_path)
273
+ content = result.document.export_to_markdown()
274
+ else:
275
+ if not pm.is_installed("python-docx"): # type: ignore
276
+ pm.install("docx")
277
+ from docx import Document # type: ignore
278
+ from io import BytesIO
279
+
280
+ docx_file = BytesIO(file)
281
+ doc = Document(docx_file)
282
+ content = "\n".join(
283
+ [paragraph.text for paragraph in doc.paragraphs]
284
+ )
285
  case ".pptx":
286
+ if global_args["main_args"].document_loading_engine == "DOCLING":
287
+ if not pm.is_installed("docling"): # type: ignore
288
+ pm.install("docling")
289
+ from docling.document_converter import DocumentConverter
290
+
291
+ converter = DocumentConverter()
292
+ result = converter.convert(file_path)
293
+ content = result.document.export_to_markdown()
294
+ else:
295
+ if not pm.is_installed("python-pptx"): # type: ignore
296
+ pm.install("pptx")
297
+ from pptx import Presentation # type: ignore
298
+ from io import BytesIO
299
+
300
+ pptx_file = BytesIO(file)
301
+ prs = Presentation(pptx_file)
302
+ for slide in prs.slides:
303
+ for shape in slide.shapes:
304
+ if hasattr(shape, "text"):
305
+ content += shape.text + "\n"
306
  case ".xlsx":
307
+ if global_args["main_args"].document_loading_engine == "DOCLING":
308
+ if not pm.is_installed("docling"): # type: ignore
309
+ pm.install("docling")
310
+ from docling.document_converter import DocumentConverter
311
+
312
+ converter = DocumentConverter()
313
+ result = converter.convert(file_path)
314
+ content = result.document.export_to_markdown()
315
+ else:
316
+ if not pm.is_installed("openpyxl"): # type: ignore
317
+ pm.install("openpyxl")
318
+ from openpyxl import load_workbook # type: ignore
319
+ from io import BytesIO
320
+
321
+ xlsx_file = BytesIO(file)
322
+ wb = load_workbook(xlsx_file)
323
+ for sheet in wb:
324
+ content += f"Sheet: {sheet.title}\n"
325
+ for row in sheet.iter_rows(values_only=True):
326
+ content += (
327
+ "\t".join(
328
+ str(cell) if cell is not None else ""
329
+ for cell in row
330
+ )
331
+ + "\n"
332
  )
333
+ content += "\n"
 
 
334
  case _:
335
  logger.error(
336
  f"Unsupported file type: {file_path.name} (extension {ext})"
lightrag/api/routers/ollama_api.py CHANGED
@@ -11,7 +11,7 @@ import asyncio
11
  from ascii_colors import trace_exception
12
  from lightrag import LightRAG, QueryParam
13
  from lightrag.utils import encode_string_by_tiktoken
14
- from ..utils_api import ollama_server_infos
15
 
16
 
17
  # query mode according to query prefix (bypass is not LightRAG quer mode)
 
11
  from ascii_colors import trace_exception
12
  from lightrag import LightRAG, QueryParam
13
  from lightrag.utils import encode_string_by_tiktoken
14
+ from lightrag.api.utils_api import ollama_server_infos
15
 
16
 
17
  # query mode according to query prefix (bypass is not LightRAG quer mode)
lightrag/api/utils_api.py CHANGED
@@ -18,6 +18,8 @@ from .auth import auth_handler
18
  # Load environment variables
19
  load_dotenv(override=True)
20
 
 
 
21
 
22
  class OllamaServerInfos:
23
  # Constants for emulated Ollama model information
@@ -360,8 +362,17 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
360
  args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
361
  args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int)
362
 
 
 
 
 
 
 
 
 
363
  ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
364
 
 
365
  return args
366
 
367
 
@@ -451,8 +462,10 @@ def display_splash_screen(args: argparse.Namespace) -> None:
451
  ASCIIColors.yellow(f"{args.history_turns}")
452
  ASCIIColors.white(" ├─ Cosine Threshold: ", end="")
453
  ASCIIColors.yellow(f"{args.cosine_threshold}")
454
- ASCIIColors.white(" └─ Top-K: ", end="")
455
  ASCIIColors.yellow(f"{args.top_k}")
 
 
456
 
457
  # System Configuration
458
  ASCIIColors.magenta("\n💾 Storage Configuration:")
 
18
  # Load environment variables
19
  load_dotenv(override=True)
20
 
21
+ global_args = {"main_args": None}
22
+
23
 
24
  class OllamaServerInfos:
25
  # Constants for emulated Ollama model information
 
362
  args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
363
  args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int)
364
 
365
+ # Inject LLM cache configuration
366
+ args.enable_llm_cache_for_extract = get_env_value(
367
+ "ENABLE_LLM_CACHE_FOR_EXTRACT", False, bool
368
+ )
369
+
370
+ # Select Document loading tool (DOCLING, DEFAULT)
371
+ args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")
372
+
373
  ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
374
 
375
+ global_args["main_args"] = args
376
  return args
377
 
378
 
 
462
  ASCIIColors.yellow(f"{args.history_turns}")
463
  ASCIIColors.white(" ├─ Cosine Threshold: ", end="")
464
  ASCIIColors.yellow(f"{args.cosine_threshold}")
465
+ ASCIIColors.white(" ├─ Top-K: ", end="")
466
  ASCIIColors.yellow(f"{args.top_k}")
467
+ ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
468
+ ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
469
 
470
  # System Configuration
471
  ASCIIColors.magenta("\n💾 Storage Configuration:")
lightrag/base.py CHANGED
@@ -127,6 +127,30 @@ class BaseVectorStorage(StorageNameSpace, ABC):
127
  async def delete_entity_relation(self, entity_name: str) -> None:
128
  """Delete relations for a given entity."""
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  @dataclass
132
  class BaseKVStorage(StorageNameSpace, ABC):
 
127
  async def delete_entity_relation(self, entity_name: str) -> None:
128
  """Delete relations for a given entity."""
129
 
130
+ @abstractmethod
131
+ async def get_by_id(self, id: str) -> dict[str, Any] | None:
132
+ """Get vector data by its ID
133
+
134
+ Args:
135
+ id: The unique identifier of the vector
136
+
137
+ Returns:
138
+ The vector data if found, or None if not found
139
+ """
140
+ pass
141
+
142
+ @abstractmethod
143
+ async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
144
+ """Get multiple vector data by their IDs
145
+
146
+ Args:
147
+ ids: List of unique identifiers
148
+
149
+ Returns:
150
+ List of vector data objects that were found
151
+ """
152
+ pass
153
+
154
 
155
  @dataclass
156
  class BaseKVStorage(StorageNameSpace, ABC):
lightrag/kg/chroma_impl.py CHANGED
@@ -271,3 +271,67 @@ class ChromaVectorDBStorage(BaseVectorStorage):
271
  except Exception as e:
272
  logger.error(f"Error during prefix search in ChromaDB: {str(e)}")
273
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  except Exception as e:
272
  logger.error(f"Error during prefix search in ChromaDB: {str(e)}")
273
  raise
274
+
275
+ async def get_by_id(self, id: str) -> dict[str, Any] | None:
276
+ """Get vector data by its ID
277
+
278
+ Args:
279
+ id: The unique identifier of the vector
280
+
281
+ Returns:
282
+ The vector data if found, or None if not found
283
+ """
284
+ try:
285
+ # Query the collection for a single vector by ID
286
+ result = self._collection.get(
287
+ ids=[id], include=["metadatas", "embeddings", "documents"]
288
+ )
289
+
290
+ if not result or not result["ids"] or len(result["ids"]) == 0:
291
+ return None
292
+
293
+ # Format the result to match the expected structure
294
+ return {
295
+ "id": result["ids"][0],
296
+ "vector": result["embeddings"][0],
297
+ "content": result["documents"][0],
298
+ **result["metadatas"][0],
299
+ }
300
+ except Exception as e:
301
+ logger.error(f"Error retrieving vector data for ID {id}: {e}")
302
+ return None
303
+
304
+ async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
305
+ """Get multiple vector data by their IDs
306
+
307
+ Args:
308
+ ids: List of unique identifiers
309
+
310
+ Returns:
311
+ List of vector data objects that were found
312
+ """
313
+ if not ids:
314
+ return []
315
+
316
+ try:
317
+ # Query the collection for multiple vectors by IDs
318
+ result = self._collection.get(
319
+ ids=ids, include=["metadatas", "embeddings", "documents"]
320
+ )
321
+
322
+ if not result or not result["ids"] or len(result["ids"]) == 0:
323
+ return []
324
+
325
+ # Format the results to match the expected structure
326
+ return [
327
+ {
328
+ "id": result["ids"][i],
329
+ "vector": result["embeddings"][i],
330
+ "content": result["documents"][i],
331
+ **result["metadatas"][i],
332
+ }
333
+ for i in range(len(result["ids"]))
334
+ ]
335
+ except Exception as e:
336
+ logger.error(f"Error retrieving vector data for IDs {ids}: {e}")
337
+ return []
lightrag/kg/faiss_impl.py CHANGED
@@ -394,3 +394,46 @@ class FaissVectorDBStorage(BaseVectorStorage):
394
 
395
  logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
396
  return matching_records
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
 
395
  logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
396
  return matching_records
397
+
398
+ async def get_by_id(self, id: str) -> dict[str, Any] | None:
399
+ """Get vector data by its ID
400
+
401
+ Args:
402
+ id: The unique identifier of the vector
403
+
404
+ Returns:
405
+ The vector data if found, or None if not found
406
+ """
407
+ # Find the Faiss internal ID for the custom ID
408
+ fid = self._find_faiss_id_by_custom_id(id)
409
+ if fid is None:
410
+ return None
411
+
412
+ # Get the metadata for the found ID
413
+ metadata = self._id_to_meta.get(fid, {})
414
+ if not metadata:
415
+ return None
416
+
417
+ return {**metadata, "id": metadata.get("__id__")}
418
+
419
+ async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
420
+ """Get multiple vector data by their IDs
421
+
422
+ Args:
423
+ ids: List of unique identifiers
424
+
425
+ Returns:
426
+ List of vector data objects that were found
427
+ """
428
+ if not ids:
429
+ return []
430
+
431
+ results = []
432
+ for id in ids:
433
+ fid = self._find_faiss_id_by_custom_id(id)
434
+ if fid is not None:
435
+ metadata = self._id_to_meta.get(fid, {})
436
+ if metadata:
437
+ results.append({**metadata, "id": metadata.get("__id__")})
438
+
439
+ return results
lightrag/kg/json_doc_status_impl.py CHANGED
@@ -15,6 +15,10 @@ from lightrag.utils import (
15
  from .shared_storage import (
16
  get_namespace_data,
17
  get_storage_lock,
 
 
 
 
18
  try_initialize_namespace,
19
  )
20
 
@@ -27,21 +31,25 @@ class JsonDocStatusStorage(DocStatusStorage):
27
  def __post_init__(self):
28
  working_dir = self.global_config["working_dir"]
29
  self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
30
- self._storage_lock = get_storage_lock()
31
  self._data = None
 
 
32
 
33
  async def initialize(self):
34
  """Initialize storage data"""
35
- # check need_init must before get_namespace_data
36
- need_init = try_initialize_namespace(self.namespace)
37
- self._data = await get_namespace_data(self.namespace)
38
- if need_init:
39
- loaded_data = load_json(self._file_name) or {}
40
- async with self._storage_lock:
41
- self._data.update(loaded_data)
42
- logger.info(
43
- f"Loaded document status storage with {len(loaded_data)} records"
44
- )
 
 
 
45
 
46
  async def filter_keys(self, keys: set[str]) -> set[str]:
47
  """Return keys that should be processed (not in storage or not successfully processed)"""
@@ -87,18 +95,24 @@ class JsonDocStatusStorage(DocStatusStorage):
87
 
88
  async def index_done_callback(self) -> None:
89
  async with self._storage_lock:
90
- data_dict = (
91
- dict(self._data) if hasattr(self._data, "_getvalue") else self._data
92
- )
93
- write_json(data_dict, self._file_name)
 
 
 
 
 
94
 
95
  async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
96
- logger.info(f"Inserting {len(data)} to {self.namespace}")
97
  if not data:
98
  return
99
-
100
  async with self._storage_lock:
101
  self._data.update(data)
 
 
102
  await self.index_done_callback()
103
 
104
  async def get_by_id(self, id: str) -> Union[dict[str, Any], None]:
@@ -109,9 +123,12 @@ class JsonDocStatusStorage(DocStatusStorage):
109
  async with self._storage_lock:
110
  for doc_id in doc_ids:
111
  self._data.pop(doc_id, None)
 
112
  await self.index_done_callback()
113
 
114
  async def drop(self) -> None:
115
  """Drop the storage"""
116
  async with self._storage_lock:
117
  self._data.clear()
 
 
 
15
  from .shared_storage import (
16
  get_namespace_data,
17
  get_storage_lock,
18
+ get_data_init_lock,
19
+ get_update_flag,
20
+ set_all_update_flags,
21
+ clear_all_update_flags,
22
  try_initialize_namespace,
23
  )
24
 
 
31
  def __post_init__(self):
32
  working_dir = self.global_config["working_dir"]
33
  self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
 
34
  self._data = None
35
+ self._storage_lock = None
36
+ self.storage_updated = None
37
 
38
  async def initialize(self):
39
  """Initialize storage data"""
40
+ self._storage_lock = get_storage_lock()
41
+ self.storage_updated = await get_update_flag(self.namespace)
42
+ async with get_data_init_lock():
43
+ # check need_init must before get_namespace_data
44
+ need_init = await try_initialize_namespace(self.namespace)
45
+ self._data = await get_namespace_data(self.namespace)
46
+ if need_init:
47
+ loaded_data = load_json(self._file_name) or {}
48
+ async with self._storage_lock:
49
+ self._data.update(loaded_data)
50
+ logger.info(
51
+ f"Process {os.getpid()} doc status load {self.namespace} with {len(loaded_data)} records"
52
+ )
53
 
54
  async def filter_keys(self, keys: set[str]) -> set[str]:
55
  """Return keys that should be processed (not in storage or not successfully processed)"""
 
95
 
96
  async def index_done_callback(self) -> None:
97
  async with self._storage_lock:
98
+ if self.storage_updated.value:
99
+ data_dict = (
100
+ dict(self._data) if hasattr(self._data, "_getvalue") else self._data
101
+ )
102
+ logger.info(
103
+ f"Process {os.getpid()} doc status writting {len(data_dict)} records to {self.namespace}"
104
+ )
105
+ write_json(data_dict, self._file_name)
106
+ await clear_all_update_flags(self.namespace)
107
 
108
  async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
 
109
  if not data:
110
  return
111
+ logger.info(f"Inserting {len(data)} records to {self.namespace}")
112
  async with self._storage_lock:
113
  self._data.update(data)
114
+ await set_all_update_flags(self.namespace)
115
+
116
  await self.index_done_callback()
117
 
118
  async def get_by_id(self, id: str) -> Union[dict[str, Any], None]:
 
123
  async with self._storage_lock:
124
  for doc_id in doc_ids:
125
  self._data.pop(doc_id, None)
126
+ await set_all_update_flags(self.namespace)
127
  await self.index_done_callback()
128
 
129
  async def drop(self) -> None:
130
  """Drop the storage"""
131
  async with self._storage_lock:
132
  self._data.clear()
133
+ await set_all_update_flags(self.namespace)
134
+ await self.index_done_callback()
lightrag/kg/json_kv_impl.py CHANGED
@@ -13,6 +13,10 @@ from lightrag.utils import (
13
  from .shared_storage import (
14
  get_namespace_data,
15
  get_storage_lock,
 
 
 
 
16
  try_initialize_namespace,
17
  )
18
 
@@ -23,26 +27,63 @@ class JsonKVStorage(BaseKVStorage):
23
  def __post_init__(self):
24
  working_dir = self.global_config["working_dir"]
25
  self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
26
- self._storage_lock = get_storage_lock()
27
  self._data = None
 
 
28
 
29
  async def initialize(self):
30
  """Initialize storage data"""
31
- # check need_init must before get_namespace_data
32
- need_init = try_initialize_namespace(self.namespace)
33
- self._data = await get_namespace_data(self.namespace)
34
- if need_init:
35
- loaded_data = load_json(self._file_name) or {}
36
- async with self._storage_lock:
37
- self._data.update(loaded_data)
38
- logger.info(f"Load KV {self.namespace} with {len(loaded_data)} data")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  async def index_done_callback(self) -> None:
41
  async with self._storage_lock:
42
- data_dict = (
43
- dict(self._data) if hasattr(self._data, "_getvalue") else self._data
44
- )
45
- write_json(data_dict, self._file_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  async def get_all(self) -> dict[str, Any]:
48
  """Get all data from storage
@@ -73,15 +114,16 @@ class JsonKVStorage(BaseKVStorage):
73
  return set(keys) - set(self._data.keys())
74
 
75
  async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
76
- logger.info(f"Inserting {len(data)} to {self.namespace}")
77
  if not data:
78
  return
 
79
  async with self._storage_lock:
80
- left_data = {k: v for k, v in data.items() if k not in self._data}
81
- self._data.update(left_data)
82
 
83
  async def delete(self, ids: list[str]) -> None:
84
  async with self._storage_lock:
85
  for doc_id in ids:
86
  self._data.pop(doc_id, None)
 
87
  await self.index_done_callback()
 
13
  from .shared_storage import (
14
  get_namespace_data,
15
  get_storage_lock,
16
+ get_data_init_lock,
17
+ get_update_flag,
18
+ set_all_update_flags,
19
+ clear_all_update_flags,
20
  try_initialize_namespace,
21
  )
22
 
 
27
  def __post_init__(self):
28
  working_dir = self.global_config["working_dir"]
29
  self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
 
30
  self._data = None
31
+ self._storage_lock = None
32
+ self.storage_updated = None
33
 
34
  async def initialize(self):
35
  """Initialize storage data"""
36
+ self._storage_lock = get_storage_lock()
37
+ self.storage_updated = await get_update_flag(self.namespace)
38
+ async with get_data_init_lock():
39
+ # check need_init must before get_namespace_data
40
+ need_init = await try_initialize_namespace(self.namespace)
41
+ self._data = await get_namespace_data(self.namespace)
42
+ if need_init:
43
+ loaded_data = load_json(self._file_name) or {}
44
+ async with self._storage_lock:
45
+ self._data.update(loaded_data)
46
+
47
+ # Calculate data count based on namespace
48
+ if self.namespace.endswith("cache"):
49
+ # For cache namespaces, sum the cache entries across all cache types
50
+ data_count = sum(
51
+ len(first_level_dict)
52
+ for first_level_dict in loaded_data.values()
53
+ if isinstance(first_level_dict, dict)
54
+ )
55
+ else:
56
+ # For non-cache namespaces, use the original count method
57
+ data_count = len(loaded_data)
58
+
59
+ logger.info(
60
+ f"Process {os.getpid()} KV load {self.namespace} with {data_count} records"
61
+ )
62
 
63
  async def index_done_callback(self) -> None:
64
  async with self._storage_lock:
65
+ if self.storage_updated.value:
66
+ data_dict = (
67
+ dict(self._data) if hasattr(self._data, "_getvalue") else self._data
68
+ )
69
+
70
+ # Calculate data count based on namespace
71
+ if self.namespace.endswith("cache"):
72
+ # # For cache namespaces, sum the cache entries across all cache types
73
+ data_count = sum(
74
+ len(first_level_dict)
75
+ for first_level_dict in data_dict.values()
76
+ if isinstance(first_level_dict, dict)
77
+ )
78
+ else:
79
+ # For non-cache namespaces, use the original count method
80
+ data_count = len(data_dict)
81
+
82
+ logger.info(
83
+ f"Process {os.getpid()} KV writting {data_count} records to {self.namespace}"
84
+ )
85
+ write_json(data_dict, self._file_name)
86
+ await clear_all_update_flags(self.namespace)
87
 
88
  async def get_all(self) -> dict[str, Any]:
89
  """Get all data from storage
 
114
  return set(keys) - set(self._data.keys())
115
 
116
  async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
 
117
  if not data:
118
  return
119
+ logger.info(f"Inserting {len(data)} records to {self.namespace}")
120
  async with self._storage_lock:
121
+ self._data.update(data)
122
+ await set_all_update_flags(self.namespace)
123
 
124
  async def delete(self, ids: list[str]) -> None:
125
  async with self._storage_lock:
126
  for doc_id in ids:
127
  self._data.pop(doc_id, None)
128
+ await set_all_update_flags(self.namespace)
129
  await self.index_done_callback()
lightrag/kg/milvus_impl.py CHANGED
@@ -233,3 +233,57 @@ class MilvusVectorDBStorage(BaseVectorStorage):
233
  except Exception as e:
234
  logger.error(f"Error searching for records with prefix '{prefix}': {e}")
235
  return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  except Exception as e:
234
  logger.error(f"Error searching for records with prefix '{prefix}': {e}")
235
  return []
236
+
237
+ async def get_by_id(self, id: str) -> dict[str, Any] | None:
238
+ """Get vector data by its ID
239
+
240
+ Args:
241
+ id: The unique identifier of the vector
242
+
243
+ Returns:
244
+ The vector data if found, or None if not found
245
+ """
246
+ try:
247
+ # Query Milvus for a specific ID
248
+ result = self._client.query(
249
+ collection_name=self.namespace,
250
+ filter=f'id == "{id}"',
251
+ output_fields=list(self.meta_fields) + ["id"],
252
+ )
253
+
254
+ if not result or len(result) == 0:
255
+ return None
256
+
257
+ return result[0]
258
+ except Exception as e:
259
+ logger.error(f"Error retrieving vector data for ID {id}: {e}")
260
+ return None
261
+
262
+ async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
263
+ """Get multiple vector data by their IDs
264
+
265
+ Args:
266
+ ids: List of unique identifiers
267
+
268
+ Returns:
269
+ List of vector data objects that were found
270
+ """
271
+ if not ids:
272
+ return []
273
+
274
+ try:
275
+ # Prepare the ID filter expression
276
+ id_list = '", "'.join(ids)
277
+ filter_expr = f'id in ["{id_list}"]'
278
+
279
+ # Query Milvus with the filter
280
+ result = self._client.query(
281
+ collection_name=self.namespace,
282
+ filter=filter_expr,
283
+ output_fields=list(self.meta_fields) + ["id"],
284
+ )
285
+
286
+ return result or []
287
+ except Exception as e:
288
+ logger.error(f"Error retrieving vector data for IDs {ids}: {e}")
289
+ return []
lightrag/kg/mongo_impl.py CHANGED
@@ -1073,6 +1073,59 @@ class MongoVectorDBStorage(BaseVectorStorage):
1073
  logger.error(f"Error searching by prefix in {self.namespace}: {str(e)}")
1074
  return []
1075
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1076
 
1077
  async def get_or_create_collection(db: AsyncIOMotorDatabase, collection_name: str):
1078
  collection_names = await db.list_collection_names()
 
1073
  logger.error(f"Error searching by prefix in {self.namespace}: {str(e)}")
1074
  return []
1075
 
1076
+ async def get_by_id(self, id: str) -> dict[str, Any] | None:
1077
+ """Get vector data by its ID
1078
+
1079
+ Args:
1080
+ id: The unique identifier of the vector
1081
+
1082
+ Returns:
1083
+ The vector data if found, or None if not found
1084
+ """
1085
+ try:
1086
+ # Search for the specific ID in MongoDB
1087
+ result = await self._data.find_one({"_id": id})
1088
+ if result:
1089
+ # Format the result to include id field expected by API
1090
+ result_dict = dict(result)
1091
+ if "_id" in result_dict and "id" not in result_dict:
1092
+ result_dict["id"] = result_dict["_id"]
1093
+ return result_dict
1094
+ return None
1095
+ except Exception as e:
1096
+ logger.error(f"Error retrieving vector data for ID {id}: {e}")
1097
+ return None
1098
+
1099
+ async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
1100
+ """Get multiple vector data by their IDs
1101
+
1102
+ Args:
1103
+ ids: List of unique identifiers
1104
+
1105
+ Returns:
1106
+ List of vector data objects that were found
1107
+ """
1108
+ if not ids:
1109
+ return []
1110
+
1111
+ try:
1112
+ # Query MongoDB for multiple IDs
1113
+ cursor = self._data.find({"_id": {"$in": ids}})
1114
+ results = await cursor.to_list(length=None)
1115
+
1116
+ # Format results to include id field expected by API
1117
+ formatted_results = []
1118
+ for result in results:
1119
+ result_dict = dict(result)
1120
+ if "_id" in result_dict and "id" not in result_dict:
1121
+ result_dict["id"] = result_dict["_id"]
1122
+ formatted_results.append(result_dict)
1123
+
1124
+ return formatted_results
1125
+ except Exception as e:
1126
+ logger.error(f"Error retrieving vector data for IDs {ids}: {e}")
1127
+ return []
1128
+
1129
 
1130
  async def get_or_create_collection(db: AsyncIOMotorDatabase, collection_name: str):
1131
  collection_names = await db.list_collection_names()
lightrag/kg/nano_vector_db_impl.py CHANGED
@@ -258,3 +258,33 @@ class NanoVectorDBStorage(BaseVectorStorage):
258
 
259
  logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
260
  return matching_records
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
  logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
260
  return matching_records
261
+
262
+ async def get_by_id(self, id: str) -> dict[str, Any] | None:
263
+ """Get vector data by its ID
264
+
265
+ Args:
266
+ id: The unique identifier of the vector
267
+
268
+ Returns:
269
+ The vector data if found, or None if not found
270
+ """
271
+ client = await self._get_client()
272
+ result = client.get([id])
273
+ if result:
274
+ return result[0]
275
+ return None
276
+
277
+ async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
278
+ """Get multiple vector data by their IDs
279
+
280
+ Args:
281
+ ids: List of unique identifiers
282
+
283
+ Returns:
284
+ List of vector data objects that were found
285
+ """
286
+ if not ids:
287
+ return []
288
+
289
+ client = await self._get_client()
290
+ return client.get(ids)
lightrag/kg/neo4j_impl.py CHANGED
@@ -3,7 +3,7 @@ import inspect
3
  import os
4
  import re
5
  from dataclasses import dataclass
6
- from typing import Any, List, Dict, final
7
  import numpy as np
8
  import configparser
9
 
@@ -15,6 +15,7 @@ from tenacity import (
15
  retry_if_exception_type,
16
  )
17
 
 
18
  from ..utils import logger
19
  from ..base import BaseGraphStorage
20
  from ..types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
@@ -37,6 +38,9 @@ config.read("config.ini", "utf-8")
37
  # Get maximum number of graph nodes from environment variable, default is 1000
38
  MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
39
 
 
 
 
40
 
41
  @final
42
  @dataclass
@@ -60,19 +64,25 @@ class Neo4JStorage(BaseGraphStorage):
60
  MAX_CONNECTION_POOL_SIZE = int(
61
  os.environ.get(
62
  "NEO4J_MAX_CONNECTION_POOL_SIZE",
63
- config.get("neo4j", "connection_pool_size", fallback=800),
64
  )
65
  )
66
  CONNECTION_TIMEOUT = float(
67
  os.environ.get(
68
  "NEO4J_CONNECTION_TIMEOUT",
69
- config.get("neo4j", "connection_timeout", fallback=60.0),
70
  ),
71
  )
72
  CONNECTION_ACQUISITION_TIMEOUT = float(
73
  os.environ.get(
74
  "NEO4J_CONNECTION_ACQUISITION_TIMEOUT",
75
- config.get("neo4j", "connection_acquisition_timeout", fallback=60.0),
 
 
 
 
 
 
76
  ),
77
  )
78
  DATABASE = os.environ.get(
@@ -85,6 +95,7 @@ class Neo4JStorage(BaseGraphStorage):
85
  max_connection_pool_size=MAX_CONNECTION_POOL_SIZE,
86
  connection_timeout=CONNECTION_TIMEOUT,
87
  connection_acquisition_timeout=CONNECTION_ACQUISITION_TIMEOUT,
 
88
  )
89
 
90
  # Try to connect to the database
@@ -152,65 +163,84 @@ class Neo4JStorage(BaseGraphStorage):
152
  }
153
 
154
  async def close(self):
 
155
  if self._driver:
156
  await self._driver.close()
157
  self._driver = None
158
 
159
  async def __aexit__(self, exc_type, exc, tb):
160
- if self._driver:
161
- await self._driver.close()
162
 
163
  async def index_done_callback(self) -> None:
164
  # Noe4J handles persistence automatically
165
  pass
166
 
167
- async def _label_exists(self, label: str) -> bool:
168
- """Check if a label exists in the Neo4j database."""
169
- query = "CALL db.labels() YIELD label RETURN label"
170
- try:
171
- async with self._driver.session(database=self._DATABASE) as session:
172
- result = await session.run(query)
173
- labels = [record["label"] for record in await result.data()]
174
- return label in labels
175
- except Exception as e:
176
- logger.error(f"Error checking label existence: {e}")
177
- return False
178
 
179
- async def _ensure_label(self, label: str) -> str:
180
- """Ensure a label exists by validating it."""
181
- clean_label = label.strip('"')
182
- if not await self._label_exists(clean_label):
183
- logger.warning(f"Label '{clean_label}' does not exist in Neo4j")
184
- return clean_label
185
 
186
- async def has_node(self, node_id: str) -> bool:
187
- entity_name_label = await self._ensure_label(node_id)
188
- async with self._driver.session(database=self._DATABASE) as session:
189
- query = (
190
- f"MATCH (n:`{entity_name_label}`) RETURN count(n) > 0 AS node_exists"
191
- )
192
- result = await session.run(query)
193
- single_result = await result.single()
194
- logger.debug(
195
- f"{inspect.currentframe().f_code.co_name}:query:{query}:result:{single_result['node_exists']}"
196
- )
197
- return single_result["node_exists"]
 
 
 
 
 
198
 
199
  async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
200
- entity_name_label_source = source_node_id.strip('"')
201
- entity_name_label_target = target_node_id.strip('"')
202
 
203
- async with self._driver.session(database=self._DATABASE) as session:
204
- query = (
205
- f"MATCH (a:`{entity_name_label_source}`)-[r]-(b:`{entity_name_label_target}`) "
206
- "RETURN COUNT(r) > 0 AS edgeExists"
207
- )
208
- result = await session.run(query)
209
- single_result = await result.single()
210
- logger.debug(
211
- f"{inspect.currentframe().f_code.co_name}:query:{query}:result:{single_result['edgeExists']}"
212
- )
213
- return single_result["edgeExists"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
  async def get_node(self, node_id: str) -> dict[str, str] | None:
216
  """Get node by its label identifier.
@@ -221,161 +251,258 @@ class Neo4JStorage(BaseGraphStorage):
221
  Returns:
222
  dict: Node properties if found
223
  None: If node not found
 
 
 
 
224
  """
225
- async with self._driver.session(database=self._DATABASE) as session:
226
- entity_name_label = await self._ensure_label(node_id)
227
- query = f"MATCH (n:`{entity_name_label}`) RETURN n"
228
- result = await session.run(query)
229
- record = await result.single()
230
- if record:
231
- node = record["n"]
232
- node_dict = dict(node)
233
- logger.debug(
234
- f"{inspect.currentframe().f_code.co_name}: query: {query}, result: {node_dict}"
235
- )
236
- return node_dict
237
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
  async def node_degree(self, node_id: str) -> int:
240
- entity_name_label = node_id.strip('"')
 
 
241
 
242
- async with self._driver.session(database=self._DATABASE) as session:
243
- query = f"""
244
- MATCH (n:`{entity_name_label}`)
245
- RETURN COUNT{{ (n)--() }} AS totalEdgeCount
246
- """
247
- result = await session.run(query)
248
- record = await result.single()
249
- if record:
250
- edge_count = record["totalEdgeCount"]
251
- logger.debug(
252
- f"{inspect.currentframe().f_code.co_name}:query:{query}:result:{edge_count}"
253
- )
254
- return edge_count
255
- else:
256
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
258
  async def edge_degree(self, src_id: str, tgt_id: str) -> int:
259
- entity_name_label_source = src_id.strip('"')
260
- entity_name_label_target = tgt_id.strip('"')
261
- src_degree = await self.node_degree(entity_name_label_source)
262
- trg_degree = await self.node_degree(entity_name_label_target)
 
 
 
 
 
 
 
263
 
264
  # Convert None to 0 for addition
265
  src_degree = 0 if src_degree is None else src_degree
266
  trg_degree = 0 if trg_degree is None else trg_degree
267
 
268
  degrees = int(src_degree) + int(trg_degree)
269
- logger.debug(
270
- f"{inspect.currentframe().f_code.co_name}:query:src_Degree+trg_degree:result:{degrees}"
271
- )
272
  return degrees
273
 
274
  async def get_edge(
275
  self, source_node_id: str, target_node_id: str
276
  ) -> dict[str, str] | None:
277
- try:
278
- entity_name_label_source = source_node_id.strip('"')
279
- entity_name_label_target = target_node_id.strip('"')
280
 
281
- async with self._driver.session(database=self._DATABASE) as session:
282
- query = f"""
283
- MATCH (start:`{entity_name_label_source}`)-[r]->(end:`{entity_name_label_target}`)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  RETURN properties(r) as edge_properties
285
- LIMIT 1
286
  """
 
 
 
 
 
 
 
287
 
288
- result = await session.run(query)
289
- record = await result.single()
290
- if record:
291
- try:
292
- result = dict(record["edge_properties"])
293
- logger.info(f"Result: {result}")
294
- # Ensure required keys exist with defaults
295
- required_keys = {
296
- "weight": 0.0,
297
- "source_id": None,
298
- "description": None,
299
- "keywords": None,
300
- }
301
- for key, default_value in required_keys.items():
302
- if key not in result:
303
- result[key] = default_value
304
- logger.warning(
305
- f"Edge between {entity_name_label_source} and {entity_name_label_target} "
306
- f"missing {key}, using default: {default_value}"
307
- )
308
-
309
- logger.debug(
310
- f"{inspect.currentframe().f_code.co_name}:query:{query}:result:{result}"
311
- )
312
- return result
313
- except (KeyError, TypeError, ValueError) as e:
314
- logger.error(
315
- f"Error processing edge properties between {entity_name_label_source} "
316
- f"and {entity_name_label_target}: {str(e)}"
317
  )
318
- # Return default edge properties on error
319
- return {
320
- "weight": 0.0,
321
- "description": None,
322
- "keywords": None,
323
- "source_id": None,
324
- }
325
-
326
- logger.debug(
327
- f"{inspect.currentframe().f_code.co_name}: No edge found between {entity_name_label_source} and {entity_name_label_target}"
328
- )
329
- # Return default edge properties when no edge found
330
- return {
331
- "weight": 0.0,
332
- "description": None,
333
- "keywords": None,
334
- "source_id": None,
335
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
 
337
  except Exception as e:
338
  logger.error(
339
  f"Error in get_edge between {source_node_id} and {target_node_id}: {str(e)}"
340
  )
341
- # Return default edge properties on error
342
- return {
343
- "weight": 0.0,
344
- "description": None,
345
- "keywords": None,
346
- "source_id": None,
347
- }
348
 
349
  async def get_node_edges(self, source_node_id: str) -> list[tuple[str, str]] | None:
350
- node_label = source_node_id.strip('"')
351
 
 
 
 
 
 
 
 
 
 
 
352
  """
353
- Retrieves all edges (relationships) for a particular node identified by its label.
354
- :return: List of dictionaries containing edge information
355
- """
356
- query = f"""MATCH (n:`{node_label}`)
357
- OPTIONAL MATCH (n)-[r]-(connected)
358
- RETURN n, r, connected"""
359
- async with self._driver.session(database=self._DATABASE) as session:
360
- results = await session.run(query)
361
- edges = []
362
- async for record in results:
363
- source_node = record["n"]
364
- connected_node = record["connected"]
365
-
366
- source_label = (
367
- list(source_node.labels)[0] if source_node.labels else None
368
- )
369
- target_label = (
370
- list(connected_node.labels)[0]
371
- if connected_node and connected_node.labels
372
- else None
373
- )
 
 
 
 
 
 
 
 
 
374
 
375
- if source_label and target_label:
376
- edges.append((source_label, target_label))
377
 
378
- return edges
 
 
 
 
 
 
 
 
 
 
379
 
380
  @retry(
381
  stop=stop_after_attempt(3),
@@ -397,26 +524,47 @@ class Neo4JStorage(BaseGraphStorage):
397
  node_id: The unique identifier for the node (used as label)
398
  node_data: Dictionary of node properties
399
  """
400
- label = await self._ensure_label(node_id)
401
  properties = node_data
402
-
403
- async def _do_upsert(tx: AsyncManagedTransaction):
404
- query = f"""
405
- MERGE (n:`{label}`)
406
- SET n += $properties
407
- """
408
- await tx.run(query, properties=properties)
409
- logger.debug(
410
- f"Upserted node with label '{label}' and properties: {properties}"
411
- )
412
 
413
  try:
414
  async with self._driver.session(database=self._DATABASE) as session:
415
- await session.execute_write(_do_upsert)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
  except Exception as e:
417
  logger.error(f"Error during upsert: {str(e)}")
418
  raise
419
 
 
 
 
 
 
 
 
 
 
 
 
 
420
  @retry(
421
  stop=stop_after_attempt(3),
422
  wait=wait_exponential(multiplier=1, min=4, max=10),
@@ -434,34 +582,47 @@ class Neo4JStorage(BaseGraphStorage):
434
  ) -> None:
435
  """
436
  Upsert an edge and its properties between two nodes identified by their labels.
 
 
437
 
438
  Args:
439
  source_node_id (str): Label of the source node (used as identifier)
440
  target_node_id (str): Label of the target node (used as identifier)
441
  edge_data (dict): Dictionary of properties to set on the edge
442
- """
443
- source_label = await self._ensure_label(source_node_id)
444
- target_label = await self._ensure_label(target_node_id)
445
- edge_properties = edge_data
446
-
447
- async def _do_upsert_edge(tx: AsyncManagedTransaction):
448
- query = f"""
449
- MATCH (source:`{source_label}`)
450
- WITH source
451
- MATCH (target:`{target_label}`)
452
- MERGE (source)-[r:DIRECTED]->(target)
453
- SET r += $properties
454
- RETURN r
455
- """
456
- result = await tx.run(query, properties=edge_properties)
457
- record = await result.single()
458
- logger.debug(
459
- f"Upserted edge from '{source_label}' to '{target_label}' with properties: {edge_properties}, result: {record['r'] if record else None}"
460
- )
461
 
 
 
 
462
  try:
 
463
  async with self._driver.session(database=self._DATABASE) as session:
464
- await session.execute_write(_do_upsert_edge)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
  except Exception as e:
466
  logger.error(f"Error during edge upsert: {str(e)}")
467
  raise
@@ -470,199 +631,293 @@ class Neo4JStorage(BaseGraphStorage):
470
  print("Implemented but never called.")
471
 
472
  async def get_knowledge_graph(
473
- self, node_label: str, max_depth: int = 5
 
 
 
 
474
  ) -> KnowledgeGraph:
475
  """
476
  Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
477
  Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000).
478
  When reducing the number of nodes, the prioritization criteria are as follows:
479
- 1. Label matching nodes take precedence (nodes containing the specified label string)
480
- 2. Followed by nodes directly connected to the matching nodes
481
- 3. Finally, the degree of the nodes
 
482
 
483
  Args:
484
- node_label (str): String to match in node labels (will match any node containing this string in its label)
485
- max_depth (int, optional): Maximum depth of the graph. Defaults to 5.
 
 
486
  Returns:
487
  KnowledgeGraph: Complete connected subgraph for specified node
488
  """
489
- label = node_label.strip('"')
490
- # Escape single quotes to prevent injection attacks
491
- escaped_label = label.replace("'", "\\'")
492
  result = KnowledgeGraph()
493
  seen_nodes = set()
494
  seen_edges = set()
495
 
496
- async with self._driver.session(database=self._DATABASE) as session:
 
 
497
  try:
498
- if label == "*":
499
  main_query = """
500
  MATCH (n)
501
  OPTIONAL MATCH (n)-[r]-()
502
  WITH n, count(r) AS degree
 
503
  ORDER BY degree DESC
504
  LIMIT $max_nodes
505
- WITH collect(n) AS nodes
506
- MATCH (a)-[r]->(b)
507
- WHERE a IN nodes AND b IN nodes
508
- RETURN nodes, collect(DISTINCT r) AS relationships
 
 
 
509
  """
510
  result_set = await session.run(
511
- main_query, {"max_nodes": MAX_GRAPH_NODES}
 
512
  )
513
 
514
  else:
515
- validate_query = f"""
516
- MATCH (n)
517
- WHERE any(label IN labels(n) WHERE label CONTAINS '{escaped_label}')
518
- RETURN n LIMIT 1
519
- """
520
- validate_result = await session.run(validate_query)
521
- if not await validate_result.single():
522
- logger.warning(
523
- f"No nodes containing '{label}' in their labels found!"
524
- )
525
- return result
526
-
527
  # Main query uses partial matching
528
- main_query = f"""
529
  MATCH (start)
530
- WHERE any(label IN labels(start) WHERE label CONTAINS '{escaped_label}')
 
 
 
 
531
  WITH start
532
- CALL apoc.path.subgraphAll(start, {{
533
- relationshipFilter: '>',
534
  minLevel: 0,
535
- maxLevel: {max_depth},
536
  bfs: true
537
- }})
538
  YIELD nodes, relationships
539
  WITH start, nodes, relationships
540
  UNWIND nodes AS node
541
  OPTIONAL MATCH (node)-[r]-()
542
- WITH node, count(r) AS degree, start, nodes, relationships,
543
- CASE
544
- WHEN id(node) = id(start) THEN 2
545
- WHEN EXISTS((start)-->(node)) OR EXISTS((node)-->(start)) THEN 1
546
- ELSE 0
547
- END AS priority
548
- ORDER BY priority DESC, degree DESC
 
 
549
  LIMIT $max_nodes
550
- WITH collect(node) AS filtered_nodes, nodes, relationships
551
- RETURN filtered_nodes AS nodes,
552
- [rel IN relationships WHERE startNode(rel) IN filtered_nodes AND endNode(rel) IN filtered_nodes] AS relationships
 
 
 
 
553
  """
554
  result_set = await session.run(
555
- main_query, {"max_nodes": MAX_GRAPH_NODES}
 
 
 
 
 
 
 
556
  )
557
 
558
- record = await result_set.single()
559
-
560
- if record:
561
- # Handle nodes (compatible with multi-label cases)
562
- for node in record["nodes"]:
563
- # Use node ID + label combination as unique identifier
564
- node_id = node.id
565
- if node_id not in seen_nodes:
566
- result.nodes.append(
567
- KnowledgeGraphNode(
568
- id=f"{node_id}",
569
- labels=list(node.labels),
570
- properties=dict(node),
 
 
 
 
 
 
571
  )
572
- )
573
- seen_nodes.add(node_id)
574
-
575
- # Handle relationships (including direction information)
576
- for rel in record["relationships"]:
577
- edge_id = rel.id
578
- if edge_id not in seen_edges:
579
- start = rel.start_node
580
- end = rel.end_node
581
- result.edges.append(
582
- KnowledgeGraphEdge(
583
- id=f"{edge_id}",
584
- type=rel.type,
585
- source=f"{start.id}",
586
- target=f"{end.id}",
587
- properties=dict(rel),
588
  )
589
- )
590
- seen_edges.add(edge_id)
591
 
592
- logger.info(
593
- f"Subgraph query successful | Node count: {len(result.nodes)} | Edge count: {len(result.edges)}"
594
- )
 
 
595
 
596
  except neo4jExceptions.ClientError as e:
597
- logger.error(f"APOC query failed: {str(e)}")
598
- return await self._robust_fallback(label, max_depth)
 
 
 
 
 
 
 
 
 
 
599
 
600
  return result
601
 
602
  async def _robust_fallback(
603
- self, label: str, max_depth: int
604
- ) -> Dict[str, List[Dict]]:
605
- """Enhanced fallback query solution"""
606
- result = {"nodes": [], "edges": []}
 
 
 
 
607
  visited_nodes = set()
608
  visited_edges = set()
609
 
610
- async def traverse(current_label: str, current_depth: int):
 
 
 
 
 
611
  if current_depth > max_depth:
 
612
  return
613
-
614
- # Get current node details
615
- node = await self.get_node(current_label)
616
- if not node:
617
  return
618
 
619
- node_id = f"{current_label}"
620
- if node_id in visited_nodes:
621
  return
622
- visited_nodes.add(node_id)
623
-
624
- # Add node data (with complete labels)
625
- node_data = {k: v for k, v in node.items()}
626
- node_data["labels"] = [
627
- current_label
628
- ] # Assume get_node method returns label information
629
- result["nodes"].append(node_data)
630
-
631
- # Get all outgoing and incoming edges
632
- query = f"""
633
- MATCH (a)-[r]-(b)
634
- WHERE a:`{current_label}` OR b:`{current_label}`
635
- RETURN a, r, b,
636
- CASE WHEN startNode(r) = a THEN 'OUTGOING' ELSE 'INCOMING' END AS direction
637
- """
638
- async with self._driver.session(database=self._DATABASE) as session:
639
- results = await session.run(query)
640
- async for record in results:
641
- # Handle edges
 
 
 
 
 
 
 
 
 
 
 
 
 
 
642
  rel = record["r"]
643
- edge_id = f"{rel.id}_{rel.type}"
644
  if edge_id not in visited_edges:
645
- edge_data = dict(rel)
646
- edge_data.update(
647
- {
648
- "source": list(record["a"].labels)[0],
649
- "target": list(record["b"].labels)[0],
650
- "type": rel.type,
651
- "direction": record["direction"],
652
- }
653
- )
654
- result["edges"].append(edge_data)
655
- visited_edges.add(edge_id)
656
-
657
- # Recursively traverse adjacent nodes
658
- next_label = (
659
- list(record["b"].labels)[0]
660
- if record["direction"] == "OUTGOING"
661
- else list(record["a"].labels)[0]
662
- )
663
- await traverse(next_label, current_depth + 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
664
 
665
- await traverse(label, 0)
666
  return result
667
 
668
  async def get_all_labels(self) -> list[str]:
@@ -671,23 +926,28 @@ class Neo4JStorage(BaseGraphStorage):
671
  Returns:
672
  ["Person", "Company", ...] # Alphabetically sorted label list
673
  """
674
- async with self._driver.session(database=self._DATABASE) as session:
 
 
675
  # Method 1: Direct metadata query (Available for Neo4j 4.3+)
676
  # query = "CALL db.labels() YIELD label RETURN label"
677
 
678
  # Method 2: Query compatible with older versions
679
  query = """
680
- MATCH (n)
681
- WITH DISTINCT labels(n) AS node_labels
682
- UNWIND node_labels AS label
683
- RETURN DISTINCT label
684
- ORDER BY label
685
  """
686
-
687
  result = await session.run(query)
688
  labels = []
689
- async for record in result:
690
- labels.append(record["label"])
 
 
 
 
 
691
  return labels
692
 
693
  @retry(
@@ -708,15 +968,15 @@ class Neo4JStorage(BaseGraphStorage):
708
  Args:
709
  node_id: The label of the node to delete
710
  """
711
- label = await self._ensure_label(node_id)
712
 
713
  async def _do_delete(tx: AsyncManagedTransaction):
714
- query = f"""
715
- MATCH (n:`{label}`)
716
  DETACH DELETE n
717
  """
718
- await tx.run(query)
719
- logger.debug(f"Deleted node with label '{label}'")
 
720
 
721
  try:
722
  async with self._driver.session(database=self._DATABASE) as session:
@@ -765,16 +1025,17 @@ class Neo4JStorage(BaseGraphStorage):
765
  edges: List of edges to be deleted, each edge is a (source, target) tuple
766
  """
767
  for source, target in edges:
768
- source_label = await self._ensure_label(source)
769
- target_label = await self._ensure_label(target)
770
 
771
  async def _do_delete_edge(tx: AsyncManagedTransaction):
772
- query = f"""
773
- MATCH (source:`{source_label}`)-[r]->(target:`{target_label}`)
774
  DELETE r
775
  """
776
- await tx.run(query)
777
- logger.debug(f"Deleted edge from '{source_label}' to '{target_label}'")
 
 
 
778
 
779
  try:
780
  async with self._driver.session(database=self._DATABASE) as session:
 
3
  import os
4
  import re
5
  from dataclasses import dataclass
6
+ from typing import Any, final, Optional
7
  import numpy as np
8
  import configparser
9
 
 
15
  retry_if_exception_type,
16
  )
17
 
18
+ import logging
19
  from ..utils import logger
20
  from ..base import BaseGraphStorage
21
  from ..types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
 
38
  # Get maximum number of graph nodes from environment variable, default is 1000
39
  MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
40
 
41
+ # Set neo4j logger level to ERROR to suppress warning logs
42
+ logging.getLogger("neo4j").setLevel(logging.ERROR)
43
+
44
 
45
  @final
46
  @dataclass
 
64
  MAX_CONNECTION_POOL_SIZE = int(
65
  os.environ.get(
66
  "NEO4J_MAX_CONNECTION_POOL_SIZE",
67
+ config.get("neo4j", "connection_pool_size", fallback=50),
68
  )
69
  )
70
  CONNECTION_TIMEOUT = float(
71
  os.environ.get(
72
  "NEO4J_CONNECTION_TIMEOUT",
73
+ config.get("neo4j", "connection_timeout", fallback=30.0),
74
  ),
75
  )
76
  CONNECTION_ACQUISITION_TIMEOUT = float(
77
  os.environ.get(
78
  "NEO4J_CONNECTION_ACQUISITION_TIMEOUT",
79
+ config.get("neo4j", "connection_acquisition_timeout", fallback=30.0),
80
+ ),
81
+ )
82
+ MAX_TRANSACTION_RETRY_TIME = float(
83
+ os.environ.get(
84
+ "NEO4J_MAX_TRANSACTION_RETRY_TIME",
85
+ config.get("neo4j", "max_transaction_retry_time", fallback=30.0),
86
  ),
87
  )
88
  DATABASE = os.environ.get(
 
95
  max_connection_pool_size=MAX_CONNECTION_POOL_SIZE,
96
  connection_timeout=CONNECTION_TIMEOUT,
97
  connection_acquisition_timeout=CONNECTION_ACQUISITION_TIMEOUT,
98
+ max_transaction_retry_time=MAX_TRANSACTION_RETRY_TIME,
99
  )
100
 
101
  # Try to connect to the database
 
163
  }
164
 
165
  async def close(self):
166
+ """Close the Neo4j driver and release all resources"""
167
  if self._driver:
168
  await self._driver.close()
169
  self._driver = None
170
 
171
  async def __aexit__(self, exc_type, exc, tb):
172
+ """Ensure driver is closed when context manager exits"""
173
+ await self.close()
174
 
175
  async def index_done_callback(self) -> None:
176
  # Noe4J handles persistence automatically
177
  pass
178
 
179
+ async def has_node(self, node_id: str) -> bool:
180
+ """
181
+ Check if a node with the given label exists in the database
182
+
183
+ Args:
184
+ node_id: Label of the node to check
 
 
 
 
 
185
 
186
+ Returns:
187
+ bool: True if node exists, False otherwise
 
 
 
 
188
 
189
+ Raises:
190
+ ValueError: If node_id is invalid
191
+ Exception: If there is an error executing the query
192
+ """
193
+ async with self._driver.session(
194
+ database=self._DATABASE, default_access_mode="READ"
195
+ ) as session:
196
+ try:
197
+ query = "MATCH (n:base {entity_id: $entity_id}) RETURN count(n) > 0 AS node_exists"
198
+ result = await session.run(query, entity_id=node_id)
199
+ single_result = await result.single()
200
+ await result.consume() # Ensure result is fully consumed
201
+ return single_result["node_exists"]
202
+ except Exception as e:
203
+ logger.error(f"Error checking node existence for {node_id}: {str(e)}")
204
+ await result.consume() # Ensure results are consumed even on error
205
+ raise
206
 
207
  async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
208
+ """
209
+ Check if an edge exists between two nodes
210
 
211
+ Args:
212
+ source_node_id: Label of the source node
213
+ target_node_id: Label of the target node
214
+
215
+ Returns:
216
+ bool: True if edge exists, False otherwise
217
+
218
+ Raises:
219
+ ValueError: If either node_id is invalid
220
+ Exception: If there is an error executing the query
221
+ """
222
+ async with self._driver.session(
223
+ database=self._DATABASE, default_access_mode="READ"
224
+ ) as session:
225
+ try:
226
+ query = (
227
+ "MATCH (a:base {entity_id: $source_entity_id})-[r]-(b:base {entity_id: $target_entity_id}) "
228
+ "RETURN COUNT(r) > 0 AS edgeExists"
229
+ )
230
+ result = await session.run(
231
+ query,
232
+ source_entity_id=source_node_id,
233
+ target_entity_id=target_node_id,
234
+ )
235
+ single_result = await result.single()
236
+ await result.consume() # Ensure result is fully consumed
237
+ return single_result["edgeExists"]
238
+ except Exception as e:
239
+ logger.error(
240
+ f"Error checking edge existence between {source_node_id} and {target_node_id}: {str(e)}"
241
+ )
242
+ await result.consume() # Ensure results are consumed even on error
243
+ raise
244
 
245
  async def get_node(self, node_id: str) -> dict[str, str] | None:
246
  """Get node by its label identifier.
 
251
  Returns:
252
  dict: Node properties if found
253
  None: If node not found
254
+
255
+ Raises:
256
+ ValueError: If node_id is invalid
257
+ Exception: If there is an error executing the query
258
  """
259
+ async with self._driver.session(
260
+ database=self._DATABASE, default_access_mode="READ"
261
+ ) as session:
262
+ try:
263
+ query = "MATCH (n:base {entity_id: $entity_id}) RETURN n"
264
+ result = await session.run(query, entity_id=node_id)
265
+ try:
266
+ records = await result.fetch(
267
+ 2
268
+ ) # Get 2 records for duplication check
269
+
270
+ if len(records) > 1:
271
+ logger.warning(
272
+ f"Multiple nodes found with label '{node_id}'. Using first node."
273
+ )
274
+ if records:
275
+ node = records[0]["n"]
276
+ node_dict = dict(node)
277
+ # Remove base label from labels list if it exists
278
+ if "labels" in node_dict:
279
+ node_dict["labels"] = [
280
+ label
281
+ for label in node_dict["labels"]
282
+ if label != "base"
283
+ ]
284
+ logger.debug(f"Neo4j query node {query} return: {node_dict}")
285
+ return node_dict
286
+ return None
287
+ finally:
288
+ await result.consume() # Ensure result is fully consumed
289
+ except Exception as e:
290
+ logger.error(f"Error getting node for {node_id}: {str(e)}")
291
+ raise
292
 
293
  async def node_degree(self, node_id: str) -> int:
294
+ """Get the degree (number of relationships) of a node with the given label.
295
+ If multiple nodes have the same label, returns the degree of the first node.
296
+ If no node is found, returns 0.
297
 
298
+ Args:
299
+ node_id: The label of the node
300
+
301
+ Returns:
302
+ int: The number of relationships the node has, or 0 if no node found
303
+
304
+ Raises:
305
+ ValueError: If node_id is invalid
306
+ Exception: If there is an error executing the query
307
+ """
308
+ async with self._driver.session(
309
+ database=self._DATABASE, default_access_mode="READ"
310
+ ) as session:
311
+ try:
312
+ query = """
313
+ MATCH (n:base {entity_id: $entity_id})
314
+ OPTIONAL MATCH (n)-[r]-()
315
+ RETURN COUNT(r) AS degree
316
+ """
317
+ result = await session.run(query, entity_id=node_id)
318
+ try:
319
+ record = await result.single()
320
+
321
+ if not record:
322
+ logger.warning(f"No node found with label '{node_id}'")
323
+ return 0
324
+
325
+ degree = record["degree"]
326
+ logger.debug(
327
+ "Neo4j query node degree for {node_id} return: {degree}"
328
+ )
329
+ return degree
330
+ finally:
331
+ await result.consume() # Ensure result is fully consumed
332
+ except Exception as e:
333
+ logger.error(f"Error getting node degree for {node_id}: {str(e)}")
334
+ raise
335
 
336
  async def edge_degree(self, src_id: str, tgt_id: str) -> int:
337
+ """Get the total degree (sum of relationships) of two nodes.
338
+
339
+ Args:
340
+ src_id: Label of the source node
341
+ tgt_id: Label of the target node
342
+
343
+ Returns:
344
+ int: Sum of the degrees of both nodes
345
+ """
346
+ src_degree = await self.node_degree(src_id)
347
+ trg_degree = await self.node_degree(tgt_id)
348
 
349
  # Convert None to 0 for addition
350
  src_degree = 0 if src_degree is None else src_degree
351
  trg_degree = 0 if trg_degree is None else trg_degree
352
 
353
  degrees = int(src_degree) + int(trg_degree)
 
 
 
354
  return degrees
355
 
356
  async def get_edge(
357
  self, source_node_id: str, target_node_id: str
358
  ) -> dict[str, str] | None:
359
+ """Get edge properties between two nodes.
 
 
360
 
361
+ Args:
362
+ source_node_id: Label of the source node
363
+ target_node_id: Label of the target node
364
+
365
+ Returns:
366
+ dict: Edge properties if found, default properties if not found or on error
367
+
368
+ Raises:
369
+ ValueError: If either node_id is invalid
370
+ Exception: If there is an error executing the query
371
+ """
372
+ try:
373
+ async with self._driver.session(
374
+ database=self._DATABASE, default_access_mode="READ"
375
+ ) as session:
376
+ query = """
377
+ MATCH (start:base {entity_id: $source_entity_id})-[r]-(end:base {entity_id: $target_entity_id})
378
  RETURN properties(r) as edge_properties
 
379
  """
380
+ result = await session.run(
381
+ query,
382
+ source_entity_id=source_node_id,
383
+ target_entity_id=target_node_id,
384
+ )
385
+ try:
386
+ records = await result.fetch(2)
387
 
388
+ if len(records) > 1:
389
+ logger.warning(
390
+ f"Multiple edges found between '{source_node_id}' and '{target_node_id}'. Using first edge."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
  )
392
+ if records:
393
+ try:
394
+ edge_result = dict(records[0]["edge_properties"])
395
+ logger.debug(f"Result: {edge_result}")
396
+ # Ensure required keys exist with defaults
397
+ required_keys = {
398
+ "weight": 0.0,
399
+ "source_id": None,
400
+ "description": None,
401
+ "keywords": None,
402
+ }
403
+ for key, default_value in required_keys.items():
404
+ if key not in edge_result:
405
+ edge_result[key] = default_value
406
+ logger.warning(
407
+ f"Edge between {source_node_id} and {target_node_id} "
408
+ f"missing {key}, using default: {default_value}"
409
+ )
410
+
411
+ logger.debug(
412
+ f"{inspect.currentframe().f_code.co_name}:query:{query}:result:{edge_result}"
413
+ )
414
+ return edge_result
415
+ except (KeyError, TypeError, ValueError) as e:
416
+ logger.error(
417
+ f"Error processing edge properties between {source_node_id} "
418
+ f"and {target_node_id}: {str(e)}"
419
+ )
420
+ # Return default edge properties on error
421
+ return {
422
+ "weight": 0.0,
423
+ "source_id": None,
424
+ "description": None,
425
+ "keywords": None,
426
+ }
427
+
428
+ logger.debug(
429
+ f"{inspect.currentframe().f_code.co_name}: No edge found between {source_node_id} and {target_node_id}"
430
+ )
431
+ # Return default edge properties when no edge found
432
+ return {
433
+ "weight": 0.0,
434
+ "source_id": None,
435
+ "description": None,
436
+ "keywords": None,
437
+ }
438
+ finally:
439
+ await result.consume() # Ensure result is fully consumed
440
 
441
  except Exception as e:
442
  logger.error(
443
  f"Error in get_edge between {source_node_id} and {target_node_id}: {str(e)}"
444
  )
445
+ raise
 
 
 
 
 
 
446
 
447
  async def get_node_edges(self, source_node_id: str) -> list[tuple[str, str]] | None:
448
+ """Retrieves all edges (relationships) for a particular node identified by its label.
449
 
450
+ Args:
451
+ source_node_id: Label of the node to get edges for
452
+
453
+ Returns:
454
+ list[tuple[str, str]]: List of (source_label, target_label) tuples representing edges
455
+ None: If no edges found
456
+
457
+ Raises:
458
+ ValueError: If source_node_id is invalid
459
+ Exception: If there is an error executing the query
460
  """
461
+ try:
462
+ async with self._driver.session(
463
+ database=self._DATABASE, default_access_mode="READ"
464
+ ) as session:
465
+ try:
466
+ query = """MATCH (n:base {entity_id: $entity_id})
467
+ OPTIONAL MATCH (n)-[r]-(connected:base)
468
+ WHERE connected.entity_id IS NOT NULL
469
+ RETURN n, r, connected"""
470
+ results = await session.run(query, entity_id=source_node_id)
471
+
472
+ edges = []
473
+ async for record in results:
474
+ source_node = record["n"]
475
+ connected_node = record["connected"]
476
+
477
+ # Skip if either node is None
478
+ if not source_node or not connected_node:
479
+ continue
480
+
481
+ source_label = (
482
+ source_node.get("entity_id")
483
+ if source_node.get("entity_id")
484
+ else None
485
+ )
486
+ target_label = (
487
+ connected_node.get("entity_id")
488
+ if connected_node.get("entity_id")
489
+ else None
490
+ )
491
 
492
+ if source_label and target_label:
493
+ edges.append((source_label, target_label))
494
 
495
+ await results.consume() # Ensure results are consumed
496
+ return edges
497
+ except Exception as e:
498
+ logger.error(
499
+ f"Error getting edges for node {source_node_id}: {str(e)}"
500
+ )
501
+ await results.consume() # Ensure results are consumed even on error
502
+ raise
503
+ except Exception as e:
504
+ logger.error(f"Error in get_node_edges for {source_node_id}: {str(e)}")
505
+ raise
506
 
507
  @retry(
508
  stop=stop_after_attempt(3),
 
524
  node_id: The unique identifier for the node (used as label)
525
  node_data: Dictionary of node properties
526
  """
 
527
  properties = node_data
528
+ entity_type = properties["entity_type"]
529
+ entity_id = properties["entity_id"]
530
+ if "entity_id" not in properties:
531
+ raise ValueError("Neo4j: node properties must contain an 'entity_id' field")
 
 
 
 
 
 
532
 
533
  try:
534
  async with self._driver.session(database=self._DATABASE) as session:
535
+
536
+ async def execute_upsert(tx: AsyncManagedTransaction):
537
+ query = (
538
+ """
539
+ MERGE (n:base {entity_id: $properties.entity_id})
540
+ SET n += $properties
541
+ SET n:`%s`
542
+ """
543
+ % entity_type
544
+ )
545
+ result = await tx.run(query, properties=properties)
546
+ logger.debug(
547
+ f"Upserted node with entity_id '{entity_id}' and properties: {properties}"
548
+ )
549
+ await result.consume() # Ensure result is fully consumed
550
+
551
+ await session.execute_write(execute_upsert)
552
  except Exception as e:
553
  logger.error(f"Error during upsert: {str(e)}")
554
  raise
555
 
556
+ @retry(
557
+ stop=stop_after_attempt(3),
558
+ wait=wait_exponential(multiplier=1, min=4, max=10),
559
+ retry=retry_if_exception_type(
560
+ (
561
+ neo4jExceptions.ServiceUnavailable,
562
+ neo4jExceptions.TransientError,
563
+ neo4jExceptions.WriteServiceUnavailable,
564
+ neo4jExceptions.ClientError,
565
+ )
566
+ ),
567
+ )
568
  @retry(
569
  stop=stop_after_attempt(3),
570
  wait=wait_exponential(multiplier=1, min=4, max=10),
 
582
  ) -> None:
583
  """
584
  Upsert an edge and its properties between two nodes identified by their labels.
585
+ Ensures both source and target nodes exist and are unique before creating the edge.
586
+ Uses entity_id property to uniquely identify nodes.
587
 
588
  Args:
589
  source_node_id (str): Label of the source node (used as identifier)
590
  target_node_id (str): Label of the target node (used as identifier)
591
  edge_data (dict): Dictionary of properties to set on the edge
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
592
 
593
+ Raises:
594
+ ValueError: If either source or target node does not exist or is not unique
595
+ """
596
  try:
597
+ edge_properties = edge_data
598
  async with self._driver.session(database=self._DATABASE) as session:
599
+
600
+ async def execute_upsert(tx: AsyncManagedTransaction):
601
+ query = """
602
+ MATCH (source:base {entity_id: $source_entity_id})
603
+ WITH source
604
+ MATCH (target:base {entity_id: $target_entity_id})
605
+ MERGE (source)-[r:DIRECTED]-(target)
606
+ SET r += $properties
607
+ RETURN r, source, target
608
+ """
609
+ result = await tx.run(
610
+ query,
611
+ source_entity_id=source_node_id,
612
+ target_entity_id=target_node_id,
613
+ properties=edge_properties,
614
+ )
615
+ try:
616
+ records = await result.fetch(2)
617
+ if records:
618
+ logger.debug(
619
+ f"Upserted edge from '{source_node_id}' to '{target_node_id}'"
620
+ f"with properties: {edge_properties}"
621
+ )
622
+ finally:
623
+ await result.consume() # Ensure result is consumed
624
+
625
+ await session.execute_write(execute_upsert)
626
  except Exception as e:
627
  logger.error(f"Error during edge upsert: {str(e)}")
628
  raise
 
631
  print("Implemented but never called.")
632
 
633
    async def get_knowledge_graph(
        self,
        node_label: str,
        max_depth: int = 3,
        min_degree: int = 0,
        inclusive: bool = False,
    ) -> KnowledgeGraph:
        """
        Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
        Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000).
        When reducing the number of nodes, the prioritization criteria are as follows:
            1. min_degree does not affect nodes directly connected to the matching nodes
            2. Label matching nodes take precedence
            3. Followed by nodes directly connected to the matching nodes
            4. Finally, the degree of the nodes

        Args:
            node_label: Label of the starting node ("*" selects the whole graph)
            max_depth: Maximum depth of the subgraph
            min_degree: Minimum degree of nodes to include. Defaults to 0
            inclusive: Do an inclusive (substring) search if true

        Returns:
            KnowledgeGraph: Complete connected subgraph for specified node
        """
        result = KnowledgeGraph()
        seen_nodes = set()
        seen_edges = set()

        async with self._driver.session(
            database=self._DATABASE, default_access_mode="READ"
        ) as session:
            try:
                if node_label == "*":
                    # Whole-graph mode: keep the highest-degree nodes up to the
                    # node limit, then return only edges between kept nodes.
                    main_query = """
                    MATCH (n)
                    OPTIONAL MATCH (n)-[r]-()
                    WITH n, count(r) AS degree
                    WHERE degree >= $min_degree
                    ORDER BY degree DESC
                    LIMIT $max_nodes
                    WITH collect({node: n}) AS filtered_nodes
                    UNWIND filtered_nodes AS node_info
                    WITH collect(node_info.node) AS kept_nodes, filtered_nodes
                    MATCH (a)-[r]-(b)
                    WHERE a IN kept_nodes AND b IN kept_nodes
                    RETURN filtered_nodes AS node_info,
                           collect(DISTINCT r) AS relationships
                    """
                    result_set = await session.run(
                        main_query,
                        {"max_nodes": MAX_GRAPH_NODES, "min_degree": min_degree},
                    )

                else:
                    # Main query uses partial matching.
                    # NOTE: requires the APOC plugin (apoc.path.subgraphAll);
                    # a ClientError from a missing plugin is handled below.
                    main_query = """
                    MATCH (start)
                    WHERE
                        CASE
                            WHEN $inclusive THEN start.entity_id CONTAINS $entity_id
                            ELSE start.entity_id = $entity_id
                        END
                    WITH start
                    CALL apoc.path.subgraphAll(start, {
                        relationshipFilter: '',
                        minLevel: 0,
                        maxLevel: $max_depth,
                        bfs: true
                    })
                    YIELD nodes, relationships
                    WITH start, nodes, relationships
                    UNWIND nodes AS node
                    OPTIONAL MATCH (node)-[r]-()
                    WITH node, count(r) AS degree, start, nodes, relationships
                    WHERE node = start OR EXISTS((start)--(node)) OR degree >= $min_degree
                    ORDER BY
                        CASE
                            WHEN node = start THEN 3
                            WHEN EXISTS((start)--(node)) THEN 2
                            ELSE 1
                        END DESC,
                        degree DESC
                    LIMIT $max_nodes
                    WITH collect({node: node}) AS filtered_nodes
                    UNWIND filtered_nodes AS node_info
                    WITH collect(node_info.node) AS kept_nodes, filtered_nodes
                    MATCH (a)-[r]-(b)
                    WHERE a IN kept_nodes AND b IN kept_nodes
                    RETURN filtered_nodes AS node_info,
                           collect(DISTINCT r) AS relationships
                    """
                    result_set = await session.run(
                        main_query,
                        {
                            "max_nodes": MAX_GRAPH_NODES,
                            "entity_id": node_label,
                            "inclusive": inclusive,
                            "max_depth": max_depth,
                            "min_degree": min_degree,
                        },
                    )

                try:
                    record = await result_set.single()

                    if record:
                        # Handle nodes (compatible with multi-label cases)
                        for node_info in record["node_info"]:
                            node = node_info["node"]
                            node_id = node.id
                            if node_id not in seen_nodes:
                                result.nodes.append(
                                    KnowledgeGraphNode(
                                        id=f"{node_id}",
                                        labels=[
                                            label
                                            for label in node.labels
                                            if label != "base"
                                        ],
                                        properties=dict(node),
                                    )
                                )
                                seen_nodes.add(node_id)

                        # Handle relationships (including direction information)
                        for rel in record["relationships"]:
                            edge_id = rel.id
                            if edge_id not in seen_edges:
                                start = rel.start_node
                                end = rel.end_node
                                result.edges.append(
                                    KnowledgeGraphEdge(
                                        id=f"{edge_id}",
                                        type=rel.type,
                                        source=f"{start.id}",
                                        target=f"{end.id}",
                                        properties=dict(rel),
                                    )
                                )
                                seen_edges.add(edge_id)

                    logger.info(
                        f"Process {os.getpid()} graph query return: {len(result.nodes)} nodes, {len(result.edges)} edges"
                    )
                finally:
                    await result_set.consume()  # Ensure result set is consumed

            except neo4jExceptions.ClientError as e:
                # Typically raised when the APOC plugin is unavailable; fall
                # back to a plain-Cypher recursive traversal (single-node mode
                # only — "*" mode does not use APOC).
                logger.warning(f"APOC plugin error: {str(e)}")
                if node_label != "*":
                    logger.warning(
                        "Neo4j: falling back to basic Cypher recursive search..."
                    )
                    if inclusive:
                        logger.warning(
                            "Neo4j: inclusive search mode is not supported in recursive query, using exact matching"
                        )
                    return await self._robust_fallback(
                        node_label, max_depth, min_degree
                    )

            return result
795
 
796
  async def _robust_fallback(
797
+ self, node_label: str, max_depth: int, min_degree: int = 0
798
+ ) -> KnowledgeGraph:
799
+ """
800
+ Fallback implementation when APOC plugin is not available or incompatible.
801
+ This method implements the same functionality as get_knowledge_graph but uses
802
+ only basic Cypher queries and recursive traversal instead of APOC procedures.
803
+ """
804
+ result = KnowledgeGraph()
805
  visited_nodes = set()
806
  visited_edges = set()
807
 
808
+ async def traverse(
809
+ node: KnowledgeGraphNode,
810
+ edge: Optional[KnowledgeGraphEdge],
811
+ current_depth: int,
812
+ ):
813
+ # Check traversal limits
814
  if current_depth > max_depth:
815
+ logger.debug(f"Reached max depth: {max_depth}")
816
  return
817
+ if len(visited_nodes) >= MAX_GRAPH_NODES:
818
+ logger.debug(f"Reached max nodes limit: {MAX_GRAPH_NODES}")
 
 
819
  return
820
 
821
+ # Check if node already visited
822
+ if node.id in visited_nodes:
823
  return
824
+
825
+ # Get all edges and target nodes
826
+ async with self._driver.session(
827
+ database=self._DATABASE, default_access_mode="READ"
828
+ ) as session:
829
+ query = """
830
+ MATCH (a:base {entity_id: $entity_id})-[r]-(b)
831
+ WITH r, b, id(r) as edge_id, id(b) as target_id
832
+ RETURN r, b, edge_id, target_id
833
+ """
834
+ results = await session.run(query, entity_id=node.id)
835
+
836
+ # Get all records and release database connection
837
+ records = await results.fetch(
838
+ 1000
839
+ ) # Max neighbour nodes we can handled
840
+ await results.consume() # Ensure results are consumed
841
+
842
+ # Nodes not connected to start node need to check degree
843
+ if current_depth > 1 and len(records) < min_degree:
844
+ return
845
+
846
+ # Add current node to result
847
+ result.nodes.append(node)
848
+ visited_nodes.add(node.id)
849
+
850
+ # Add edge to result if it exists and not already added
851
+ if edge and edge.id not in visited_edges:
852
+ result.edges.append(edge)
853
+ visited_edges.add(edge.id)
854
+
855
+ # Prepare nodes and edges for recursive processing
856
+ nodes_to_process = []
857
+ for record in records:
858
  rel = record["r"]
859
+ edge_id = str(record["edge_id"])
860
  if edge_id not in visited_edges:
861
+ b_node = record["b"]
862
+ target_id = b_node.get("entity_id")
863
+
864
+ if target_id: # Only process if target node has entity_id
865
+ # Create KnowledgeGraphNode for target
866
+ target_node = KnowledgeGraphNode(
867
+ id=f"{target_id}",
868
+ labels=[
869
+ label for label in b_node.labels if label != "base"
870
+ ],
871
+ properties=dict(b_node.properties),
872
+ )
873
+
874
+ # Create KnowledgeGraphEdge
875
+ target_edge = KnowledgeGraphEdge(
876
+ id=f"{edge_id}",
877
+ type=rel.type,
878
+ source=f"{node.id}",
879
+ target=f"{target_id}",
880
+ properties=dict(rel),
881
+ )
882
+
883
+ nodes_to_process.append((target_node, target_edge))
884
+ else:
885
+ logger.warning(
886
+ f"Skipping edge {edge_id} due to missing labels on target node"
887
+ )
888
+
889
+ # Process nodes after releasing database connection
890
+ for target_node, target_edge in nodes_to_process:
891
+ await traverse(target_node, target_edge, current_depth + 1)
892
+
893
+ # Get the starting node's data
894
+ async with self._driver.session(
895
+ database=self._DATABASE, default_access_mode="READ"
896
+ ) as session:
897
+ query = """
898
+ MATCH (n:base {entity_id: $entity_id})
899
+ RETURN id(n) as node_id, n
900
+ """
901
+ node_result = await session.run(query, entity_id=node_label)
902
+ try:
903
+ node_record = await node_result.single()
904
+ if not node_record:
905
+ return result
906
+
907
+ # Create initial KnowledgeGraphNode
908
+ start_node = KnowledgeGraphNode(
909
+ id=f"{node_record['n'].get('entity_id')}",
910
+ labels=[
911
+ label for label in node_record["n"].labels if label != "base"
912
+ ],
913
+ properties=dict(node_record["n"].properties),
914
+ )
915
+ finally:
916
+ await node_result.consume() # Ensure results are consumed
917
+
918
+ # Start traversal with the initial node
919
+ await traverse(start_node, None, 0)
920
 
 
921
  return result
922
 
923
  async def get_all_labels(self) -> list[str]:
 
926
  Returns:
927
  ["Person", "Company", ...] # Alphabetically sorted label list
928
  """
929
+ async with self._driver.session(
930
+ database=self._DATABASE, default_access_mode="READ"
931
+ ) as session:
932
  # Method 1: Direct metadata query (Available for Neo4j 4.3+)
933
  # query = "CALL db.labels() YIELD label RETURN label"
934
 
935
  # Method 2: Query compatible with older versions
936
  query = """
937
+ MATCH (n)
938
+ WHERE n.entity_id IS NOT NULL
939
+ RETURN DISTINCT n.entity_id AS label
940
+ ORDER BY label
 
941
  """
 
942
  result = await session.run(query)
943
  labels = []
944
+ try:
945
+ async for record in result:
946
+ labels.append(record["label"])
947
+ finally:
948
+ await (
949
+ result.consume()
950
+ ) # Ensure results are consumed even if processing fails
951
  return labels
952
 
953
  @retry(
 
968
  Args:
969
  node_id: The label of the node to delete
970
  """
 
971
 
972
  async def _do_delete(tx: AsyncManagedTransaction):
973
+ query = """
974
+ MATCH (n:base {entity_id: $entity_id})
975
  DETACH DELETE n
976
  """
977
+ result = await tx.run(query, entity_id=node_id)
978
+ logger.debug(f"Deleted node with label '{node_id}'")
979
+ await result.consume() # Ensure result is fully consumed
980
 
981
  try:
982
  async with self._driver.session(database=self._DATABASE) as session:
 
1025
  edges: List of edges to be deleted, each edge is a (source, target) tuple
1026
  """
1027
  for source, target in edges:
 
 
1028
 
1029
  async def _do_delete_edge(tx: AsyncManagedTransaction):
1030
+ query = """
1031
+ MATCH (source:base {entity_id: $source_entity_id})-[r]-(target:base {entity_id: $target_entity_id})
1032
  DELETE r
1033
  """
1034
+ result = await tx.run(
1035
+ query, source_entity_id=source, target_entity_id=target
1036
+ )
1037
+ logger.debug(f"Deleted edge from '{source}' to '{target}'")
1038
+ await result.consume() # Ensure result is fully consumed
1039
 
1040
  try:
1041
  async with self._driver.session(database=self._DATABASE) as session:
lightrag/kg/oracle_impl.py CHANGED
@@ -531,6 +531,80 @@ class OracleVectorDBStorage(BaseVectorStorage):
531
  logger.error(f"Error searching records with prefix '{prefix}': {e}")
532
  return []
533
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534
 
535
  @final
536
  @dataclass
 
531
  logger.error(f"Error searching records with prefix '{prefix}': {e}")
532
  return []
533
 
534
+ async def get_by_id(self, id: str) -> dict[str, Any] | None:
535
+ """Get vector data by its ID
536
+
537
+ Args:
538
+ id: The unique identifier of the vector
539
+
540
+ Returns:
541
+ The vector data if found, or None if not found
542
+ """
543
+ try:
544
+ # Determine the table name based on namespace
545
+ table_name = namespace_to_table_name(self.namespace)
546
+ if not table_name:
547
+ logger.error(f"Unknown namespace for ID lookup: {self.namespace}")
548
+ return None
549
+
550
+ # Create the appropriate ID field name based on namespace
551
+ id_field = "entity_id" if "NODES" in table_name else "relation_id"
552
+ if "CHUNKS" in table_name:
553
+ id_field = "chunk_id"
554
+
555
+ # Prepare and execute the query
556
+ query = f"""
557
+ SELECT * FROM {table_name}
558
+ WHERE {id_field} = :id AND workspace = :workspace
559
+ """
560
+ params = {"id": id, "workspace": self.db.workspace}
561
+
562
+ result = await self.db.query(query, params)
563
+ return result
564
+ except Exception as e:
565
+ logger.error(f"Error retrieving vector data for ID {id}: {e}")
566
+ return None
567
+
568
+ async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
569
+ """Get multiple vector data by their IDs
570
+
571
+ Args:
572
+ ids: List of unique identifiers
573
+
574
+ Returns:
575
+ List of vector data objects that were found
576
+ """
577
+ if not ids:
578
+ return []
579
+
580
+ try:
581
+ # Determine the table name based on namespace
582
+ table_name = namespace_to_table_name(self.namespace)
583
+ if not table_name:
584
+ logger.error(f"Unknown namespace for IDs lookup: {self.namespace}")
585
+ return []
586
+
587
+ # Create the appropriate ID field name based on namespace
588
+ id_field = "entity_id" if "NODES" in table_name else "relation_id"
589
+ if "CHUNKS" in table_name:
590
+ id_field = "chunk_id"
591
+
592
+ # Format the list of IDs for SQL IN clause
593
+ ids_list = ", ".join([f"'{id}'" for id in ids])
594
+
595
+ # Prepare and execute the query
596
+ query = f"""
597
+ SELECT * FROM {table_name}
598
+ WHERE {id_field} IN ({ids_list}) AND workspace = :workspace
599
+ """
600
+ params = {"workspace": self.db.workspace}
601
+
602
+ results = await self.db.query(query, params, multirows=True)
603
+ return results or []
604
+ except Exception as e:
605
+ logger.error(f"Error retrieving vector data for IDs {ids}: {e}")
606
+ return []
607
+
608
 
609
  @final
610
  @dataclass
lightrag/kg/postgres_impl.py CHANGED
@@ -621,6 +621,60 @@ class PGVectorStorage(BaseVectorStorage):
621
  logger.error(f"Error during prefix search for '{prefix}': {e}")
622
  return []
623
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
624
 
625
  @final
626
  @dataclass
 
621
  logger.error(f"Error during prefix search for '{prefix}': {e}")
622
  return []
623
 
624
+ async def get_by_id(self, id: str) -> dict[str, Any] | None:
625
+ """Get vector data by its ID
626
+
627
+ Args:
628
+ id: The unique identifier of the vector
629
+
630
+ Returns:
631
+ The vector data if found, or None if not found
632
+ """
633
+ table_name = namespace_to_table_name(self.namespace)
634
+ if not table_name:
635
+ logger.error(f"Unknown namespace for ID lookup: {self.namespace}")
636
+ return None
637
+
638
+ query = f"SELECT * FROM {table_name} WHERE workspace=$1 AND id=$2"
639
+ params = {"workspace": self.db.workspace, "id": id}
640
+
641
+ try:
642
+ result = await self.db.query(query, params)
643
+ if result:
644
+ return dict(result)
645
+ return None
646
+ except Exception as e:
647
+ logger.error(f"Error retrieving vector data for ID {id}: {e}")
648
+ return None
649
+
650
+ async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
651
+ """Get multiple vector data by their IDs
652
+
653
+ Args:
654
+ ids: List of unique identifiers
655
+
656
+ Returns:
657
+ List of vector data objects that were found
658
+ """
659
+ if not ids:
660
+ return []
661
+
662
+ table_name = namespace_to_table_name(self.namespace)
663
+ if not table_name:
664
+ logger.error(f"Unknown namespace for IDs lookup: {self.namespace}")
665
+ return []
666
+
667
+ ids_str = ",".join([f"'{id}'" for id in ids])
668
+ query = f"SELECT * FROM {table_name} WHERE workspace=$1 AND id IN ({ids_str})"
669
+ params = {"workspace": self.db.workspace}
670
+
671
+ try:
672
+ results = await self.db.query(query, params, multirows=True)
673
+ return [dict(record) for record in results]
674
+ except Exception as e:
675
+ logger.error(f"Error retrieving vector data for IDs {ids}: {e}")
676
+ return []
677
+
678
 
679
  @final
680
  @dataclass
lightrag/kg/shared_storage.py CHANGED
@@ -7,12 +7,18 @@ from typing import Any, Dict, Optional, Union, TypeVar, Generic
7
 
8
 
9
  # Define a direct print function for critical logs that must be visible in all processes
10
- def direct_log(message, level="INFO"):
11
  """
12
  Log a message directly to stderr to ensure visibility in all processes,
13
  including the Gunicorn master process.
 
 
 
 
 
14
  """
15
- print(f"{level}: {message}", file=sys.stderr, flush=True)
 
16
 
17
 
18
  T = TypeVar("T")
@@ -32,55 +38,165 @@ _update_flags: Optional[Dict[str, bool]] = None # namespace -> updated
32
  _storage_lock: Optional[LockType] = None
33
  _internal_lock: Optional[LockType] = None
34
  _pipeline_status_lock: Optional[LockType] = None
 
 
35
 
36
 
37
  class UnifiedLock(Generic[T]):
38
  """Provide a unified lock interface type for asyncio.Lock and multiprocessing.Lock"""
39
 
40
- def __init__(self, lock: Union[ProcessLock, asyncio.Lock], is_async: bool):
 
 
 
 
 
 
41
  self._lock = lock
42
  self._is_async = is_async
 
 
 
43
 
44
  async def __aenter__(self) -> "UnifiedLock[T]":
45
- if self._is_async:
46
- await self._lock.acquire()
47
- else:
48
- self._lock.acquire()
49
- return self
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  async def __aexit__(self, exc_type, exc_val, exc_tb):
52
- if self._is_async:
53
- self._lock.release()
54
- else:
55
- self._lock.release()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  def __enter__(self) -> "UnifiedLock[T]":
58
  """For backward compatibility"""
59
- if self._is_async:
60
- raise RuntimeError("Use 'async with' for shared_storage lock")
61
- self._lock.acquire()
62
- return self
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  def __exit__(self, exc_type, exc_val, exc_tb):
65
  """For backward compatibility"""
66
- if self._is_async:
67
- raise RuntimeError("Use 'async with' for shared_storage lock")
68
- self._lock.release()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
 
71
- def get_internal_lock() -> UnifiedLock:
72
  """return unified storage lock for data consistency"""
73
- return UnifiedLock(lock=_internal_lock, is_async=not is_multiprocess)
 
 
 
 
 
74
 
75
 
76
- def get_storage_lock() -> UnifiedLock:
77
  """return unified storage lock for data consistency"""
78
- return UnifiedLock(lock=_storage_lock, is_async=not is_multiprocess)
 
 
 
 
 
79
 
80
 
81
- def get_pipeline_status_lock() -> UnifiedLock:
82
  """return unified storage lock for data consistency"""
83
- return UnifiedLock(lock=_pipeline_status_lock, is_async=not is_multiprocess)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
 
86
  def initialize_share_data(workers: int = 1):
@@ -108,6 +224,8 @@ def initialize_share_data(workers: int = 1):
108
  _storage_lock, \
109
  _internal_lock, \
110
  _pipeline_status_lock, \
 
 
111
  _shared_dicts, \
112
  _init_flags, \
113
  _initialized, \
@@ -120,14 +238,16 @@ def initialize_share_data(workers: int = 1):
120
  )
121
  return
122
 
123
- _manager = Manager()
124
  _workers = workers
125
 
126
  if workers > 1:
127
  is_multiprocess = True
 
128
  _internal_lock = _manager.Lock()
129
  _storage_lock = _manager.Lock()
130
  _pipeline_status_lock = _manager.Lock()
 
 
131
  _shared_dicts = _manager.dict()
132
  _init_flags = _manager.dict()
133
  _update_flags = _manager.dict()
@@ -139,6 +259,8 @@ def initialize_share_data(workers: int = 1):
139
  _internal_lock = asyncio.Lock()
140
  _storage_lock = asyncio.Lock()
141
  _pipeline_status_lock = asyncio.Lock()
 
 
142
  _shared_dicts = {}
143
  _init_flags = {}
144
  _update_flags = {}
@@ -164,6 +286,7 @@ async def initialize_pipeline_status():
164
  history_messages = _manager.list() if is_multiprocess else []
165
  pipeline_namespace.update(
166
  {
 
167
  "busy": False, # Control concurrent processes
168
  "job_name": "Default Job", # Current job name (indexing files/indexing texts)
169
  "job_start": None, # Job start time
@@ -200,7 +323,12 @@ async def get_update_flag(namespace: str):
200
  if is_multiprocess and _manager is not None:
201
  new_update_flag = _manager.Value("b", False)
202
  else:
203
- new_update_flag = False
 
 
 
 
 
204
 
205
  _update_flags[namespace].append(new_update_flag)
206
  return new_update_flag
@@ -220,7 +348,26 @@ async def set_all_update_flags(namespace: str):
220
  if is_multiprocess:
221
  _update_flags[namespace][i].value = True
222
  else:
223
- _update_flags[namespace][i] = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
225
 
226
  async def get_all_update_flags_status() -> Dict[str, list]:
@@ -247,7 +394,7 @@ async def get_all_update_flags_status() -> Dict[str, list]:
247
  return result
248
 
249
 
250
- def try_initialize_namespace(namespace: str) -> bool:
251
  """
252
  Returns True if the current worker(process) gets initialization permission for loading data later.
253
  The worker does not get the permission is prohibited to load data from files.
@@ -257,15 +404,17 @@ def try_initialize_namespace(namespace: str) -> bool:
257
  if _init_flags is None:
258
  raise ValueError("Try to create nanmespace before Shared-Data is initialized")
259
 
260
- if namespace not in _init_flags:
261
- _init_flags[namespace] = True
 
 
 
 
 
262
  direct_log(
263
- f"Process {os.getpid()} ready to initialize storage namespace: [{namespace}]"
264
  )
265
- return True
266
- direct_log(
267
- f"Process {os.getpid()} storage namespace already initialized: [{namespace}]"
268
- )
269
  return False
270
 
271
 
@@ -304,6 +453,8 @@ def finalize_share_data():
304
  _storage_lock, \
305
  _internal_lock, \
306
  _pipeline_status_lock, \
 
 
307
  _shared_dicts, \
308
  _init_flags, \
309
  _initialized, \
@@ -369,6 +520,8 @@ def finalize_share_data():
369
  _storage_lock = None
370
  _internal_lock = None
371
  _pipeline_status_lock = None
 
 
372
  _update_flags = None
373
 
374
  direct_log(f"Process {os.getpid()} storage data finalization complete")
 
7
 
8
 
9
  # Define a direct print function for critical logs that must be visible in all processes
10
+ def direct_log(message, level="INFO", enable_output: bool = True):
11
  """
12
  Log a message directly to stderr to ensure visibility in all processes,
13
  including the Gunicorn master process.
14
+
15
+ Args:
16
+ message: The message to log
17
+ level: Log level (default: "INFO")
18
+ enable_output: Whether to actually output the log (default: True)
19
  """
20
+ if enable_output:
21
+ print(f"{level}: {message}", file=sys.stderr, flush=True)
22
 
23
 
24
  T = TypeVar("T")
 
38
  _storage_lock: Optional[LockType] = None
39
  _internal_lock: Optional[LockType] = None
40
  _pipeline_status_lock: Optional[LockType] = None
41
+ _graph_db_lock: Optional[LockType] = None
42
+ _data_init_lock: Optional[LockType] = None
43
 
44
 
45
  class UnifiedLock(Generic[T]):
46
  """Provide a unified lock interface type for asyncio.Lock and multiprocessing.Lock"""
47
 
48
+ def __init__(
49
+ self,
50
+ lock: Union[ProcessLock, asyncio.Lock],
51
+ is_async: bool,
52
+ name: str = "unnamed",
53
+ enable_logging: bool = True,
54
+ ):
55
  self._lock = lock
56
  self._is_async = is_async
57
+ self._pid = os.getpid() # for debug only
58
+ self._name = name # for debug only
59
+ self._enable_logging = enable_logging # for debug only
60
 
61
  async def __aenter__(self) -> "UnifiedLock[T]":
62
+ try:
63
+ direct_log(
64
+ f"== Lock == Process {self._pid}: Acquiring lock '{self._name}' (async={self._is_async})",
65
+ enable_output=self._enable_logging,
66
+ )
67
+ if self._is_async:
68
+ await self._lock.acquire()
69
+ else:
70
+ self._lock.acquire()
71
+ direct_log(
72
+ f"== Lock == Process {self._pid}: Lock '{self._name}' acquired (async={self._is_async})",
73
+ enable_output=self._enable_logging,
74
+ )
75
+ return self
76
+ except Exception as e:
77
+ direct_log(
78
+ f"== Lock == Process {self._pid}: Failed to acquire lock '{self._name}': {e}",
79
+ level="ERROR",
80
+ enable_output=self._enable_logging,
81
+ )
82
+ raise
83
 
84
  async def __aexit__(self, exc_type, exc_val, exc_tb):
85
+ try:
86
+ direct_log(
87
+ f"== Lock == Process {self._pid}: Releasing lock '{self._name}' (async={self._is_async})",
88
+ enable_output=self._enable_logging,
89
+ )
90
+ if self._is_async:
91
+ self._lock.release()
92
+ else:
93
+ self._lock.release()
94
+ direct_log(
95
+ f"== Lock == Process {self._pid}: Lock '{self._name}' released (async={self._is_async})",
96
+ enable_output=self._enable_logging,
97
+ )
98
+ except Exception as e:
99
+ direct_log(
100
+ f"== Lock == Process {self._pid}: Failed to release lock '{self._name}': {e}",
101
+ level="ERROR",
102
+ enable_output=self._enable_logging,
103
+ )
104
+ raise
105
 
106
  def __enter__(self) -> "UnifiedLock[T]":
107
  """For backward compatibility"""
108
+ try:
109
+ if self._is_async:
110
+ raise RuntimeError("Use 'async with' for shared_storage lock")
111
+ direct_log(
112
+ f"== Lock == Process {self._pid}: Acquiring lock '{self._name}' (sync)",
113
+ enable_output=self._enable_logging,
114
+ )
115
+ self._lock.acquire()
116
+ direct_log(
117
+ f"== Lock == Process {self._pid}: Lock '{self._name}' acquired (sync)",
118
+ enable_output=self._enable_logging,
119
+ )
120
+ return self
121
+ except Exception as e:
122
+ direct_log(
123
+ f"== Lock == Process {self._pid}: Failed to acquire lock '{self._name}' (sync): {e}",
124
+ level="ERROR",
125
+ enable_output=self._enable_logging,
126
+ )
127
+ raise
128
 
129
  def __exit__(self, exc_type, exc_val, exc_tb):
130
  """For backward compatibility"""
131
+ try:
132
+ if self._is_async:
133
+ raise RuntimeError("Use 'async with' for shared_storage lock")
134
+ direct_log(
135
+ f"== Lock == Process {self._pid}: Releasing lock '{self._name}' (sync)",
136
+ enable_output=self._enable_logging,
137
+ )
138
+ self._lock.release()
139
+ direct_log(
140
+ f"== Lock == Process {self._pid}: Lock '{self._name}' released (sync)",
141
+ enable_output=self._enable_logging,
142
+ )
143
+ except Exception as e:
144
+ direct_log(
145
+ f"== Lock == Process {self._pid}: Failed to release lock '{self._name}' (sync): {e}",
146
+ level="ERROR",
147
+ enable_output=self._enable_logging,
148
+ )
149
+ raise
150
 
151
 
152
+ def get_internal_lock(enable_logging: bool = False) -> UnifiedLock:
153
  """return unified storage lock for data consistency"""
154
+ return UnifiedLock(
155
+ lock=_internal_lock,
156
+ is_async=not is_multiprocess,
157
+ name="internal_lock",
158
+ enable_logging=enable_logging,
159
+ )
160
 
161
 
162
+ def get_storage_lock(enable_logging: bool = False) -> UnifiedLock:
163
  """return unified storage lock for data consistency"""
164
+ return UnifiedLock(
165
+ lock=_storage_lock,
166
+ is_async=not is_multiprocess,
167
+ name="storage_lock",
168
+ enable_logging=enable_logging,
169
+ )
170
 
171
 
172
+ def get_pipeline_status_lock(enable_logging: bool = False) -> UnifiedLock:
173
  """return unified storage lock for data consistency"""
174
+ return UnifiedLock(
175
+ lock=_pipeline_status_lock,
176
+ is_async=not is_multiprocess,
177
+ name="pipeline_status_lock",
178
+ enable_logging=enable_logging,
179
+ )
180
+
181
+
182
+ def get_graph_db_lock(enable_logging: bool = False) -> UnifiedLock:
183
+ """return unified graph database lock for ensuring atomic operations"""
184
+ return UnifiedLock(
185
+ lock=_graph_db_lock,
186
+ is_async=not is_multiprocess,
187
+ name="graph_db_lock",
188
+ enable_logging=enable_logging,
189
+ )
190
+
191
+
192
+ def get_data_init_lock(enable_logging: bool = False) -> UnifiedLock:
193
+ """return unified data initialization lock for ensuring atomic data initialization"""
194
+ return UnifiedLock(
195
+ lock=_data_init_lock,
196
+ is_async=not is_multiprocess,
197
+ name="data_init_lock",
198
+ enable_logging=enable_logging,
199
+ )
200
 
201
 
202
  def initialize_share_data(workers: int = 1):
 
224
  _storage_lock, \
225
  _internal_lock, \
226
  _pipeline_status_lock, \
227
+ _graph_db_lock, \
228
+ _data_init_lock, \
229
  _shared_dicts, \
230
  _init_flags, \
231
  _initialized, \
 
238
  )
239
  return
240
 
 
241
  _workers = workers
242
 
243
  if workers > 1:
244
  is_multiprocess = True
245
+ _manager = Manager()
246
  _internal_lock = _manager.Lock()
247
  _storage_lock = _manager.Lock()
248
  _pipeline_status_lock = _manager.Lock()
249
+ _graph_db_lock = _manager.Lock()
250
+ _data_init_lock = _manager.Lock()
251
  _shared_dicts = _manager.dict()
252
  _init_flags = _manager.dict()
253
  _update_flags = _manager.dict()
 
259
  _internal_lock = asyncio.Lock()
260
  _storage_lock = asyncio.Lock()
261
  _pipeline_status_lock = asyncio.Lock()
262
+ _graph_db_lock = asyncio.Lock()
263
+ _data_init_lock = asyncio.Lock()
264
  _shared_dicts = {}
265
  _init_flags = {}
266
  _update_flags = {}
 
286
  history_messages = _manager.list() if is_multiprocess else []
287
  pipeline_namespace.update(
288
  {
289
+ "autoscanned": False, # Auto-scan started
290
  "busy": False, # Control concurrent processes
291
  "job_name": "Default Job", # Current job name (indexing files/indexing texts)
292
  "job_start": None, # Job start time
 
323
  if is_multiprocess and _manager is not None:
324
  new_update_flag = _manager.Value("b", False)
325
  else:
326
+ # Create a simple mutable object to store boolean value for compatibility with mutiprocess
327
+ class MutableBoolean:
328
+ def __init__(self, initial_value=False):
329
+ self.value = initial_value
330
+
331
+ new_update_flag = MutableBoolean(False)
332
 
333
  _update_flags[namespace].append(new_update_flag)
334
  return new_update_flag
 
348
  if is_multiprocess:
349
  _update_flags[namespace][i].value = True
350
  else:
351
+ # Use .value attribute instead of direct assignment
352
+ _update_flags[namespace][i].value = True
353
+
354
+
355
+ async def clear_all_update_flags(namespace: str):
356
+ """Clear all update flag of namespace indicating all workers need to reload data from files"""
357
+ global _update_flags
358
+ if _update_flags is None:
359
+ raise ValueError("Try to create namespace before Shared-Data is initialized")
360
+
361
+ async with get_internal_lock():
362
+ if namespace not in _update_flags:
363
+ raise ValueError(f"Namespace {namespace} not found in update flags")
364
+ # Update flags for both modes
365
+ for i in range(len(_update_flags[namespace])):
366
+ if is_multiprocess:
367
+ _update_flags[namespace][i].value = False
368
+ else:
369
+ # Use .value attribute instead of direct assignment
370
+ _update_flags[namespace][i].value = False
371
 
372
 
373
  async def get_all_update_flags_status() -> Dict[str, list]:
 
394
  return result
395
 
396
 
397
+ async def try_initialize_namespace(namespace: str) -> bool:
398
  """
399
  Returns True if the current worker(process) gets initialization permission for loading data later.
400
  The worker does not get the permission is prohibited to load data from files.
 
404
  if _init_flags is None:
405
  raise ValueError("Try to create nanmespace before Shared-Data is initialized")
406
 
407
+ async with get_internal_lock():
408
+ if namespace not in _init_flags:
409
+ _init_flags[namespace] = True
410
+ direct_log(
411
+ f"Process {os.getpid()} ready to initialize storage namespace: [{namespace}]"
412
+ )
413
+ return True
414
  direct_log(
415
+ f"Process {os.getpid()} storage namespace already initialized: [{namespace}]"
416
  )
417
+
 
 
 
418
  return False
419
 
420
 
 
453
  _storage_lock, \
454
  _internal_lock, \
455
  _pipeline_status_lock, \
456
+ _graph_db_lock, \
457
+ _data_init_lock, \
458
  _shared_dicts, \
459
  _init_flags, \
460
  _initialized, \
 
520
  _storage_lock = None
521
  _internal_lock = None
522
  _pipeline_status_lock = None
523
+ _graph_db_lock = None
524
+ _data_init_lock = None
525
  _update_flags = None
526
 
527
  direct_log(f"Process {os.getpid()} storage data finalization complete")
lightrag/kg/tidb_impl.py CHANGED
@@ -465,6 +465,100 @@ class TiDBVectorDBStorage(BaseVectorStorage):
465
  logger.error(f"Error searching records with prefix '{prefix}': {e}")
466
  return []
467
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
 
469
  @final
470
  @dataclass
 
465
  logger.error(f"Error searching records with prefix '{prefix}': {e}")
466
  return []
467
 
468
+ async def get_by_id(self, id: str) -> dict[str, Any] | None:
469
+ """Get vector data by its ID
470
+
471
+ Args:
472
+ id: The unique identifier of the vector
473
+
474
+ Returns:
475
+ The vector data if found, or None if not found
476
+ """
477
+ try:
478
+ # Determine which table to query based on namespace
479
+ if self.namespace == NameSpace.VECTOR_STORE_ENTITIES:
480
+ sql_template = """
481
+ SELECT entity_id as id, name as entity_name, entity_type, description, content
482
+ FROM LIGHTRAG_GRAPH_NODES
483
+ WHERE entity_id = :entity_id AND workspace = :workspace
484
+ """
485
+ params = {"entity_id": id, "workspace": self.db.workspace}
486
+ elif self.namespace == NameSpace.VECTOR_STORE_RELATIONSHIPS:
487
+ sql_template = """
488
+ SELECT relation_id as id, source_name as src_id, target_name as tgt_id,
489
+ keywords, description, content
490
+ FROM LIGHTRAG_GRAPH_EDGES
491
+ WHERE relation_id = :relation_id AND workspace = :workspace
492
+ """
493
+ params = {"relation_id": id, "workspace": self.db.workspace}
494
+ elif self.namespace == NameSpace.VECTOR_STORE_CHUNKS:
495
+ sql_template = """
496
+ SELECT chunk_id as id, content, tokens, chunk_order_index, full_doc_id
497
+ FROM LIGHTRAG_DOC_CHUNKS
498
+ WHERE chunk_id = :chunk_id AND workspace = :workspace
499
+ """
500
+ params = {"chunk_id": id, "workspace": self.db.workspace}
501
+ else:
502
+ logger.warning(
503
+ f"Namespace {self.namespace} not supported for get_by_id"
504
+ )
505
+ return None
506
+
507
+ result = await self.db.query(sql_template, params=params)
508
+ return result
509
+ except Exception as e:
510
+ logger.error(f"Error retrieving vector data for ID {id}: {e}")
511
+ return None
512
+
513
+ async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
514
+ """Get multiple vector data by their IDs
515
+
516
+ Args:
517
+ ids: List of unique identifiers
518
+
519
+ Returns:
520
+ List of vector data objects that were found
521
+ """
522
+ if not ids:
523
+ return []
524
+
525
+ try:
526
+ # Format IDs for SQL IN clause
527
+ ids_str = ", ".join([f"'{id}'" for id in ids])
528
+
529
+ # Determine which table to query based on namespace
530
+ if self.namespace == NameSpace.VECTOR_STORE_ENTITIES:
531
+ sql_template = f"""
532
+ SELECT entity_id as id, name as entity_name, entity_type, description, content
533
+ FROM LIGHTRAG_GRAPH_NODES
534
+ WHERE entity_id IN ({ids_str}) AND workspace = :workspace
535
+ """
536
+ elif self.namespace == NameSpace.VECTOR_STORE_RELATIONSHIPS:
537
+ sql_template = f"""
538
+ SELECT relation_id as id, source_name as src_id, target_name as tgt_id,
539
+ keywords, description, content
540
+ FROM LIGHTRAG_GRAPH_EDGES
541
+ WHERE relation_id IN ({ids_str}) AND workspace = :workspace
542
+ """
543
+ elif self.namespace == NameSpace.VECTOR_STORE_CHUNKS:
544
+ sql_template = f"""
545
+ SELECT chunk_id as id, content, tokens, chunk_order_index, full_doc_id
546
+ FROM LIGHTRAG_DOC_CHUNKS
547
+ WHERE chunk_id IN ({ids_str}) AND workspace = :workspace
548
+ """
549
+ else:
550
+ logger.warning(
551
+ f"Namespace {self.namespace} not supported for get_by_ids"
552
+ )
553
+ return []
554
+
555
+ params = {"workspace": self.db.workspace}
556
+ results = await self.db.query(sql_template, params=params, multirows=True)
557
+ return results if results else []
558
+ except Exception as e:
559
+ logger.error(f"Error retrieving vector data for IDs {ids}: {e}")
560
+ return []
561
+
562
 
563
  @final
564
  @dataclass
lightrag/lightrag.py CHANGED
@@ -30,11 +30,10 @@ from .namespace import NameSpace, make_namespace
30
  from .operate import (
31
  chunking_by_token_size,
32
  extract_entities,
33
- extract_keywords_only,
34
  kg_query,
35
- kg_query_with_keywords,
36
  mix_kg_vector_query,
37
  naive_query,
 
38
  )
39
  from .prompt import GRAPH_FIELD_SEP, PROMPTS
40
  from .utils import (
@@ -45,6 +44,9 @@ from .utils import (
45
  encode_string_by_tiktoken,
46
  lazy_external_import,
47
  limit_async_func_call,
 
 
 
48
  logger,
49
  )
50
  from .types import KnowledgeGraph
@@ -309,7 +311,7 @@ class LightRAG:
309
  # Verify storage implementation compatibility
310
  verify_storage_implementation(storage_type, storage_name)
311
  # Check environment variables
312
- # self.check_storage_env_vars(storage_name)
313
 
314
  # Ensure vector_db_storage_cls_kwargs has required fields
315
  self.vector_db_storage_cls_kwargs = {
@@ -354,6 +356,9 @@ class LightRAG:
354
  namespace=make_namespace(
355
  self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
356
  ),
 
 
 
357
  embedding_func=self.embedding_func,
358
  )
359
 
@@ -404,18 +409,8 @@ class LightRAG:
404
  embedding_func=None,
405
  )
406
 
407
- if self.llm_response_cache and hasattr(
408
- self.llm_response_cache, "global_config"
409
- ):
410
- hashing_kv = self.llm_response_cache
411
- else:
412
- hashing_kv = self.key_string_value_json_storage_cls( # type: ignore
413
- namespace=make_namespace(
414
- self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
415
- ),
416
- global_config=asdict(self),
417
- embedding_func=self.embedding_func,
418
- )
419
 
420
  self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
421
  partial(
@@ -543,11 +538,6 @@ class LightRAG:
543
  storage_class = lazy_external_import(import_path, storage_name)
544
  return storage_class
545
 
546
- @staticmethod
547
- def clean_text(text: str) -> str:
548
- """Clean text by removing null bytes (0x00) and whitespace"""
549
- return text.strip().replace("\x00", "")
550
-
551
  def insert(
552
  self,
553
  input: str | list[str],
@@ -590,6 +580,7 @@ class LightRAG:
590
  split_by_character, split_by_character_only
591
  )
592
 
 
593
  def insert_custom_chunks(
594
  self,
595
  full_text: str,
@@ -601,14 +592,15 @@ class LightRAG:
601
  self.ainsert_custom_chunks(full_text, text_chunks, doc_id)
602
  )
603
 
 
604
  async def ainsert_custom_chunks(
605
  self, full_text: str, text_chunks: list[str], doc_id: str | None = None
606
  ) -> None:
607
  update_storage = False
608
  try:
609
  # Clean input texts
610
- full_text = self.clean_text(full_text)
611
- text_chunks = [self.clean_text(chunk) for chunk in text_chunks]
612
 
613
  # Process cleaned texts
614
  if doc_id is None:
@@ -687,7 +679,7 @@ class LightRAG:
687
  contents = {id_: doc for id_, doc in zip(ids, input)}
688
  else:
689
  # Clean input text and remove duplicates
690
- input = list(set(self.clean_text(doc) for doc in input))
691
  # Generate contents dict of MD5 hash IDs and documents
692
  contents = {compute_mdhash_id(doc, prefix="doc-"): doc for doc in input}
693
 
@@ -703,7 +695,7 @@ class LightRAG:
703
  new_docs: dict[str, Any] = {
704
  id_: {
705
  "content": content,
706
- "content_summary": self._get_content_summary(content),
707
  "content_length": len(content),
708
  "status": DocStatus.PENDING,
709
  "created_at": datetime.now().isoformat(),
@@ -892,7 +884,9 @@ class LightRAG:
892
  self.chunks_vdb.upsert(chunks)
893
  )
894
  entity_relation_task = asyncio.create_task(
895
- self._process_entity_relation_graph(chunks)
 
 
896
  )
897
  full_docs_task = asyncio.create_task(
898
  self.full_docs.upsert(
@@ -1007,21 +1001,27 @@ class LightRAG:
1007
  pipeline_status["latest_message"] = log_message
1008
  pipeline_status["history_messages"].append(log_message)
1009
 
1010
- async def _process_entity_relation_graph(self, chunk: dict[str, Any]) -> None:
 
 
1011
  try:
1012
  await extract_entities(
1013
  chunk,
1014
  knowledge_graph_inst=self.chunk_entity_relation_graph,
1015
  entity_vdb=self.entities_vdb,
1016
  relationships_vdb=self.relationships_vdb,
1017
- llm_response_cache=self.llm_response_cache,
1018
  global_config=asdict(self),
 
 
 
1019
  )
1020
  except Exception as e:
1021
  logger.error("Failed to extract entities and relationships")
1022
  raise e
1023
 
1024
- async def _insert_done(self) -> None:
 
 
1025
  tasks = [
1026
  cast(StorageNameSpace, storage_inst).index_done_callback()
1027
  for storage_inst in [ # type: ignore
@@ -1040,12 +1040,10 @@ class LightRAG:
1040
  log_message = "All Insert done"
1041
  logger.info(log_message)
1042
 
1043
- # 获取 pipeline_status 并更新 latest_message history_messages
1044
- from lightrag.kg.shared_storage import get_namespace_data
1045
-
1046
- pipeline_status = await get_namespace_data("pipeline_status")
1047
- pipeline_status["latest_message"] = log_message
1048
- pipeline_status["history_messages"].append(log_message)
1049
 
1050
  def insert_custom_kg(
1051
  self, custom_kg: dict[str, Any], full_doc_id: str = None
@@ -1062,7 +1060,7 @@ class LightRAG:
1062
  all_chunks_data: dict[str, dict[str, str]] = {}
1063
  chunk_to_source_map: dict[str, str] = {}
1064
  for chunk_data in custom_kg.get("chunks", []):
1065
- chunk_content = self.clean_text(chunk_data["content"])
1066
  source_id = chunk_data["source_id"]
1067
  tokens = len(
1068
  encode_string_by_tiktoken(
@@ -1260,16 +1258,7 @@ class LightRAG:
1260
  self.text_chunks,
1261
  param,
1262
  asdict(self),
1263
- hashing_kv=self.llm_response_cache
1264
- if self.llm_response_cache
1265
- and hasattr(self.llm_response_cache, "global_config")
1266
- else self.key_string_value_json_storage_cls(
1267
- namespace=make_namespace(
1268
- self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
1269
- ),
1270
- global_config=asdict(self),
1271
- embedding_func=self.embedding_func,
1272
- ),
1273
  system_prompt=system_prompt,
1274
  )
1275
  elif param.mode == "naive":
@@ -1279,16 +1268,7 @@ class LightRAG:
1279
  self.text_chunks,
1280
  param,
1281
  asdict(self),
1282
- hashing_kv=self.llm_response_cache
1283
- if self.llm_response_cache
1284
- and hasattr(self.llm_response_cache, "global_config")
1285
- else self.key_string_value_json_storage_cls(
1286
- namespace=make_namespace(
1287
- self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
1288
- ),
1289
- global_config=asdict(self),
1290
- embedding_func=self.embedding_func,
1291
- ),
1292
  system_prompt=system_prompt,
1293
  )
1294
  elif param.mode == "mix":
@@ -1301,16 +1281,7 @@ class LightRAG:
1301
  self.text_chunks,
1302
  param,
1303
  asdict(self),
1304
- hashing_kv=self.llm_response_cache
1305
- if self.llm_response_cache
1306
- and hasattr(self.llm_response_cache, "global_config")
1307
- else self.key_string_value_json_storage_cls(
1308
- namespace=make_namespace(
1309
- self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
1310
- ),
1311
- global_config=asdict(self),
1312
- embedding_func=self.embedding_func,
1313
- ),
1314
  system_prompt=system_prompt,
1315
  )
1316
  else:
@@ -1322,8 +1293,17 @@ class LightRAG:
1322
  self, query: str, prompt: str, param: QueryParam = QueryParam()
1323
  ):
1324
  """
1325
- 1. Extract keywords from the 'query' using new function in operate.py.
1326
- 2. Then run the standard aquery() flow with the final prompt (formatted_question).
 
 
 
 
 
 
 
 
 
1327
  """
1328
  loop = always_get_an_event_loop()
1329
  return loop.run_until_complete(
@@ -1334,100 +1314,29 @@ class LightRAG:
1334
  self, query: str, prompt: str, param: QueryParam = QueryParam()
1335
  ) -> str | AsyncIterator[str]:
1336
  """
1337
- 1. Calls extract_keywords_only to get HL/LL keywords from 'query'.
1338
- 2. Then calls kg_query(...) or naive_query(...), etc. as the main query, while also injecting the newly extracted keywords if needed.
 
 
 
 
 
 
 
1339
  """
1340
- # ---------------------
1341
- # STEP 1: Keyword Extraction
1342
- # ---------------------
1343
- hl_keywords, ll_keywords = await extract_keywords_only(
1344
- text=query,
1345
  param=param,
 
 
 
 
 
1346
  global_config=asdict(self),
1347
- hashing_kv=self.llm_response_cache
1348
- or self.key_string_value_json_storage_cls(
1349
- namespace=make_namespace(
1350
- self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
1351
- ),
1352
- global_config=asdict(self),
1353
- embedding_func=self.embedding_func,
1354
- ),
1355
  )
1356
 
1357
- param.hl_keywords = hl_keywords
1358
- param.ll_keywords = ll_keywords
1359
-
1360
- # ---------------------
1361
- # STEP 2: Final Query Logic
1362
- # ---------------------
1363
-
1364
- # Create a new string with the prompt and the keywords
1365
- ll_keywords_str = ", ".join(ll_keywords)
1366
- hl_keywords_str = ", ".join(hl_keywords)
1367
- formatted_question = f"{prompt}\n\n### Keywords:\nHigh-level: {hl_keywords_str}\nLow-level: {ll_keywords_str}\n\n### Query:\n{query}"
1368
-
1369
- if param.mode in ["local", "global", "hybrid"]:
1370
- response = await kg_query_with_keywords(
1371
- formatted_question,
1372
- self.chunk_entity_relation_graph,
1373
- self.entities_vdb,
1374
- self.relationships_vdb,
1375
- self.text_chunks,
1376
- param,
1377
- asdict(self),
1378
- hashing_kv=self.llm_response_cache
1379
- if self.llm_response_cache
1380
- and hasattr(self.llm_response_cache, "global_config")
1381
- else self.key_string_value_json_storage_cls(
1382
- namespace=make_namespace(
1383
- self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
1384
- ),
1385
- global_config=asdict(self),
1386
- embedding_func=self.embedding_func,
1387
- ),
1388
- )
1389
- elif param.mode == "naive":
1390
- response = await naive_query(
1391
- formatted_question,
1392
- self.chunks_vdb,
1393
- self.text_chunks,
1394
- param,
1395
- asdict(self),
1396
- hashing_kv=self.llm_response_cache
1397
- if self.llm_response_cache
1398
- and hasattr(self.llm_response_cache, "global_config")
1399
- else self.key_string_value_json_storage_cls(
1400
- namespace=make_namespace(
1401
- self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
1402
- ),
1403
- global_config=asdict(self),
1404
- embedding_func=self.embedding_func,
1405
- ),
1406
- )
1407
- elif param.mode == "mix":
1408
- response = await mix_kg_vector_query(
1409
- formatted_question,
1410
- self.chunk_entity_relation_graph,
1411
- self.entities_vdb,
1412
- self.relationships_vdb,
1413
- self.chunks_vdb,
1414
- self.text_chunks,
1415
- param,
1416
- asdict(self),
1417
- hashing_kv=self.llm_response_cache
1418
- if self.llm_response_cache
1419
- and hasattr(self.llm_response_cache, "global_config")
1420
- else self.key_string_value_json_storage_cls(
1421
- namespace=make_namespace(
1422
- self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
1423
- ),
1424
- global_config=asdict(self),
1425
- embedding_func=self.embedding_func,
1426
- ),
1427
- )
1428
- else:
1429
- raise ValueError(f"Unknown mode {param.mode}")
1430
-
1431
  await self._query_done()
1432
  return response
1433
 
@@ -1525,21 +1434,6 @@ class LightRAG:
1525
  ]
1526
  )
1527
 
1528
- def _get_content_summary(self, content: str, max_length: int = 100) -> str:
1529
- """Get summary of document content
1530
-
1531
- Args:
1532
- content: Original document content
1533
- max_length: Maximum length of summary
1534
-
1535
- Returns:
1536
- Truncated content with ellipsis if needed
1537
- """
1538
- content = content.strip()
1539
- if len(content) <= max_length:
1540
- return content
1541
- return content[:max_length] + "..."
1542
-
1543
  async def get_processing_status(self) -> dict[str, int]:
1544
  """Get current document processing status counts
1545
 
@@ -1816,19 +1710,7 @@ class LightRAG:
1816
  async def get_entity_info(
1817
  self, entity_name: str, include_vector_data: bool = False
1818
  ) -> dict[str, str | None | dict[str, str]]:
1819
- """Get detailed information of an entity
1820
-
1821
- Args:
1822
- entity_name: Entity name (no need for quotes)
1823
- include_vector_data: Whether to include data from the vector database
1824
-
1825
- Returns:
1826
- dict: A dictionary containing entity information, including:
1827
- - entity_name: Entity name
1828
- - source_id: Source document ID
1829
- - graph_data: Complete node data from the graph database
1830
- - vector_data: (optional) Data from the vector database
1831
- """
1832
 
1833
  # Get information from the graph
1834
  node_data = await self.chunk_entity_relation_graph.get_node(entity_name)
@@ -1843,29 +1725,15 @@ class LightRAG:
1843
  # Optional: Get vector database information
1844
  if include_vector_data:
1845
  entity_id = compute_mdhash_id(entity_name, prefix="ent-")
1846
- vector_data = self.entities_vdb._client.get([entity_id])
1847
- result["vector_data"] = vector_data[0] if vector_data else None
1848
 
1849
  return result
1850
 
1851
  async def get_relation_info(
1852
  self, src_entity: str, tgt_entity: str, include_vector_data: bool = False
1853
  ) -> dict[str, str | None | dict[str, str]]:
1854
- """Get detailed information of a relationship
1855
-
1856
- Args:
1857
- src_entity: Source entity name (no need for quotes)
1858
- tgt_entity: Target entity name (no need for quotes)
1859
- include_vector_data: Whether to include data from the vector database
1860
-
1861
- Returns:
1862
- dict: A dictionary containing relationship information, including:
1863
- - src_entity: Source entity name
1864
- - tgt_entity: Target entity name
1865
- - source_id: Source document ID
1866
- - graph_data: Complete edge data from the graph database
1867
- - vector_data: (optional) Data from the vector database
1868
- """
1869
 
1870
  # Get information from the graph
1871
  edge_data = await self.chunk_entity_relation_graph.get_edge(
@@ -1883,8 +1751,8 @@ class LightRAG:
1883
  # Optional: Get vector database information
1884
  if include_vector_data:
1885
  rel_id = compute_mdhash_id(src_entity + tgt_entity, prefix="rel-")
1886
- vector_data = self.relationships_vdb._client.get([rel_id])
1887
- result["vector_data"] = vector_data[0] if vector_data else None
1888
 
1889
  return result
1890
 
@@ -2682,6 +2550,12 @@ class LightRAG:
2682
 
2683
  # 9. Delete source entities
2684
  for entity_name in source_entities:
 
 
 
 
 
 
2685
  # Delete entity node from knowledge graph
2686
  await self.chunk_entity_relation_graph.delete_node(entity_name)
2687
 
 
30
  from .operate import (
31
  chunking_by_token_size,
32
  extract_entities,
 
33
  kg_query,
 
34
  mix_kg_vector_query,
35
  naive_query,
36
+ query_with_keywords,
37
  )
38
  from .prompt import GRAPH_FIELD_SEP, PROMPTS
39
  from .utils import (
 
44
  encode_string_by_tiktoken,
45
  lazy_external_import,
46
  limit_async_func_call,
47
+ get_content_summary,
48
+ clean_text,
49
+ check_storage_env_vars,
50
  logger,
51
  )
52
  from .types import KnowledgeGraph
 
311
  # Verify storage implementation compatibility
312
  verify_storage_implementation(storage_type, storage_name)
313
  # Check environment variables
314
+ check_storage_env_vars(storage_name)
315
 
316
  # Ensure vector_db_storage_cls_kwargs has required fields
317
  self.vector_db_storage_cls_kwargs = {
 
356
  namespace=make_namespace(
357
  self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
358
  ),
359
+ global_config=asdict(
360
+ self
361
+ ), # Add global_config to ensure cache works properly
362
  embedding_func=self.embedding_func,
363
  )
364
 
 
409
  embedding_func=None,
410
  )
411
 
412
+ # Directly use llm_response_cache, don't create a new object
413
+ hashing_kv = self.llm_response_cache
 
 
 
 
 
 
 
 
 
 
414
 
415
  self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
416
  partial(
 
538
  storage_class = lazy_external_import(import_path, storage_name)
539
  return storage_class
540
 
 
 
 
 
 
541
  def insert(
542
  self,
543
  input: str | list[str],
 
580
  split_by_character, split_by_character_only
581
  )
582
 
583
+ # TODO: deprecated, use insert instead
584
  def insert_custom_chunks(
585
  self,
586
  full_text: str,
 
592
  self.ainsert_custom_chunks(full_text, text_chunks, doc_id)
593
  )
594
 
595
+ # TODO: deprecated, use ainsert instead
596
  async def ainsert_custom_chunks(
597
  self, full_text: str, text_chunks: list[str], doc_id: str | None = None
598
  ) -> None:
599
  update_storage = False
600
  try:
601
  # Clean input texts
602
+ full_text = clean_text(full_text)
603
+ text_chunks = [clean_text(chunk) for chunk in text_chunks]
604
 
605
  # Process cleaned texts
606
  if doc_id is None:
 
679
  contents = {id_: doc for id_, doc in zip(ids, input)}
680
  else:
681
  # Clean input text and remove duplicates
682
+ input = list(set(clean_text(doc) for doc in input))
683
  # Generate contents dict of MD5 hash IDs and documents
684
  contents = {compute_mdhash_id(doc, prefix="doc-"): doc for doc in input}
685
 
 
695
  new_docs: dict[str, Any] = {
696
  id_: {
697
  "content": content,
698
+ "content_summary": get_content_summary(content),
699
  "content_length": len(content),
700
  "status": DocStatus.PENDING,
701
  "created_at": datetime.now().isoformat(),
 
884
  self.chunks_vdb.upsert(chunks)
885
  )
886
  entity_relation_task = asyncio.create_task(
887
+ self._process_entity_relation_graph(
888
+ chunks, pipeline_status, pipeline_status_lock
889
+ )
890
  )
891
  full_docs_task = asyncio.create_task(
892
  self.full_docs.upsert(
 
1001
  pipeline_status["latest_message"] = log_message
1002
  pipeline_status["history_messages"].append(log_message)
1003
 
1004
+ async def _process_entity_relation_graph(
1005
+ self, chunk: dict[str, Any], pipeline_status=None, pipeline_status_lock=None
1006
+ ) -> None:
1007
  try:
1008
  await extract_entities(
1009
  chunk,
1010
  knowledge_graph_inst=self.chunk_entity_relation_graph,
1011
  entity_vdb=self.entities_vdb,
1012
  relationships_vdb=self.relationships_vdb,
 
1013
  global_config=asdict(self),
1014
+ pipeline_status=pipeline_status,
1015
+ pipeline_status_lock=pipeline_status_lock,
1016
+ llm_response_cache=self.llm_response_cache,
1017
  )
1018
  except Exception as e:
1019
  logger.error("Failed to extract entities and relationships")
1020
  raise e
1021
 
1022
+ async def _insert_done(
1023
+ self, pipeline_status=None, pipeline_status_lock=None
1024
+ ) -> None:
1025
  tasks = [
1026
  cast(StorageNameSpace, storage_inst).index_done_callback()
1027
  for storage_inst in [ # type: ignore
 
1040
  log_message = "All Insert done"
1041
  logger.info(log_message)
1042
 
1043
+ if pipeline_status is not None and pipeline_status_lock is not None:
1044
+ async with pipeline_status_lock:
1045
+ pipeline_status["latest_message"] = log_message
1046
+ pipeline_status["history_messages"].append(log_message)
 
 
1047
 
1048
  def insert_custom_kg(
1049
  self, custom_kg: dict[str, Any], full_doc_id: str = None
 
1060
  all_chunks_data: dict[str, dict[str, str]] = {}
1061
  chunk_to_source_map: dict[str, str] = {}
1062
  for chunk_data in custom_kg.get("chunks", []):
1063
+ chunk_content = clean_text(chunk_data["content"])
1064
  source_id = chunk_data["source_id"]
1065
  tokens = len(
1066
  encode_string_by_tiktoken(
 
1258
  self.text_chunks,
1259
  param,
1260
  asdict(self),
1261
+ hashing_kv=self.llm_response_cache, # Directly use llm_response_cache
 
 
 
 
 
 
 
 
 
1262
  system_prompt=system_prompt,
1263
  )
1264
  elif param.mode == "naive":
 
1268
  self.text_chunks,
1269
  param,
1270
  asdict(self),
1271
+ hashing_kv=self.llm_response_cache, # Directly use llm_response_cache
 
 
 
 
 
 
 
 
 
1272
  system_prompt=system_prompt,
1273
  )
1274
  elif param.mode == "mix":
 
1281
  self.text_chunks,
1282
  param,
1283
  asdict(self),
1284
+ hashing_kv=self.llm_response_cache, # Directly use llm_response_cache
 
 
 
 
 
 
 
 
 
1285
  system_prompt=system_prompt,
1286
  )
1287
  else:
 
1293
  self, query: str, prompt: str, param: QueryParam = QueryParam()
1294
  ):
1295
  """
1296
+ Query with separate keyword extraction step.
1297
+
1298
+ This method extracts keywords from the query first, then uses them for the query.
1299
+
1300
+ Args:
1301
+ query: User query
1302
+ prompt: Additional prompt for the query
1303
+ param: Query parameters
1304
+
1305
+ Returns:
1306
+ Query response
1307
  """
1308
  loop = always_get_an_event_loop()
1309
  return loop.run_until_complete(
 
1314
  self, query: str, prompt: str, param: QueryParam = QueryParam()
1315
  ) -> str | AsyncIterator[str]:
1316
  """
1317
+ Async version of query_with_separate_keyword_extraction.
1318
+
1319
+ Args:
1320
+ query: User query
1321
+ prompt: Additional prompt for the query
1322
+ param: Query parameters
1323
+
1324
+ Returns:
1325
+ Query response or async iterator
1326
  """
1327
+ response = await query_with_keywords(
1328
+ query=query,
1329
+ prompt=prompt,
 
 
1330
  param=param,
1331
+ knowledge_graph_inst=self.chunk_entity_relation_graph,
1332
+ entities_vdb=self.entities_vdb,
1333
+ relationships_vdb=self.relationships_vdb,
1334
+ chunks_vdb=self.chunks_vdb,
1335
+ text_chunks_db=self.text_chunks,
1336
  global_config=asdict(self),
1337
+ hashing_kv=self.llm_response_cache,
 
 
 
 
 
 
 
1338
  )
1339
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1340
  await self._query_done()
1341
  return response
1342
 
 
1434
  ]
1435
  )
1436
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1437
  async def get_processing_status(self) -> dict[str, int]:
1438
  """Get current document processing status counts
1439
 
 
1710
  async def get_entity_info(
1711
  self, entity_name: str, include_vector_data: bool = False
1712
  ) -> dict[str, str | None | dict[str, str]]:
1713
+ """Get detailed information of an entity"""
 
 
 
 
 
 
 
 
 
 
 
 
1714
 
1715
  # Get information from the graph
1716
  node_data = await self.chunk_entity_relation_graph.get_node(entity_name)
 
1725
  # Optional: Get vector database information
1726
  if include_vector_data:
1727
  entity_id = compute_mdhash_id(entity_name, prefix="ent-")
1728
+ vector_data = await self.entities_vdb.get_by_id(entity_id)
1729
+ result["vector_data"] = vector_data
1730
 
1731
  return result
1732
 
1733
  async def get_relation_info(
1734
  self, src_entity: str, tgt_entity: str, include_vector_data: bool = False
1735
  ) -> dict[str, str | None | dict[str, str]]:
1736
+ """Get detailed information of a relationship"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1737
 
1738
  # Get information from the graph
1739
  edge_data = await self.chunk_entity_relation_graph.get_edge(
 
1751
  # Optional: Get vector database information
1752
  if include_vector_data:
1753
  rel_id = compute_mdhash_id(src_entity + tgt_entity, prefix="rel-")
1754
+ vector_data = await self.relationships_vdb.get_by_id(rel_id)
1755
+ result["vector_data"] = vector_data
1756
 
1757
  return result
1758
 
 
2550
 
2551
  # 9. Delete source entities
2552
  for entity_name in source_entities:
2553
+ if entity_name == target_entity:
2554
+ logger.info(
2555
+ f"Skipping deletion of '{entity_name}' as it's also the target entity"
2556
+ )
2557
+ continue
2558
+
2559
  # Delete entity node from knowledge graph
2560
  await self.chunk_entity_relation_graph.delete_node(entity_name)
2561
 
lightrag/llm/azure_openai.py CHANGED
@@ -55,6 +55,7 @@ async def azure_openai_complete_if_cache(
55
 
56
  openai_async_client = AsyncAzureOpenAI(
57
  azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
 
58
  api_key=os.getenv("AZURE_OPENAI_API_KEY"),
59
  api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
60
  )
@@ -136,6 +137,7 @@ async def azure_openai_embed(
136
 
137
  openai_async_client = AsyncAzureOpenAI(
138
  azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
 
139
  api_key=os.getenv("AZURE_OPENAI_API_KEY"),
140
  api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
141
  )
 
55
 
56
  openai_async_client = AsyncAzureOpenAI(
57
  azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
58
+ azure_deployment=model,
59
  api_key=os.getenv("AZURE_OPENAI_API_KEY"),
60
  api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
61
  )
 
137
 
138
  openai_async_client = AsyncAzureOpenAI(
139
  azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
140
+ azure_deployment=model,
141
  api_key=os.getenv("AZURE_OPENAI_API_KEY"),
142
  api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
143
  )
lightrag/operate.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
3
  import asyncio
4
  import json
5
  import re
 
6
  from typing import Any, AsyncIterator
7
  from collections import Counter, defaultdict
8
 
@@ -140,18 +141,36 @@ async def _handle_single_entity_extraction(
140
  ):
141
  if len(record_attributes) < 4 or record_attributes[0] != '"entity"':
142
  return None
143
- # add this record as a node in the G
 
144
  entity_name = clean_str(record_attributes[1]).strip('"')
145
  if not entity_name.strip():
 
 
 
146
  return None
 
 
147
  entity_type = clean_str(record_attributes[2]).strip('"')
 
 
 
 
 
 
 
148
  entity_description = clean_str(record_attributes[3]).strip('"')
149
- entity_source_id = chunk_key
 
 
 
 
 
150
  return dict(
151
  entity_name=entity_name,
152
  entity_type=entity_type,
153
  description=entity_description,
154
- source_id=entity_source_id,
155
  metadata={"created_at": time.time()},
156
  )
157
 
@@ -220,6 +239,7 @@ async def _merge_nodes_then_upsert(
220
  entity_name, description, global_config
221
  )
222
  node_data = dict(
 
223
  entity_type=entity_type,
224
  description=description,
225
  source_id=source_id,
@@ -301,6 +321,7 @@ async def _merge_edges_then_upsert(
301
  await knowledge_graph_inst.upsert_node(
302
  need_insert_id,
303
  node_data={
 
304
  "source_id": source_id,
305
  "description": description,
306
  "entity_type": "UNKNOWN",
@@ -337,11 +358,10 @@ async def extract_entities(
337
  entity_vdb: BaseVectorStorage,
338
  relationships_vdb: BaseVectorStorage,
339
  global_config: dict[str, str],
 
 
340
  llm_response_cache: BaseKVStorage | None = None,
341
  ) -> None:
342
- from lightrag.kg.shared_storage import get_namespace_data
343
-
344
- pipeline_status = await get_namespace_data("pipeline_status")
345
  use_llm_func: callable = global_config["llm_model_func"]
346
  entity_extract_max_gleaning = global_config["entity_extract_max_gleaning"]
347
  enable_llm_cache_for_entity_extract: bool = global_config[
@@ -400,6 +420,7 @@ async def extract_entities(
400
  else:
401
  _prompt = input_text
402
 
 
403
  arg_hash = compute_args_hash(_prompt)
404
  cached_return, _1, _2, _3 = await handle_cache(
405
  llm_response_cache,
@@ -407,7 +428,6 @@ async def extract_entities(
407
  _prompt,
408
  "default",
409
  cache_type="extract",
410
- force_llm_cache=True,
411
  )
412
  if cached_return:
413
  logger.debug(f"Found cache for {arg_hash}")
@@ -436,30 +456,91 @@ async def extract_entities(
436
  else:
437
  return await use_llm_func(input_text)
438
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439
  async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]):
440
- """ "Prpocess a single chunk
441
  Args:
442
  chunk_key_dp (tuple[str, TextChunkSchema]):
443
- ("chunck-xxxxxx", {"tokens": int, "content": str, "full_doc_id": str, "chunk_order_index": int})
444
  """
445
  nonlocal processed_chunks
446
  chunk_key = chunk_key_dp[0]
447
  chunk_dp = chunk_key_dp[1]
448
  content = chunk_dp["content"]
449
- # hint_prompt = entity_extract_prompt.format(**context_base, input_text=content)
 
450
  hint_prompt = entity_extract_prompt.format(
451
  **context_base, input_text="{input_text}"
452
  ).format(**context_base, input_text=content)
453
 
454
  final_result = await _user_llm_func_with_cache(hint_prompt)
455
  history = pack_user_ass_to_openai_messages(hint_prompt, final_result)
 
 
 
 
 
 
 
456
  for now_glean_index in range(entity_extract_max_gleaning):
457
  glean_result = await _user_llm_func_with_cache(
458
  continue_prompt, history_messages=history
459
  )
460
 
461
  history += pack_user_ass_to_openai_messages(continue_prompt, glean_result)
462
- final_result += glean_result
 
 
 
 
 
 
 
 
 
 
 
463
  if now_glean_index == entity_extract_max_gleaning - 1:
464
  break
465
 
@@ -470,42 +551,15 @@ async def extract_entities(
470
  if if_loop_result != "yes":
471
  break
472
 
473
- records = split_string_by_multi_markers(
474
- final_result,
475
- [context_base["record_delimiter"], context_base["completion_delimiter"]],
476
- )
477
-
478
- maybe_nodes = defaultdict(list)
479
- maybe_edges = defaultdict(list)
480
- for record in records:
481
- record = re.search(r"\((.*)\)", record)
482
- if record is None:
483
- continue
484
- record = record.group(1)
485
- record_attributes = split_string_by_multi_markers(
486
- record, [context_base["tuple_delimiter"]]
487
- )
488
- if_entities = await _handle_single_entity_extraction(
489
- record_attributes, chunk_key
490
- )
491
- if if_entities is not None:
492
- maybe_nodes[if_entities["entity_name"]].append(if_entities)
493
- continue
494
-
495
- if_relation = await _handle_single_relationship_extraction(
496
- record_attributes, chunk_key
497
- )
498
- if if_relation is not None:
499
- maybe_edges[(if_relation["src_id"], if_relation["tgt_id"])].append(
500
- if_relation
501
- )
502
  processed_chunks += 1
503
  entities_count = len(maybe_nodes)
504
  relations_count = len(maybe_edges)
505
  log_message = f" Chunk {processed_chunks}/{total_chunks}: extracted {entities_count} entities and {relations_count} relationships (deduplicated)"
506
  logger.info(log_message)
507
- pipeline_status["latest_message"] = log_message
508
- pipeline_status["history_messages"].append(log_message)
 
 
509
  return dict(maybe_nodes), dict(maybe_edges)
510
 
511
  tasks = [_process_single_content(c) for c in ordered_chunks]
@@ -519,42 +573,58 @@ async def extract_entities(
519
  for k, v in m_edges.items():
520
  maybe_edges[tuple(sorted(k))].extend(v)
521
 
522
- all_entities_data = await asyncio.gather(
523
- *[
524
- _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
525
- for k, v in maybe_nodes.items()
526
- ]
527
- )
528
 
529
- all_relationships_data = await asyncio.gather(
530
- *[
531
- _merge_edges_then_upsert(k[0], k[1], v, knowledge_graph_inst, global_config)
532
- for k, v in maybe_edges.items()
533
- ]
534
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
535
 
536
  if not (all_entities_data or all_relationships_data):
537
  log_message = "Didn't extract any entities and relationships."
538
  logger.info(log_message)
539
- pipeline_status["latest_message"] = log_message
540
- pipeline_status["history_messages"].append(log_message)
 
 
541
  return
542
 
543
  if not all_entities_data:
544
  log_message = "Didn't extract any entities"
545
  logger.info(log_message)
546
- pipeline_status["latest_message"] = log_message
547
- pipeline_status["history_messages"].append(log_message)
 
 
548
  if not all_relationships_data:
549
  log_message = "Didn't extract any relationships"
550
  logger.info(log_message)
551
- pipeline_status["latest_message"] = log_message
552
- pipeline_status["history_messages"].append(log_message)
 
 
553
 
554
  log_message = f"Extracted {len(all_entities_data)} entities and {len(all_relationships_data)} relationships (deduplicated)"
555
  logger.info(log_message)
556
- pipeline_status["latest_message"] = log_message
557
- pipeline_status["history_messages"].append(log_message)
 
 
558
  verbose_debug(
559
  f"New entities:{all_entities_data}, relationships:{all_relationships_data}"
560
  )
@@ -1020,6 +1090,7 @@ async def _build_query_context(
1020
  text_chunks_db: BaseKVStorage,
1021
  query_param: QueryParam,
1022
  ):
 
1023
  if query_param.mode == "local":
1024
  entities_context, relations_context, text_units_context = await _get_node_data(
1025
  ll_keywords,
@@ -1845,3 +1916,90 @@ async def kg_query_with_keywords(
1845
  )
1846
 
1847
  return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import asyncio
4
  import json
5
  import re
6
+ import os
7
  from typing import Any, AsyncIterator
8
  from collections import Counter, defaultdict
9
 
 
141
  ):
142
  if len(record_attributes) < 4 or record_attributes[0] != '"entity"':
143
  return None
144
+
145
+ # Clean and validate entity name
146
  entity_name = clean_str(record_attributes[1]).strip('"')
147
  if not entity_name.strip():
148
+ logger.warning(
149
+ f"Entity extraction error: empty entity name in: {record_attributes}"
150
+ )
151
  return None
152
+
153
+ # Clean and validate entity type
154
  entity_type = clean_str(record_attributes[2]).strip('"')
155
+ if not entity_type.strip() or entity_type.startswith('("'):
156
+ logger.warning(
157
+ f"Entity extraction error: invalid entity type in: {record_attributes}"
158
+ )
159
+ return None
160
+
161
+ # Clean and validate description
162
  entity_description = clean_str(record_attributes[3]).strip('"')
163
+ if not entity_description.strip():
164
+ logger.warning(
165
+ f"Entity extraction error: empty description for entity '{entity_name}' of type '{entity_type}'"
166
+ )
167
+ return None
168
+
169
  return dict(
170
  entity_name=entity_name,
171
  entity_type=entity_type,
172
  description=entity_description,
173
+ source_id=chunk_key,
174
  metadata={"created_at": time.time()},
175
  )
176
 
 
239
  entity_name, description, global_config
240
  )
241
  node_data = dict(
242
+ entity_id=entity_name,
243
  entity_type=entity_type,
244
  description=description,
245
  source_id=source_id,
 
321
  await knowledge_graph_inst.upsert_node(
322
  need_insert_id,
323
  node_data={
324
+ "entity_id": need_insert_id,
325
  "source_id": source_id,
326
  "description": description,
327
  "entity_type": "UNKNOWN",
 
358
  entity_vdb: BaseVectorStorage,
359
  relationships_vdb: BaseVectorStorage,
360
  global_config: dict[str, str],
361
+ pipeline_status: dict = None,
362
+ pipeline_status_lock=None,
363
  llm_response_cache: BaseKVStorage | None = None,
364
  ) -> None:
 
 
 
365
  use_llm_func: callable = global_config["llm_model_func"]
366
  entity_extract_max_gleaning = global_config["entity_extract_max_gleaning"]
367
  enable_llm_cache_for_entity_extract: bool = global_config[
 
420
  else:
421
  _prompt = input_text
422
 
423
+ # TODO: add cache_type="extract"
424
  arg_hash = compute_args_hash(_prompt)
425
  cached_return, _1, _2, _3 = await handle_cache(
426
  llm_response_cache,
 
428
  _prompt,
429
  "default",
430
  cache_type="extract",
 
431
  )
432
  if cached_return:
433
  logger.debug(f"Found cache for {arg_hash}")
 
456
  else:
457
  return await use_llm_func(input_text)
458
 
459
+ async def _process_extraction_result(result: str, chunk_key: str):
460
+ """Process a single extraction result (either initial or gleaning)
461
+ Args:
462
+ result (str): The extraction result to process
463
+ chunk_key (str): The chunk key for source tracking
464
+ Returns:
465
+ tuple: (nodes_dict, edges_dict) containing the extracted entities and relationships
466
+ """
467
+ maybe_nodes = defaultdict(list)
468
+ maybe_edges = defaultdict(list)
469
+
470
+ records = split_string_by_multi_markers(
471
+ result,
472
+ [context_base["record_delimiter"], context_base["completion_delimiter"]],
473
+ )
474
+
475
+ for record in records:
476
+ record = re.search(r"\((.*)\)", record)
477
+ if record is None:
478
+ continue
479
+ record = record.group(1)
480
+ record_attributes = split_string_by_multi_markers(
481
+ record, [context_base["tuple_delimiter"]]
482
+ )
483
+
484
+ if_entities = await _handle_single_entity_extraction(
485
+ record_attributes, chunk_key
486
+ )
487
+ if if_entities is not None:
488
+ maybe_nodes[if_entities["entity_name"]].append(if_entities)
489
+ continue
490
+
491
+ if_relation = await _handle_single_relationship_extraction(
492
+ record_attributes, chunk_key
493
+ )
494
+ if if_relation is not None:
495
+ maybe_edges[(if_relation["src_id"], if_relation["tgt_id"])].append(
496
+ if_relation
497
+ )
498
+
499
+ return maybe_nodes, maybe_edges
500
+
501
  async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]):
502
+ """Process a single chunk
503
  Args:
504
  chunk_key_dp (tuple[str, TextChunkSchema]):
505
+ ("chunk-xxxxxx", {"tokens": int, "content": str, "full_doc_id": str, "chunk_order_index": int})
506
  """
507
  nonlocal processed_chunks
508
  chunk_key = chunk_key_dp[0]
509
  chunk_dp = chunk_key_dp[1]
510
  content = chunk_dp["content"]
511
+
512
+ # Get initial extraction
513
  hint_prompt = entity_extract_prompt.format(
514
  **context_base, input_text="{input_text}"
515
  ).format(**context_base, input_text=content)
516
 
517
  final_result = await _user_llm_func_with_cache(hint_prompt)
518
  history = pack_user_ass_to_openai_messages(hint_prompt, final_result)
519
+
520
+ # Process initial extraction
521
+ maybe_nodes, maybe_edges = await _process_extraction_result(
522
+ final_result, chunk_key
523
+ )
524
+
525
+ # Process additional gleaning results
526
  for now_glean_index in range(entity_extract_max_gleaning):
527
  glean_result = await _user_llm_func_with_cache(
528
  continue_prompt, history_messages=history
529
  )
530
 
531
  history += pack_user_ass_to_openai_messages(continue_prompt, glean_result)
532
+
533
+ # Process gleaning result separately
534
+ glean_nodes, glean_edges = await _process_extraction_result(
535
+ glean_result, chunk_key
536
+ )
537
+
538
+ # Merge results
539
+ for entity_name, entities in glean_nodes.items():
540
+ maybe_nodes[entity_name].extend(entities)
541
+ for edge_key, edges in glean_edges.items():
542
+ maybe_edges[edge_key].extend(edges)
543
+
544
  if now_glean_index == entity_extract_max_gleaning - 1:
545
  break
546
 
 
551
  if if_loop_result != "yes":
552
  break
553
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
554
  processed_chunks += 1
555
  entities_count = len(maybe_nodes)
556
  relations_count = len(maybe_edges)
557
  log_message = f" Chunk {processed_chunks}/{total_chunks}: extracted {entities_count} entities and {relations_count} relationships (deduplicated)"
558
  logger.info(log_message)
559
+ if pipeline_status is not None:
560
+ async with pipeline_status_lock:
561
+ pipeline_status["latest_message"] = log_message
562
+ pipeline_status["history_messages"].append(log_message)
563
  return dict(maybe_nodes), dict(maybe_edges)
564
 
565
  tasks = [_process_single_content(c) for c in ordered_chunks]
 
573
  for k, v in m_edges.items():
574
  maybe_edges[tuple(sorted(k))].extend(v)
575
 
576
+ from .kg.shared_storage import get_graph_db_lock
 
 
 
 
 
577
 
578
+ graph_db_lock = get_graph_db_lock(enable_logging=False)
579
+
580
+ # Ensure that nodes and edges are merged and upserted atomically
581
+ async with graph_db_lock:
582
+ all_entities_data = await asyncio.gather(
583
+ *[
584
+ _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
585
+ for k, v in maybe_nodes.items()
586
+ ]
587
+ )
588
+
589
+ all_relationships_data = await asyncio.gather(
590
+ *[
591
+ _merge_edges_then_upsert(
592
+ k[0], k[1], v, knowledge_graph_inst, global_config
593
+ )
594
+ for k, v in maybe_edges.items()
595
+ ]
596
+ )
597
 
598
  if not (all_entities_data or all_relationships_data):
599
  log_message = "Didn't extract any entities and relationships."
600
  logger.info(log_message)
601
+ if pipeline_status is not None:
602
+ async with pipeline_status_lock:
603
+ pipeline_status["latest_message"] = log_message
604
+ pipeline_status["history_messages"].append(log_message)
605
  return
606
 
607
  if not all_entities_data:
608
  log_message = "Didn't extract any entities"
609
  logger.info(log_message)
610
+ if pipeline_status is not None:
611
+ async with pipeline_status_lock:
612
+ pipeline_status["latest_message"] = log_message
613
+ pipeline_status["history_messages"].append(log_message)
614
  if not all_relationships_data:
615
  log_message = "Didn't extract any relationships"
616
  logger.info(log_message)
617
+ if pipeline_status is not None:
618
+ async with pipeline_status_lock:
619
+ pipeline_status["latest_message"] = log_message
620
+ pipeline_status["history_messages"].append(log_message)
621
 
622
  log_message = f"Extracted {len(all_entities_data)} entities and {len(all_relationships_data)} relationships (deduplicated)"
623
  logger.info(log_message)
624
+ if pipeline_status is not None:
625
+ async with pipeline_status_lock:
626
+ pipeline_status["latest_message"] = log_message
627
+ pipeline_status["history_messages"].append(log_message)
628
  verbose_debug(
629
  f"New entities:{all_entities_data}, relationships:{all_relationships_data}"
630
  )
 
1090
  text_chunks_db: BaseKVStorage,
1091
  query_param: QueryParam,
1092
  ):
1093
+ logger.info(f"Process {os.getpid()} buidling query context...")
1094
  if query_param.mode == "local":
1095
  entities_context, relations_context, text_units_context = await _get_node_data(
1096
  ll_keywords,
 
1916
  )
1917
 
1918
  return response
1919
+
1920
+
1921
+ async def query_with_keywords(
1922
+ query: str,
1923
+ prompt: str,
1924
+ param: QueryParam,
1925
+ knowledge_graph_inst: BaseGraphStorage,
1926
+ entities_vdb: BaseVectorStorage,
1927
+ relationships_vdb: BaseVectorStorage,
1928
+ chunks_vdb: BaseVectorStorage,
1929
+ text_chunks_db: BaseKVStorage,
1930
+ global_config: dict[str, str],
1931
+ hashing_kv: BaseKVStorage | None = None,
1932
+ ) -> str | AsyncIterator[str]:
1933
+ """
1934
+ Extract keywords from the query and then use them for retrieving information.
1935
+
1936
+ 1. Extracts high-level and low-level keywords from the query
1937
+ 2. Formats the query with the extracted keywords and prompt
1938
+ 3. Uses the appropriate query method based on param.mode
1939
+
1940
+ Args:
1941
+ query: The user's query
1942
+ prompt: Additional prompt to prepend to the query
1943
+ param: Query parameters
1944
+ knowledge_graph_inst: Knowledge graph storage
1945
+ entities_vdb: Entities vector database
1946
+ relationships_vdb: Relationships vector database
1947
+ chunks_vdb: Document chunks vector database
1948
+ text_chunks_db: Text chunks storage
1949
+ global_config: Global configuration
1950
+ hashing_kv: Cache storage
1951
+
1952
+ Returns:
1953
+ Query response or async iterator
1954
+ """
1955
+ # Extract keywords
1956
+ hl_keywords, ll_keywords = await extract_keywords_only(
1957
+ text=query,
1958
+ param=param,
1959
+ global_config=global_config,
1960
+ hashing_kv=hashing_kv,
1961
+ )
1962
+
1963
+ param.hl_keywords = hl_keywords
1964
+ param.ll_keywords = ll_keywords
1965
+
1966
+ # Create a new string with the prompt and the keywords
1967
+ ll_keywords_str = ", ".join(ll_keywords)
1968
+ hl_keywords_str = ", ".join(hl_keywords)
1969
+ formatted_question = f"{prompt}\n\n### Keywords:\nHigh-level: {hl_keywords_str}\nLow-level: {ll_keywords_str}\n\n### Query:\n{query}"
1970
+
1971
+ # Use appropriate query method based on mode
1972
+ if param.mode in ["local", "global", "hybrid"]:
1973
+ return await kg_query_with_keywords(
1974
+ formatted_question,
1975
+ knowledge_graph_inst,
1976
+ entities_vdb,
1977
+ relationships_vdb,
1978
+ text_chunks_db,
1979
+ param,
1980
+ global_config,
1981
+ hashing_kv=hashing_kv,
1982
+ )
1983
+ elif param.mode == "naive":
1984
+ return await naive_query(
1985
+ formatted_question,
1986
+ chunks_vdb,
1987
+ text_chunks_db,
1988
+ param,
1989
+ global_config,
1990
+ hashing_kv=hashing_kv,
1991
+ )
1992
+ elif param.mode == "mix":
1993
+ return await mix_kg_vector_query(
1994
+ formatted_question,
1995
+ knowledge_graph_inst,
1996
+ entities_vdb,
1997
+ relationships_vdb,
1998
+ chunks_vdb,
1999
+ text_chunks_db,
2000
+ param,
2001
+ global_config,
2002
+ hashing_kv=hashing_kv,
2003
+ )
2004
+ else:
2005
+ raise ValueError(f"Unknown mode {param.mode}")
lightrag/prompt.py CHANGED
@@ -236,7 +236,7 @@ Given the query and conversation history, list both high-level and low-level key
236
  ---Instructions---
237
 
238
  - Consider both the current query and relevant conversation history when extracting keywords
239
- - Output the keywords in JSON format
240
  - The JSON should have two keys:
241
  - "high_level_keywords" for overarching concepts or themes
242
  - "low_level_keywords" for specific entities or details
 
236
  ---Instructions---
237
 
238
  - Consider both the current query and relevant conversation history when extracting keywords
239
+ - Output the keywords in JSON format, it will be parsed by a JSON parser, do not add any extra content in output
240
  - The JSON should have two keys:
241
  - "high_level_keywords" for overarching concepts or themes
242
  - "low_level_keywords" for specific entities or details
lightrag/utils.py CHANGED
@@ -633,15 +633,15 @@ async def handle_cache(
633
  prompt,
634
  mode="default",
635
  cache_type=None,
636
- force_llm_cache=False,
637
  ):
638
  """Generic cache handling function"""
639
- if hashing_kv is None or not (
640
- force_llm_cache or hashing_kv.global_config.get("enable_llm_cache")
641
- ):
642
  return None, None, None, None
643
 
644
- if mode != "default":
 
 
 
645
  # Get embedding cache configuration
646
  embedding_cache_config = hashing_kv.global_config.get(
647
  "embedding_cache_config",
@@ -651,8 +651,7 @@ async def handle_cache(
651
  use_llm_check = embedding_cache_config.get("use_llm_check", False)
652
 
653
  quantized = min_val = max_val = None
654
- if is_embedding_cache_enabled:
655
- # Use embedding cache
656
  current_embedding = await hashing_kv.embedding_func([prompt])
657
  llm_model_func = hashing_kv.global_config.get("llm_model_func")
658
  quantized, min_val, max_val = quantize_embedding(current_embedding[0])
@@ -667,24 +666,29 @@ async def handle_cache(
667
  cache_type=cache_type,
668
  )
669
  if best_cached_response is not None:
670
- logger.info(f"Embedding cached hit(mode:{mode} type:{cache_type})")
671
  return best_cached_response, None, None, None
672
  else:
673
  # if caching keyword embedding is enabled, return the quantized embedding for saving it latter
674
- logger.info(f"Embedding cached missed(mode:{mode} type:{cache_type})")
675
  return None, quantized, min_val, max_val
676
 
677
- # For default mode or is_embedding_cache_enabled is False, use regular cache
678
- # default mode is for extract_entities or naive query
 
 
 
 
 
679
  if exists_func(hashing_kv, "get_by_mode_and_id"):
680
  mode_cache = await hashing_kv.get_by_mode_and_id(mode, args_hash) or {}
681
  else:
682
  mode_cache = await hashing_kv.get_by_id(mode) or {}
683
  if args_hash in mode_cache:
684
- logger.info(f"Non-embedding cached hit(mode:{mode} type:{cache_type})")
685
  return mode_cache[args_hash]["return"], None, None, None
686
 
687
- logger.info(f"Non-embedding cached missed(mode:{mode} type:{cache_type})")
688
  return None, None, None, None
689
 
690
 
@@ -701,9 +705,22 @@ class CacheData:
701
 
702
 
703
  async def save_to_cache(hashing_kv, cache_data: CacheData):
704
- if hashing_kv is None or hasattr(cache_data.content, "__aiter__"):
 
 
 
 
 
 
 
 
 
 
 
 
705
  return
706
 
 
707
  if exists_func(hashing_kv, "get_by_mode_and_id"):
708
  mode_cache = (
709
  await hashing_kv.get_by_mode_and_id(cache_data.mode, cache_data.args_hash)
@@ -712,6 +729,16 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
712
  else:
713
  mode_cache = await hashing_kv.get_by_id(cache_data.mode) or {}
714
 
 
 
 
 
 
 
 
 
 
 
715
  mode_cache[cache_data.args_hash] = {
716
  "return": cache_data.content,
717
  "cache_type": cache_data.cache_type,
@@ -726,6 +753,7 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
726
  "original_prompt": cache_data.prompt,
727
  }
728
 
 
729
  await hashing_kv.upsert({cache_data.mode: mode_cache})
730
 
731
 
@@ -862,3 +890,52 @@ def lazy_external_import(module_name: str, class_name: str) -> Callable[..., Any
862
  return cls(*args, **kwargs)
863
 
864
  return import_class
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
633
  prompt,
634
  mode="default",
635
  cache_type=None,
 
636
  ):
637
  """Generic cache handling function"""
638
+ if hashing_kv is None:
 
 
639
  return None, None, None, None
640
 
641
+ if mode != "default": # handle cache for all type of query
642
+ if not hashing_kv.global_config.get("enable_llm_cache"):
643
+ return None, None, None, None
644
+
645
  # Get embedding cache configuration
646
  embedding_cache_config = hashing_kv.global_config.get(
647
  "embedding_cache_config",
 
651
  use_llm_check = embedding_cache_config.get("use_llm_check", False)
652
 
653
  quantized = min_val = max_val = None
654
+ if is_embedding_cache_enabled: # Use embedding simularity to match cache
 
655
  current_embedding = await hashing_kv.embedding_func([prompt])
656
  llm_model_func = hashing_kv.global_config.get("llm_model_func")
657
  quantized, min_val, max_val = quantize_embedding(current_embedding[0])
 
666
  cache_type=cache_type,
667
  )
668
  if best_cached_response is not None:
669
+ logger.debug(f"Embedding cached hit(mode:{mode} type:{cache_type})")
670
  return best_cached_response, None, None, None
671
  else:
672
  # if caching keyword embedding is enabled, return the quantized embedding for saving it latter
673
+ logger.debug(f"Embedding cached missed(mode:{mode} type:{cache_type})")
674
  return None, quantized, min_val, max_val
675
 
676
+ else: # handle cache for entity extraction
677
+ if not hashing_kv.global_config.get("enable_llm_cache_for_entity_extract"):
678
+ return None, None, None, None
679
+
680
+ # Here is the conditions of code reaching this point:
681
+ # 1. All query mode: enable_llm_cache is True and embedding simularity is not enabled
682
+ # 2. Entity extract: enable_llm_cache_for_entity_extract is True
683
  if exists_func(hashing_kv, "get_by_mode_and_id"):
684
  mode_cache = await hashing_kv.get_by_mode_and_id(mode, args_hash) or {}
685
  else:
686
  mode_cache = await hashing_kv.get_by_id(mode) or {}
687
  if args_hash in mode_cache:
688
+ logger.debug(f"Non-embedding cached hit(mode:{mode} type:{cache_type})")
689
  return mode_cache[args_hash]["return"], None, None, None
690
 
691
+ logger.debug(f"Non-embedding cached missed(mode:{mode} type:{cache_type})")
692
  return None, None, None, None
693
 
694
 
 
705
 
706
 
707
  async def save_to_cache(hashing_kv, cache_data: CacheData):
708
+ """Save data to cache, with improved handling for streaming responses and duplicate content.
709
+
710
+ Args:
711
+ hashing_kv: The key-value storage for caching
712
+ cache_data: The cache data to save
713
+ """
714
+ # Skip if storage is None or content is a streaming response
715
+ if hashing_kv is None or not cache_data.content:
716
+ return
717
+
718
+ # If content is a streaming response, don't cache it
719
+ if hasattr(cache_data.content, "__aiter__"):
720
+ logger.debug("Streaming response detected, skipping cache")
721
  return
722
 
723
+ # Get existing cache data
724
  if exists_func(hashing_kv, "get_by_mode_and_id"):
725
  mode_cache = (
726
  await hashing_kv.get_by_mode_and_id(cache_data.mode, cache_data.args_hash)
 
729
  else:
730
  mode_cache = await hashing_kv.get_by_id(cache_data.mode) or {}
731
 
732
+ # Check if we already have identical content cached
733
+ if cache_data.args_hash in mode_cache:
734
+ existing_content = mode_cache[cache_data.args_hash].get("return")
735
+ if existing_content == cache_data.content:
736
+ logger.info(
737
+ f"Cache content unchanged for {cache_data.args_hash}, skipping update"
738
+ )
739
+ return
740
+
741
+ # Update cache with new content
742
  mode_cache[cache_data.args_hash] = {
743
  "return": cache_data.content,
744
  "cache_type": cache_data.cache_type,
 
753
  "original_prompt": cache_data.prompt,
754
  }
755
 
756
+ # Only upsert if there's actual new content
757
  await hashing_kv.upsert({cache_data.mode: mode_cache})
758
 
759
 
 
890
  return cls(*args, **kwargs)
891
 
892
  return import_class
893
+
894
+
895
+ def get_content_summary(content: str, max_length: int = 100) -> str:
896
+ """Get summary of document content
897
+
898
+ Args:
899
+ content: Original document content
900
+ max_length: Maximum length of summary
901
+
902
+ Returns:
903
+ Truncated content with ellipsis if needed
904
+ """
905
+ content = content.strip()
906
+ if len(content) <= max_length:
907
+ return content
908
+ return content[:max_length] + "..."
909
+
910
+
911
+ def clean_text(text: str) -> str:
912
+ """Clean text by removing null bytes (0x00) and whitespace
913
+
914
+ Args:
915
+ text: Input text to clean
916
+
917
+ Returns:
918
+ Cleaned text
919
+ """
920
+ return text.strip().replace("\x00", "")
921
+
922
+
923
+ def check_storage_env_vars(storage_name: str) -> None:
924
+ """Check if all required environment variables for storage implementation exist
925
+
926
+ Args:
927
+ storage_name: Storage implementation name
928
+
929
+ Raises:
930
+ ValueError: If required environment variables are missing
931
+ """
932
+ from lightrag.kg import STORAGE_ENV_REQUIREMENTS
933
+
934
+ required_vars = STORAGE_ENV_REQUIREMENTS.get(storage_name, [])
935
+ missing_vars = [var for var in required_vars if var not in os.environ]
936
+
937
+ if missing_vars:
938
+ raise ValueError(
939
+ f"Storage implementation '{storage_name}' requires the following "
940
+ f"environment variables: {', '.join(missing_vars)}"
941
+ )
lightrag_webui/bun.lock CHANGED
@@ -34,11 +34,13 @@
34
  "cmdk": "^1.0.4",
35
  "graphology": "^0.26.0",
36
  "graphology-generators": "^0.11.2",
 
37
  "lucide-react": "^0.475.0",
38
  "minisearch": "^7.1.2",
39
  "react": "^19.0.0",
40
  "react-dom": "^19.0.0",
41
  "react-dropzone": "^14.3.6",
 
42
  "react-markdown": "^9.1.0",
43
  "react-number-format": "^5.4.3",
44
  "react-syntax-highlighter": "^15.6.1",
@@ -765,8 +767,12 @@
765
 
766
  "hoist-non-react-statics": ["hoist-non-react-statics@3.3.2", "", { "dependencies": { "react-is": "^16.7.0" } }, "sha512-/gGivxi8JPKWNm/W0jSmzcMPpfpPLc3dY/6GxhX2hQ9iGj3aDfklV4ET7NjKpSinLpJ5vafa9iiGIEZg10SfBw=="],
767
 
 
 
768
  "html-url-attributes": ["html-url-attributes@3.0.1", "", {}, "sha512-ol6UPyBWqsrO6EJySPz2O7ZSr856WDrEzM5zMqp+FJJLGMW35cLYmmZnl0vztAZxRUoNZJFTCohfjuIJ8I4QBQ=="],
769
 
 
 
770
  "ignore": ["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="],
771
 
772
  "import-fresh": ["import-fresh@3.3.1", "", { "dependencies": { "parent-module": "^1.0.0", "resolve-from": "^4.0.0" } }, "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ=="],
@@ -1093,6 +1099,8 @@
1093
 
1094
  "react-dropzone": ["react-dropzone@14.3.6", "", { "dependencies": { "attr-accept": "^2.2.4", "file-selector": "^2.1.0", "prop-types": "^15.8.1" }, "peerDependencies": { "react": ">= 16.8 || 18.0.0" } }, "sha512-U792j+x0rcwH/U/Slv/OBNU/LGFYbDLHKKiJoPhNaOianayZevCt4Y5S0CraPssH/6/wT6xhKDfzdXUgCBS0HQ=="],
1095
 
 
 
1096
  "react-is": ["react-is@16.13.1", "", {}, "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ=="],
1097
 
1098
  "react-markdown": ["react-markdown@9.1.0", "", { "dependencies": { "@types/hast": "^3.0.0", "@types/mdast": "^4.0.0", "devlop": "^1.0.0", "hast-util-to-jsx-runtime": "^2.0.0", "html-url-attributes": "^3.0.0", "mdast-util-to-hast": "^13.0.0", "remark-parse": "^11.0.0", "remark-rehype": "^11.0.0", "unified": "^11.0.0", "unist-util-visit": "^5.0.0", "vfile": "^6.0.0" }, "peerDependencies": { "@types/react": ">=18", "react": ">=18" } }, "sha512-xaijuJB0kzGiUdG7nc2MOMDUDBWPyGAjZtUrow9XxUeua8IqeP+VlIfAZ3bphpcLTnSZXz6z9jcVC/TCwbfgdw=="],
@@ -1271,6 +1279,8 @@
1271
 
1272
  "vite": ["vite@6.1.1", "", { "dependencies": { "esbuild": "^0.24.2", "postcss": "^8.5.2", "rollup": "^4.30.1" }, "optionalDependencies": { "fsevents": "~2.3.3" }, "peerDependencies": { "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", "jiti": ">=1.21.0", "less": "*", "lightningcss": "^1.21.0", "sass": "*", "sass-embedded": "*", "stylus": "*", "sugarss": "*", "terser": "^5.16.0", "tsx": "^4.8.1", "yaml": "^2.4.2" }, "optionalPeers": ["@types/node", "jiti", "less", "lightningcss", "sass", "sass-embedded", "stylus", "sugarss", "terser", "tsx", "yaml"], "bin": { "vite": "bin/vite.js" } }, "sha512-4GgM54XrwRfrOp297aIYspIti66k56v16ZnqHvrIM7mG+HjDlAwS7p+Srr7J6fGvEdOJ5JcQ/D9T7HhtdXDTzA=="],
1273
 
 
 
1274
  "which": ["which@2.0.2", "", { "dependencies": { "isexe": "^2.0.0" }, "bin": { "node-which": "./bin/node-which" } }, "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA=="],
1275
 
1276
  "which-boxed-primitive": ["which-boxed-primitive@1.1.1", "", { "dependencies": { "is-bigint": "^1.1.0", "is-boolean-object": "^1.2.1", "is-number-object": "^1.1.1", "is-string": "^1.1.1", "is-symbol": "^1.1.1" } }, "sha512-TbX3mj8n0odCBFVlY8AxkqcHASw3L60jIuF8jFP78az3C2YhmGvqbHBpAjTRH2/xqYunrJ9g1jSyjCjpoWzIAA=="],
 
34
  "cmdk": "^1.0.4",
35
  "graphology": "^0.26.0",
36
  "graphology-generators": "^0.11.2",
37
+ "i18next": "^24.2.2",
38
  "lucide-react": "^0.475.0",
39
  "minisearch": "^7.1.2",
40
  "react": "^19.0.0",
41
  "react-dom": "^19.0.0",
42
  "react-dropzone": "^14.3.6",
43
+ "react-i18next": "^15.4.1",
44
  "react-markdown": "^9.1.0",
45
  "react-number-format": "^5.4.3",
46
  "react-syntax-highlighter": "^15.6.1",
 
767
 
768
  "hoist-non-react-statics": ["hoist-non-react-statics@3.3.2", "", { "dependencies": { "react-is": "^16.7.0" } }, "sha512-/gGivxi8JPKWNm/W0jSmzcMPpfpPLc3dY/6GxhX2hQ9iGj3aDfklV4ET7NjKpSinLpJ5vafa9iiGIEZg10SfBw=="],
769
 
770
+ "html-parse-stringify": ["html-parse-stringify@3.0.1", "", { "dependencies": { "void-elements": "3.1.0" } }, "sha512-KknJ50kTInJ7qIScF3jeaFRpMpE8/lfiTdzf/twXyPBLAGrLRTmkz3AdTnKeh40X8k9L2fdYwEp/42WGXIRGcg=="],
771
+
772
  "html-url-attributes": ["html-url-attributes@3.0.1", "", {}, "sha512-ol6UPyBWqsrO6EJySPz2O7ZSr856WDrEzM5zMqp+FJJLGMW35cLYmmZnl0vztAZxRUoNZJFTCohfjuIJ8I4QBQ=="],
773
 
774
+ "i18next": ["i18next@24.2.2", "", { "dependencies": { "@babel/runtime": "^7.23.2" }, "peerDependencies": { "typescript": "^5" }, "optionalPeers": ["typescript"] }, "sha512-NE6i86lBCKRYZa5TaUDkU5S4HFgLIEJRLr3Whf2psgaxBleQ2LC1YW1Vc+SCgkAW7VEzndT6al6+CzegSUHcTQ=="],
775
+
776
  "ignore": ["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="],
777
 
778
  "import-fresh": ["import-fresh@3.3.1", "", { "dependencies": { "parent-module": "^1.0.0", "resolve-from": "^4.0.0" } }, "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ=="],
 
1099
 
1100
  "react-dropzone": ["react-dropzone@14.3.6", "", { "dependencies": { "attr-accept": "^2.2.4", "file-selector": "^2.1.0", "prop-types": "^15.8.1" }, "peerDependencies": { "react": ">= 16.8 || 18.0.0" } }, "sha512-U792j+x0rcwH/U/Slv/OBNU/LGFYbDLHKKiJoPhNaOianayZevCt4Y5S0CraPssH/6/wT6xhKDfzdXUgCBS0HQ=="],
1101
 
1102
+ "react-i18next": ["react-i18next@15.4.1", "", { "dependencies": { "@babel/runtime": "^7.25.0", "html-parse-stringify": "^3.0.1" }, "peerDependencies": { "i18next": ">= 23.2.3", "react": ">= 16.8.0" } }, "sha512-ahGab+IaSgZmNPYXdV1n+OYky95TGpFwnKRflX/16dY04DsYYKHtVLjeny7sBSCREEcoMbAgSkFiGLF5g5Oofw=="],
1103
+
1104
  "react-is": ["react-is@16.13.1", "", {}, "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ=="],
1105
 
1106
  "react-markdown": ["react-markdown@9.1.0", "", { "dependencies": { "@types/hast": "^3.0.0", "@types/mdast": "^4.0.0", "devlop": "^1.0.0", "hast-util-to-jsx-runtime": "^2.0.0", "html-url-attributes": "^3.0.0", "mdast-util-to-hast": "^13.0.0", "remark-parse": "^11.0.0", "remark-rehype": "^11.0.0", "unified": "^11.0.0", "unist-util-visit": "^5.0.0", "vfile": "^6.0.0" }, "peerDependencies": { "@types/react": ">=18", "react": ">=18" } }, "sha512-xaijuJB0kzGiUdG7nc2MOMDUDBWPyGAjZtUrow9XxUeua8IqeP+VlIfAZ3bphpcLTnSZXz6z9jcVC/TCwbfgdw=="],
 
1279
 
1280
  "vite": ["vite@6.1.1", "", { "dependencies": { "esbuild": "^0.24.2", "postcss": "^8.5.2", "rollup": "^4.30.1" }, "optionalDependencies": { "fsevents": "~2.3.3" }, "peerDependencies": { "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", "jiti": ">=1.21.0", "less": "*", "lightningcss": "^1.21.0", "sass": "*", "sass-embedded": "*", "stylus": "*", "sugarss": "*", "terser": "^5.16.0", "tsx": "^4.8.1", "yaml": "^2.4.2" }, "optionalPeers": ["@types/node", "jiti", "less", "lightningcss", "sass", "sass-embedded", "stylus", "sugarss", "terser", "tsx", "yaml"], "bin": { "vite": "bin/vite.js" } }, "sha512-4GgM54XrwRfrOp297aIYspIti66k56v16ZnqHvrIM7mG+HjDlAwS7p+Srr7J6fGvEdOJ5JcQ/D9T7HhtdXDTzA=="],
1281
 
1282
+ "void-elements": ["void-elements@3.1.0", "", {}, "sha512-Dhxzh5HZuiHQhbvTW9AMetFfBHDMYpo23Uo9btPXgdYP+3T5S+p+jgNy7spra+veYhBP2dCSgxR/i2Y02h5/6w=="],
1283
+
1284
  "which": ["which@2.0.2", "", { "dependencies": { "isexe": "^2.0.0" }, "bin": { "node-which": "./bin/node-which" } }, "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA=="],
1285
 
1286
  "which-boxed-primitive": ["which-boxed-primitive@1.1.1", "", { "dependencies": { "is-bigint": "^1.1.0", "is-boolean-object": "^1.2.1", "is-number-object": "^1.1.1", "is-string": "^1.1.1", "is-symbol": "^1.1.1" } }, "sha512-TbX3mj8n0odCBFVlY8AxkqcHASw3L60jIuF8jFP78az3C2YhmGvqbHBpAjTRH2/xqYunrJ9g1jSyjCjpoWzIAA=="],
lightrag_webui/package.json CHANGED
@@ -43,11 +43,13 @@
43
  "cmdk": "^1.0.4",
44
  "graphology": "^0.26.0",
45
  "graphology-generators": "^0.11.2",
 
46
  "lucide-react": "^0.475.0",
47
  "minisearch": "^7.1.2",
48
  "react": "^19.0.0",
49
  "react-dom": "^19.0.0",
50
  "react-dropzone": "^14.3.6",
 
51
  "react-markdown": "^9.1.0",
52
  "react-number-format": "^5.4.3",
53
  "react-syntax-highlighter": "^15.6.1",
 
43
  "cmdk": "^1.0.4",
44
  "graphology": "^0.26.0",
45
  "graphology-generators": "^0.11.2",
46
+ "i18next": "^24.2.2",
47
  "lucide-react": "^0.475.0",
48
  "minisearch": "^7.1.2",
49
  "react": "^19.0.0",
50
  "react-dom": "^19.0.0",
51
  "react-dropzone": "^14.3.6",
52
+ "react-i18next": "^15.4.1",
53
  "react-markdown": "^9.1.0",
54
  "react-number-format": "^5.4.3",
55
  "react-syntax-highlighter": "^15.6.1",
lightrag_webui/src/components/ThemeToggle.tsx CHANGED
@@ -3,6 +3,7 @@ import useTheme from '@/hooks/useTheme'
3
  import { MoonIcon, SunIcon } from 'lucide-react'
4
  import { useCallback } from 'react'
5
  import { controlButtonVariant } from '@/lib/constants'
 
6
 
7
  /**
8
  * Component that toggles the theme between light and dark.
@@ -11,13 +12,14 @@ export default function ThemeToggle() {
11
  const { theme, setTheme } = useTheme()
12
  const setLight = useCallback(() => setTheme('light'), [setTheme])
13
  const setDark = useCallback(() => setTheme('dark'), [setTheme])
 
14
 
15
  if (theme === 'dark') {
16
  return (
17
  <Button
18
  onClick={setLight}
19
  variant={controlButtonVariant}
20
- tooltip="Switch to light theme"
21
  size="icon"
22
  side="bottom"
23
  >
@@ -29,7 +31,7 @@ export default function ThemeToggle() {
29
  <Button
30
  onClick={setDark}
31
  variant={controlButtonVariant}
32
- tooltip="Switch to dark theme"
33
  size="icon"
34
  side="bottom"
35
  >
 
3
  import { MoonIcon, SunIcon } from 'lucide-react'
4
  import { useCallback } from 'react'
5
  import { controlButtonVariant } from '@/lib/constants'
6
+ import { useTranslation } from 'react-i18next'
7
 
8
  /**
9
  * Component that toggles the theme between light and dark.
 
12
  const { theme, setTheme } = useTheme()
13
  const setLight = useCallback(() => setTheme('light'), [setTheme])
14
  const setDark = useCallback(() => setTheme('dark'), [setTheme])
15
+ const { t } = useTranslation()
16
 
17
  if (theme === 'dark') {
18
  return (
19
  <Button
20
  onClick={setLight}
21
  variant={controlButtonVariant}
22
+ tooltip={t('header.themeToggle.switchToLight')}
23
  size="icon"
24
  side="bottom"
25
  >
 
31
  <Button
32
  onClick={setDark}
33
  variant={controlButtonVariant}
34
+ tooltip={t('header.themeToggle.switchToDark')}
35
  size="icon"
36
  side="bottom"
37
  >
lightrag_webui/src/components/documents/ClearDocumentsDialog.tsx CHANGED
@@ -13,38 +13,40 @@ import { errorMessage } from '@/lib/utils'
13
  import { clearDocuments } from '@/api/lightrag'
14
 
15
  import { EraserIcon } from 'lucide-react'
 
16
 
17
  export default function ClearDocumentsDialog() {
 
18
  const [open, setOpen] = useState(false)
19
 
20
  const handleClear = useCallback(async () => {
21
  try {
22
  const result = await clearDocuments()
23
  if (result.status === 'success') {
24
- toast.success('Documents cleared successfully')
25
  setOpen(false)
26
  } else {
27
- toast.error(`Clear Documents Failed:\n${result.message}`)
28
  }
29
  } catch (err) {
30
- toast.error('Clear Documents Failed:\n' + errorMessage(err))
31
  }
32
  }, [setOpen])
33
 
34
  return (
35
  <Dialog open={open} onOpenChange={setOpen}>
36
  <DialogTrigger asChild>
37
- <Button variant="outline" side="bottom" tooltip='Clear documents' size="sm">
38
- <EraserIcon/> Clear
39
  </Button>
40
  </DialogTrigger>
41
  <DialogContent className="sm:max-w-xl" onCloseAutoFocus={(e) => e.preventDefault()}>
42
  <DialogHeader>
43
- <DialogTitle>Clear documents</DialogTitle>
44
- <DialogDescription>Do you really want to clear all documents?</DialogDescription>
45
  </DialogHeader>
46
  <Button variant="destructive" onClick={handleClear}>
47
- YES
48
  </Button>
49
  </DialogContent>
50
  </Dialog>
 
13
  import { clearDocuments } from '@/api/lightrag'
14
 
15
  import { EraserIcon } from 'lucide-react'
16
+ import { useTranslation } from 'react-i18next'
17
 
18
  export default function ClearDocumentsDialog() {
19
+ const { t } = useTranslation()
20
  const [open, setOpen] = useState(false)
21
 
22
  const handleClear = useCallback(async () => {
23
  try {
24
  const result = await clearDocuments()
25
  if (result.status === 'success') {
26
+ toast.success(t('documentPanel.clearDocuments.success'))
27
  setOpen(false)
28
  } else {
29
+ toast.error(t('documentPanel.clearDocuments.failed', { message: result.message }))
30
  }
31
  } catch (err) {
32
+ toast.error(t('documentPanel.clearDocuments.error', { error: errorMessage(err) }))
33
  }
34
  }, [setOpen])
35
 
36
  return (
37
  <Dialog open={open} onOpenChange={setOpen}>
38
  <DialogTrigger asChild>
39
+ <Button variant="outline" side="bottom" tooltip={t('documentPanel.clearDocuments.tooltip')} size="sm">
40
+ <EraserIcon/> {t('documentPanel.clearDocuments.button')}
41
  </Button>
42
  </DialogTrigger>
43
  <DialogContent className="sm:max-w-xl" onCloseAutoFocus={(e) => e.preventDefault()}>
44
  <DialogHeader>
45
+ <DialogTitle>{t('documentPanel.clearDocuments.title')}</DialogTitle>
46
+ <DialogDescription>{t('documentPanel.clearDocuments.confirm')}</DialogDescription>
47
  </DialogHeader>
48
  <Button variant="destructive" onClick={handleClear}>
49
+ {t('documentPanel.clearDocuments.confirmButton')}
50
  </Button>
51
  </DialogContent>
52
  </Dialog>
lightrag_webui/src/components/documents/UploadDocumentsDialog.tsx CHANGED
@@ -14,8 +14,10 @@ import { errorMessage } from '@/lib/utils'
14
  import { uploadDocument } from '@/api/lightrag'
15
 
16
  import { UploadIcon } from 'lucide-react'
 
17
 
18
  export default function UploadDocumentsDialog() {
 
19
  const [open, setOpen] = useState(false)
20
  const [isUploading, setIsUploading] = useState(false)
21
  const [progresses, setProgresses] = useState<Record<string, number>>({})
@@ -29,24 +31,24 @@ export default function UploadDocumentsDialog() {
29
  filesToUpload.map(async (file) => {
30
  try {
31
  const result = await uploadDocument(file, (percentCompleted: number) => {
32
- console.debug(`Uploading ${file.name}: ${percentCompleted}%`)
33
  setProgresses((pre) => ({
34
  ...pre,
35
  [file.name]: percentCompleted
36
  }))
37
  })
38
  if (result.status === 'success') {
39
- toast.success(`Upload Success:\n${file.name} uploaded successfully`)
40
  } else {
41
- toast.error(`Upload Failed:\n${file.name}\n${result.message}`)
42
  }
43
  } catch (err) {
44
- toast.error(`Upload Failed:\n${file.name}\n${errorMessage(err)}`)
45
  }
46
  })
47
  )
48
  } catch (err) {
49
- toast.error('Upload Failed\n' + errorMessage(err))
50
  } finally {
51
  setIsUploading(false)
52
  // setOpen(false)
@@ -66,21 +68,21 @@ export default function UploadDocumentsDialog() {
66
  }}
67
  >
68
  <DialogTrigger asChild>
69
- <Button variant="default" side="bottom" tooltip="Upload documents" size="sm">
70
- <UploadIcon /> Upload
71
  </Button>
72
  </DialogTrigger>
73
  <DialogContent className="sm:max-w-xl" onCloseAutoFocus={(e) => e.preventDefault()}>
74
  <DialogHeader>
75
- <DialogTitle>Upload documents</DialogTitle>
76
  <DialogDescription>
77
- Drag and drop your documents here or click to browse.
78
  </DialogDescription>
79
  </DialogHeader>
80
  <FileUploader
81
  maxFileCount={Infinity}
82
  maxSize={200 * 1024 * 1024}
83
- description="supported types: TXT, MD, DOCX, PDF, PPTX, RTF, ODT, EPUB, HTML, HTM, TEX, JSON, XML, YAML, YML, CSV, LOG, CONF, INI, PROPERTIES, SQL, BAT, SH, C, CPP, PY, JAVA, JS, TS, SWIFT, GO, RB, PHP, CSS, SCSS, LESS"
84
  onUpload={handleDocumentsUpload}
85
  progresses={progresses}
86
  disabled={isUploading}
 
14
  import { uploadDocument } from '@/api/lightrag'
15
 
16
  import { UploadIcon } from 'lucide-react'
17
+ import { useTranslation } from 'react-i18next'
18
 
19
  export default function UploadDocumentsDialog() {
20
+ const { t } = useTranslation()
21
  const [open, setOpen] = useState(false)
22
  const [isUploading, setIsUploading] = useState(false)
23
  const [progresses, setProgresses] = useState<Record<string, number>>({})
 
31
  filesToUpload.map(async (file) => {
32
  try {
33
  const result = await uploadDocument(file, (percentCompleted: number) => {
34
+ console.debug(t('documentPanel.uploadDocuments.uploading', { name: file.name, percent: percentCompleted }))
35
  setProgresses((pre) => ({
36
  ...pre,
37
  [file.name]: percentCompleted
38
  }))
39
  })
40
  if (result.status === 'success') {
41
+ toast.success(t('documentPanel.uploadDocuments.success', { name: file.name }))
42
  } else {
43
+ toast.error(t('documentPanel.uploadDocuments.failed', { name: file.name, message: result.message }))
44
  }
45
  } catch (err) {
46
+ toast.error(t('documentPanel.uploadDocuments.error', { name: file.name, error: errorMessage(err) }))
47
  }
48
  })
49
  )
50
  } catch (err) {
51
+ toast.error(t('documentPanel.uploadDocuments.generalError', { error: errorMessage(err) }))
52
  } finally {
53
  setIsUploading(false)
54
  // setOpen(false)
 
68
  }}
69
  >
70
  <DialogTrigger asChild>
71
+ <Button variant="default" side="bottom" tooltip={t('documentPanel.uploadDocuments.tooltip')} size="sm">
72
+ <UploadIcon /> {t('documentPanel.uploadDocuments.button')}
73
  </Button>
74
  </DialogTrigger>
75
  <DialogContent className="sm:max-w-xl" onCloseAutoFocus={(e) => e.preventDefault()}>
76
  <DialogHeader>
77
+ <DialogTitle>{t('documentPanel.uploadDocuments.title')}</DialogTitle>
78
  <DialogDescription>
79
+ {t('documentPanel.uploadDocuments.description')}
80
  </DialogDescription>
81
  </DialogHeader>
82
  <FileUploader
83
  maxFileCount={Infinity}
84
  maxSize={200 * 1024 * 1024}
85
+ description={t('documentPanel.uploadDocuments.fileTypes')}
86
  onUpload={handleDocumentsUpload}
87
  progresses={progresses}
88
  disabled={isUploading}
lightrag_webui/src/components/graph/FullScreenControl.tsx CHANGED
@@ -2,21 +2,23 @@ import { useFullScreen } from '@react-sigma/core'
2
  import { MaximizeIcon, MinimizeIcon } from 'lucide-react'
3
  import { controlButtonVariant } from '@/lib/constants'
4
  import Button from '@/components/ui/Button'
 
5
 
6
  /**
7
  * Component that toggles full screen mode.
8
  */
9
  const FullScreenControl = () => {
10
  const { isFullScreen, toggle } = useFullScreen()
 
11
 
12
  return (
13
  <>
14
  {isFullScreen ? (
15
- <Button variant={controlButtonVariant} onClick={toggle} tooltip="Windowed" size="icon">
16
  <MinimizeIcon />
17
  </Button>
18
  ) : (
19
- <Button variant={controlButtonVariant} onClick={toggle} tooltip="Full Screen" size="icon">
20
  <MaximizeIcon />
21
  </Button>
22
  )}
 
2
  import { MaximizeIcon, MinimizeIcon } from 'lucide-react'
3
  import { controlButtonVariant } from '@/lib/constants'
4
  import Button from '@/components/ui/Button'
5
+ import { useTranslation } from 'react-i18next'
6
 
7
  /**
8
  * Component that toggles full screen mode.
9
  */
10
  const FullScreenControl = () => {
11
  const { isFullScreen, toggle } = useFullScreen()
12
+ const { t } = useTranslation()
13
 
14
  return (
15
  <>
16
  {isFullScreen ? (
17
+ <Button variant={controlButtonVariant} onClick={toggle} tooltip={t('graphPanel.sideBar.fullScreenControl.windowed')} size="icon">
18
  <MinimizeIcon />
19
  </Button>
20
  ) : (
21
+ <Button variant={controlButtonVariant} onClick={toggle} tooltip={t('graphPanel.sideBar.fullScreenControl.fullScreen')} size="icon">
22
  <MaximizeIcon />
23
  </Button>
24
  )}
lightrag_webui/src/components/graph/GraphLabels.tsx CHANGED
@@ -5,6 +5,7 @@ import { useSettingsStore } from '@/stores/settings'
5
  import { useGraphStore } from '@/stores/graph'
6
  import { labelListLimit } from '@/lib/constants'
7
  import MiniSearch from 'minisearch'
 
8
 
9
  const lastGraph: any = {
10
  graph: null,
@@ -13,6 +14,7 @@ const lastGraph: any = {
13
  }
14
 
15
  const GraphLabels = () => {
 
16
  const label = useSettingsStore.use.queryLabel()
17
  const graph = useGraphStore.use.sigmaGraph()
18
 
@@ -69,7 +71,7 @@ const GraphLabels = () => {
69
 
70
  return result.length <= labelListLimit
71
  ? result
72
- : [...result.slice(0, labelListLimit), `And ${result.length - labelListLimit} others`]
73
  },
74
  [getSearchEngine]
75
  )
@@ -84,14 +86,14 @@ const GraphLabels = () => {
84
  className="ml-2"
85
  triggerClassName="max-h-8"
86
  searchInputClassName="max-h-8"
87
- triggerTooltip="Select query label"
88
  fetcher={fetchData}
89
  renderOption={(item) => <div>{item}</div>}
90
  getOptionValue={(item) => item}
91
  getDisplayValue={(item) => <div>{item}</div>}
92
  notFound={<div className="py-6 text-center text-sm">No labels found</div>}
93
- label="Label"
94
- placeholder="Search labels..."
95
  value={label !== null ? label : ''}
96
  onChange={setQueryLabel}
97
  />
 
5
  import { useGraphStore } from '@/stores/graph'
6
  import { labelListLimit } from '@/lib/constants'
7
  import MiniSearch from 'minisearch'
8
+ import { useTranslation } from 'react-i18next'
9
 
10
  const lastGraph: any = {
11
  graph: null,
 
14
  }
15
 
16
  const GraphLabels = () => {
17
+ const { t } = useTranslation()
18
  const label = useSettingsStore.use.queryLabel()
19
  const graph = useGraphStore.use.sigmaGraph()
20
 
 
71
 
72
  return result.length <= labelListLimit
73
  ? result
74
+ : [...result.slice(0, labelListLimit), t('graphLabels.andOthers', { count: result.length - labelListLimit })]
75
  },
76
  [getSearchEngine]
77
  )
 
86
  className="ml-2"
87
  triggerClassName="max-h-8"
88
  searchInputClassName="max-h-8"
89
+ triggerTooltip={t('graphPanel.graphLabels.selectTooltip')}
90
  fetcher={fetchData}
91
  renderOption={(item) => <div>{item}</div>}
92
  getOptionValue={(item) => item}
93
  getDisplayValue={(item) => <div>{item}</div>}
94
  notFound={<div className="py-6 text-center text-sm">No labels found</div>}
95
+ label={t('graphPanel.graphLabels.label')}
96
+ placeholder={t('graphPanel.graphLabels.placeholder')}
97
  value={label !== null ? label : ''}
98
  onChange={setQueryLabel}
99
  />
lightrag_webui/src/components/graph/GraphSearch.tsx CHANGED
@@ -9,6 +9,7 @@ import { AsyncSearch } from '@/components/ui/AsyncSearch'
9
  import { searchResultLimit } from '@/lib/constants'
10
  import { useGraphStore } from '@/stores/graph'
11
  import MiniSearch from 'minisearch'
 
12
 
13
  interface OptionItem {
14
  id: string
@@ -44,6 +45,7 @@ export const GraphSearchInput = ({
44
  onFocus?: GraphSearchInputProps['onFocus']
45
  value?: GraphSearchInputProps['value']
46
  }) => {
 
47
  const graph = useGraphStore.use.sigmaGraph()
48
 
49
  const searchEngine = useMemo(() => {
@@ -97,7 +99,7 @@ export const GraphSearchInput = ({
97
  {
98
  type: 'message',
99
  id: messageId,
100
- message: `And ${result.length - searchResultLimit} others`
101
  }
102
  ]
103
  },
@@ -118,7 +120,7 @@ export const GraphSearchInput = ({
118
  if (id !== messageId && onFocus) onFocus(id ? { id, type: 'nodes' } : null)
119
  }}
120
  label={'item'}
121
- placeholder="Search nodes..."
122
  />
123
  )
124
  }
 
9
  import { searchResultLimit } from '@/lib/constants'
10
  import { useGraphStore } from '@/stores/graph'
11
  import MiniSearch from 'minisearch'
12
+ import { useTranslation } from 'react-i18next'
13
 
14
  interface OptionItem {
15
  id: string
 
45
  onFocus?: GraphSearchInputProps['onFocus']
46
  value?: GraphSearchInputProps['value']
47
  }) => {
48
+ const { t } = useTranslation()
49
  const graph = useGraphStore.use.sigmaGraph()
50
 
51
  const searchEngine = useMemo(() => {
 
99
  {
100
  type: 'message',
101
  id: messageId,
102
+ message: t('graphPanel.search.message', { count: result.length - searchResultLimit })
103
  }
104
  ]
105
  },
 
120
  if (id !== messageId && onFocus) onFocus(id ? { id, type: 'nodes' } : null)
121
  }}
122
  label={'item'}
123
+ placeholder={t('graphPanel.search.placeholder')}
124
  />
125
  )
126
  }
lightrag_webui/src/components/graph/LayoutsControl.tsx CHANGED
@@ -16,6 +16,7 @@ import { controlButtonVariant } from '@/lib/constants'
16
  import { useSettingsStore } from '@/stores/settings'
17
 
18
  import { GripIcon, PlayIcon, PauseIcon } from 'lucide-react'
 
19
 
20
  type LayoutName =
21
  | 'Circular'
@@ -28,6 +29,7 @@ type LayoutName =
28
  const WorkerLayoutControl = ({ layout, autoRunFor }: WorkerLayoutControlProps) => {
29
  const sigma = useSigma()
30
  const { stop, start, isRunning } = layout
 
31
 
32
  /**
33
  * Init component when Sigma or component settings change.
@@ -61,7 +63,7 @@ const WorkerLayoutControl = ({ layout, autoRunFor }: WorkerLayoutControlProps) =
61
  <Button
62
  size="icon"
63
  onClick={() => (isRunning ? stop() : start())}
64
- tooltip={isRunning ? 'Stop the layout animation' : 'Start the layout animation'}
65
  variant={controlButtonVariant}
66
  >
67
  {isRunning ? <PauseIcon /> : <PlayIcon />}
@@ -74,6 +76,7 @@ const WorkerLayoutControl = ({ layout, autoRunFor }: WorkerLayoutControlProps) =
74
  */
75
  const LayoutsControl = () => {
76
  const sigma = useSigma()
 
77
  const [layout, setLayout] = useState<LayoutName>('Circular')
78
  const [opened, setOpened] = useState<boolean>(false)
79
 
@@ -149,7 +152,7 @@ const LayoutsControl = () => {
149
  size="icon"
150
  variant={controlButtonVariant}
151
  onClick={() => setOpened((e: boolean) => !e)}
152
- tooltip="Layout Graph"
153
  >
154
  <GripIcon />
155
  </Button>
@@ -166,7 +169,7 @@ const LayoutsControl = () => {
166
  key={name}
167
  className="cursor-pointer text-xs"
168
  >
169
- {name}
170
  </CommandItem>
171
  ))}
172
  </CommandGroup>
 
16
  import { useSettingsStore } from '@/stores/settings'
17
 
18
  import { GripIcon, PlayIcon, PauseIcon } from 'lucide-react'
19
+ import { useTranslation } from 'react-i18next'
20
 
21
  type LayoutName =
22
  | 'Circular'
 
29
  const WorkerLayoutControl = ({ layout, autoRunFor }: WorkerLayoutControlProps) => {
30
  const sigma = useSigma()
31
  const { stop, start, isRunning } = layout
32
+ const { t } = useTranslation()
33
 
34
  /**
35
  * Init component when Sigma or component settings change.
 
63
  <Button
64
  size="icon"
65
  onClick={() => (isRunning ? stop() : start())}
66
+ tooltip={isRunning ? t('graphPanel.sideBar.layoutsControl.stopAnimation') : t('graphPanel.sideBar.layoutsControl.startAnimation')}
67
  variant={controlButtonVariant}
68
  >
69
  {isRunning ? <PauseIcon /> : <PlayIcon />}
 
76
  */
77
  const LayoutsControl = () => {
78
  const sigma = useSigma()
79
+ const { t } = useTranslation()
80
  const [layout, setLayout] = useState<LayoutName>('Circular')
81
  const [opened, setOpened] = useState<boolean>(false)
82
 
 
152
  size="icon"
153
  variant={controlButtonVariant}
154
  onClick={() => setOpened((e: boolean) => !e)}
155
+ tooltip={t('graphPanel.sideBar.layoutsControl.layoutGraph')}
156
  >
157
  <GripIcon />
158
  </Button>
 
169
  key={name}
170
  className="cursor-pointer text-xs"
171
  >
172
+ {t(`graphPanel.sideBar.layoutsControl.layouts.${name}`)}
173
  </CommandItem>
174
  ))}
175
  </CommandGroup>
lightrag_webui/src/components/graph/PropertiesView.tsx CHANGED
@@ -2,6 +2,7 @@ import { useEffect, useState } from 'react'
2
  import { useGraphStore, RawNodeType, RawEdgeType } from '@/stores/graph'
3
  import Text from '@/components/ui/Text'
4
  import useLightragGraph from '@/hooks/useLightragGraph'
 
5
 
6
  /**
7
  * Component that view properties of elements in graph.
@@ -147,21 +148,22 @@ const PropertyRow = ({
147
  }
148
 
149
  const NodePropertiesView = ({ node }: { node: NodeType }) => {
 
150
  return (
151
  <div className="flex flex-col gap-2">
152
- <label className="text-md pl-1 font-bold tracking-wide text-sky-300">Node</label>
153
  <div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
154
- <PropertyRow name={'Id'} value={node.id} />
155
  <PropertyRow
156
- name={'Labels'}
157
  value={node.labels.join(', ')}
158
  onClick={() => {
159
  useGraphStore.getState().setSelectedNode(node.id, true)
160
  }}
161
  />
162
- <PropertyRow name={'Degree'} value={node.degree} />
163
  </div>
164
- <label className="text-md pl-1 font-bold tracking-wide text-yellow-400/90">Properties</label>
165
  <div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
166
  {Object.keys(node.properties)
167
  .sort()
@@ -172,7 +174,7 @@ const NodePropertiesView = ({ node }: { node: NodeType }) => {
172
  {node.relationships.length > 0 && (
173
  <>
174
  <label className="text-md pl-1 font-bold tracking-wide text-teal-600/90">
175
- Relationships
176
  </label>
177
  <div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
178
  {node.relationships.map(({ type, id, label }) => {
@@ -195,28 +197,29 @@ const NodePropertiesView = ({ node }: { node: NodeType }) => {
195
  }
196
 
197
  const EdgePropertiesView = ({ edge }: { edge: EdgeType }) => {
 
198
  return (
199
  <div className="flex flex-col gap-2">
200
- <label className="text-md pl-1 font-bold tracking-wide text-teal-600">Relationship</label>
201
  <div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
202
- <PropertyRow name={'Id'} value={edge.id} />
203
- {edge.type && <PropertyRow name={'Type'} value={edge.type} />}
204
  <PropertyRow
205
- name={'Source'}
206
  value={edge.sourceNode ? edge.sourceNode.labels.join(', ') : edge.source}
207
  onClick={() => {
208
  useGraphStore.getState().setSelectedNode(edge.source, true)
209
  }}
210
  />
211
  <PropertyRow
212
- name={'Target'}
213
  value={edge.targetNode ? edge.targetNode.labels.join(', ') : edge.target}
214
  onClick={() => {
215
  useGraphStore.getState().setSelectedNode(edge.target, true)
216
  }}
217
  />
218
  </div>
219
- <label className="text-md pl-1 font-bold tracking-wide text-yellow-400/90">Properties</label>
220
  <div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
221
  {Object.keys(edge.properties)
222
  .sort()
 
2
  import { useGraphStore, RawNodeType, RawEdgeType } from '@/stores/graph'
3
  import Text from '@/components/ui/Text'
4
  import useLightragGraph from '@/hooks/useLightragGraph'
5
+ import { useTranslation } from 'react-i18next'
6
 
7
  /**
8
  * Component that view properties of elements in graph.
 
148
  }
149
 
150
  const NodePropertiesView = ({ node }: { node: NodeType }) => {
151
+ const { t } = useTranslation()
152
  return (
153
  <div className="flex flex-col gap-2">
154
+ <label className="text-md pl-1 font-bold tracking-wide text-sky-300">{t('graphPanel.propertiesView.node.title')}</label>
155
  <div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
156
+ <PropertyRow name={t('graphPanel.propertiesView.node.id')} value={node.id} />
157
  <PropertyRow
158
+ name={t('graphPanel.propertiesView.node.labels')}
159
  value={node.labels.join(', ')}
160
  onClick={() => {
161
  useGraphStore.getState().setSelectedNode(node.id, true)
162
  }}
163
  />
164
+ <PropertyRow name={t('graphPanel.propertiesView.node.degree')} value={node.degree} />
165
  </div>
166
+ <label className="text-md pl-1 font-bold tracking-wide text-yellow-400/90">{t('graphPanel.propertiesView.node.properties')}</label>
167
  <div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
168
  {Object.keys(node.properties)
169
  .sort()
 
174
  {node.relationships.length > 0 && (
175
  <>
176
  <label className="text-md pl-1 font-bold tracking-wide text-teal-600/90">
177
+ {t('graphPanel.propertiesView.node.relationships')}
178
  </label>
179
  <div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
180
  {node.relationships.map(({ type, id, label }) => {
 
197
  }
198
 
199
  const EdgePropertiesView = ({ edge }: { edge: EdgeType }) => {
200
+ const { t } = useTranslation()
201
  return (
202
  <div className="flex flex-col gap-2">
203
+ <label className="text-md pl-1 font-bold tracking-wide text-teal-600">{t('graphPanel.propertiesView.edge.title')}</label>
204
  <div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
205
+ <PropertyRow name={t('graphPanel.propertiesView.edge.id')} value={edge.id} />
206
+ {edge.type && <PropertyRow name={t('graphPanel.propertiesView.edge.type')} value={edge.type} />}
207
  <PropertyRow
208
+ name={t('graphPanel.propertiesView.edge.source')}
209
  value={edge.sourceNode ? edge.sourceNode.labels.join(', ') : edge.source}
210
  onClick={() => {
211
  useGraphStore.getState().setSelectedNode(edge.source, true)
212
  }}
213
  />
214
  <PropertyRow
215
+ name={t('graphPanel.propertiesView.edge.target')}
216
  value={edge.targetNode ? edge.targetNode.labels.join(', ') : edge.target}
217
  onClick={() => {
218
  useGraphStore.getState().setSelectedNode(edge.target, true)
219
  }}
220
  />
221
  </div>
222
+ <label className="text-md pl-1 font-bold tracking-wide text-yellow-400/90">{t('graphPanel.propertiesView.edge.properties')}</label>
223
  <div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
224
  {Object.keys(edge.properties)
225
  .sort()
lightrag_webui/src/components/graph/Settings.tsx CHANGED
@@ -10,6 +10,7 @@ import { useSettingsStore } from '@/stores/settings'
10
  import { useBackendState } from '@/stores/state'
11
 
12
  import { SettingsIcon } from 'lucide-react'
 
13
 
14
  /**
15
  * Component that displays a checkbox with a label.
@@ -204,10 +205,12 @@ export default function Settings() {
204
  [setTempApiKey]
205
  )
206
 
 
 
207
  return (
208
  <Popover open={opened} onOpenChange={setOpened}>
209
  <PopoverTrigger asChild>
210
- <Button variant={controlButtonVariant} tooltip="Settings" size="icon">
211
  <SettingsIcon />
212
  </Button>
213
  </PopoverTrigger>
@@ -221,7 +224,7 @@ export default function Settings() {
221
  <LabeledCheckBox
222
  checked={enableHealthCheck}
223
  onCheckedChange={setEnableHealthCheck}
224
- label="Health Check"
225
  />
226
 
227
  <Separator />
@@ -229,12 +232,12 @@ export default function Settings() {
229
  <LabeledCheckBox
230
  checked={showPropertyPanel}
231
  onCheckedChange={setShowPropertyPanel}
232
- label="Show Property Panel"
233
  />
234
  <LabeledCheckBox
235
  checked={showNodeSearchBar}
236
  onCheckedChange={setShowNodeSearchBar}
237
- label="Show Search Bar"
238
  />
239
 
240
  <Separator />
@@ -242,12 +245,12 @@ export default function Settings() {
242
  <LabeledCheckBox
243
  checked={showNodeLabel}
244
  onCheckedChange={setShowNodeLabel}
245
- label="Show Node Label"
246
  />
247
  <LabeledCheckBox
248
  checked={enableNodeDrag}
249
  onCheckedChange={setEnableNodeDrag}
250
- label="Node Draggable"
251
  />
252
 
253
  <Separator />
@@ -255,51 +258,50 @@ export default function Settings() {
255
  <LabeledCheckBox
256
  checked={showEdgeLabel}
257
  onCheckedChange={setShowEdgeLabel}
258
- label="Show Edge Label"
259
  />
260
  <LabeledCheckBox
261
  checked={enableHideUnselectedEdges}
262
  onCheckedChange={setEnableHideUnselectedEdges}
263
- label="Hide Unselected Edges"
264
  />
265
  <LabeledCheckBox
266
  checked={enableEdgeEvents}
267
  onCheckedChange={setEnableEdgeEvents}
268
- label="Edge Events"
269
  />
270
 
271
  <Separator />
272
  <LabeledNumberInput
273
- label="Max Query Depth"
274
  min={1}
275
  value={graphQueryMaxDepth}
276
  onEditFinished={setGraphQueryMaxDepth}
277
  />
278
  <LabeledNumberInput
279
- label="Minimum Degree"
280
  min={0}
281
  value={graphMinDegree}
282
  onEditFinished={setGraphMinDegree}
283
  />
284
  <LabeledNumberInput
285
- label="Max Layout Iterations"
286
  min={1}
287
  max={20}
288
  value={graphLayoutMaxIterations}
289
  onEditFinished={setGraphLayoutMaxIterations}
290
  />
291
-
292
  <Separator />
293
 
294
  <div className="flex flex-col gap-2">
295
- <label className="text-sm font-medium">API Key</label>
296
  <form className="flex h-6 gap-2" onSubmit={(e) => e.preventDefault()}>
297
  <div className="w-0 flex-1">
298
  <Input
299
  type="password"
300
  value={tempApiKey}
301
  onChange={handleTempApiKeyChange}
302
- placeholder="Enter your API key"
303
  className="max-h-full w-full min-w-0"
304
  autoComplete="off"
305
  />
@@ -310,7 +312,7 @@ export default function Settings() {
310
  size="sm"
311
  className="max-h-full shrink-0"
312
  >
313
- Save
314
  </Button>
315
  </form>
316
  </div>
 
10
  import { useBackendState } from '@/stores/state'
11
 
12
  import { SettingsIcon } from 'lucide-react'
13
+ import { useTranslation } from "react-i18next";
14
 
15
  /**
16
  * Component that displays a checkbox with a label.
 
205
  [setTempApiKey]
206
  )
207
 
208
+ const { t } = useTranslation();
209
+
210
  return (
211
  <Popover open={opened} onOpenChange={setOpened}>
212
  <PopoverTrigger asChild>
213
+ <Button variant={controlButtonVariant} tooltip={t("graphPanel.sideBar.settings.settings")} size="icon">
214
  <SettingsIcon />
215
  </Button>
216
  </PopoverTrigger>
 
224
  <LabeledCheckBox
225
  checked={enableHealthCheck}
226
  onCheckedChange={setEnableHealthCheck}
227
+ label={t("graphPanel.sideBar.settings.healthCheck")}
228
  />
229
 
230
  <Separator />
 
232
  <LabeledCheckBox
233
  checked={showPropertyPanel}
234
  onCheckedChange={setShowPropertyPanel}
235
+ label={t("graphPanel.sideBar.settings.showPropertyPanel")}
236
  />
237
  <LabeledCheckBox
238
  checked={showNodeSearchBar}
239
  onCheckedChange={setShowNodeSearchBar}
240
+ label={t("graphPanel.sideBar.settings.showSearchBar")}
241
  />
242
 
243
  <Separator />
 
245
  <LabeledCheckBox
246
  checked={showNodeLabel}
247
  onCheckedChange={setShowNodeLabel}
248
+ label={t("graphPanel.sideBar.settings.showNodeLabel")}
249
  />
250
  <LabeledCheckBox
251
  checked={enableNodeDrag}
252
  onCheckedChange={setEnableNodeDrag}
253
+ label={t("graphPanel.sideBar.settings.nodeDraggable")}
254
  />
255
 
256
  <Separator />
 
258
  <LabeledCheckBox
259
  checked={showEdgeLabel}
260
  onCheckedChange={setShowEdgeLabel}
261
+ label={t("graphPanel.sideBar.settings.showEdgeLabel")}
262
  />
263
  <LabeledCheckBox
264
  checked={enableHideUnselectedEdges}
265
  onCheckedChange={setEnableHideUnselectedEdges}
266
+ label={t("graphPanel.sideBar.settings.hideUnselectedEdges")}
267
  />
268
  <LabeledCheckBox
269
  checked={enableEdgeEvents}
270
  onCheckedChange={setEnableEdgeEvents}
271
+ label={t("graphPanel.sideBar.settings.edgeEvents")}
272
  />
273
 
274
  <Separator />
275
  <LabeledNumberInput
276
+ label={t("graphPanel.sideBar.settings.maxQueryDepth")}
277
  min={1}
278
  value={graphQueryMaxDepth}
279
  onEditFinished={setGraphQueryMaxDepth}
280
  />
281
  <LabeledNumberInput
282
+ label={t("graphPanel.sideBar.settings.minDegree")}
283
  min={0}
284
  value={graphMinDegree}
285
  onEditFinished={setGraphMinDegree}
286
  />
287
  <LabeledNumberInput
288
+ label={t("graphPanel.sideBar.settings.maxLayoutIterations")}
289
  min={1}
290
  max={20}
291
  value={graphLayoutMaxIterations}
292
  onEditFinished={setGraphLayoutMaxIterations}
293
  />
 
294
  <Separator />
295
 
296
  <div className="flex flex-col gap-2">
297
+ <label className="text-sm font-medium">{t("graphPanel.sideBar.settings.apiKey")}</label>
298
  <form className="flex h-6 gap-2" onSubmit={(e) => e.preventDefault()}>
299
  <div className="w-0 flex-1">
300
  <Input
301
  type="password"
302
  value={tempApiKey}
303
  onChange={handleTempApiKeyChange}
304
+ placeholder={t("graphPanel.sideBar.settings.enterYourAPIkey")}
305
  className="max-h-full w-full min-w-0"
306
  autoComplete="off"
307
  />
 
312
  size="sm"
313
  className="max-h-full shrink-0"
314
  >
315
+ {t("graphPanel.sideBar.settings.save")}
316
  </Button>
317
  </form>
318
  </div>
lightrag_webui/src/components/graph/StatusCard.tsx CHANGED
@@ -1,58 +1,60 @@
1
  import { LightragStatus } from '@/api/lightrag'
 
2
 
3
  const StatusCard = ({ status }: { status: LightragStatus | null }) => {
 
4
  if (!status) {
5
- return <div className="text-muted-foreground text-sm">Status information unavailable</div>
6
  }
7
 
8
  return (
9
  <div className="min-w-[300px] space-y-3 text-sm">
10
  <div className="space-y-1">
11
- <h4 className="font-medium">Storage Info</h4>
12
  <div className="text-muted-foreground grid grid-cols-2 gap-1">
13
- <span>Working Directory:</span>
14
  <span className="truncate">{status.working_directory}</span>
15
- <span>Input Directory:</span>
16
  <span className="truncate">{status.input_directory}</span>
17
  </div>
18
  </div>
19
 
20
  <div className="space-y-1">
21
- <h4 className="font-medium">LLM Configuration</h4>
22
  <div className="text-muted-foreground grid grid-cols-2 gap-1">
23
- <span>LLM Binding:</span>
24
  <span>{status.configuration.llm_binding}</span>
25
- <span>LLM Binding Host:</span>
26
  <span>{status.configuration.llm_binding_host}</span>
27
- <span>LLM Model:</span>
28
  <span>{status.configuration.llm_model}</span>
29
- <span>Max Tokens:</span>
30
  <span>{status.configuration.max_tokens}</span>
31
  </div>
32
  </div>
33
 
34
  <div className="space-y-1">
35
- <h4 className="font-medium">Embedding Configuration</h4>
36
  <div className="text-muted-foreground grid grid-cols-2 gap-1">
37
- <span>Embedding Binding:</span>
38
  <span>{status.configuration.embedding_binding}</span>
39
- <span>Embedding Binding Host:</span>
40
  <span>{status.configuration.embedding_binding_host}</span>
41
- <span>Embedding Model:</span>
42
  <span>{status.configuration.embedding_model}</span>
43
  </div>
44
  </div>
45
 
46
  <div className="space-y-1">
47
- <h4 className="font-medium">Storage Configuration</h4>
48
  <div className="text-muted-foreground grid grid-cols-2 gap-1">
49
- <span>KV Storage:</span>
50
  <span>{status.configuration.kv_storage}</span>
51
- <span>Doc Status Storage:</span>
52
  <span>{status.configuration.doc_status_storage}</span>
53
- <span>Graph Storage:</span>
54
  <span>{status.configuration.graph_storage}</span>
55
- <span>Vector Storage:</span>
56
  <span>{status.configuration.vector_storage}</span>
57
  </div>
58
  </div>
 
1
  import { LightragStatus } from '@/api/lightrag'
2
+ import { useTranslation } from 'react-i18next'
3
 
4
  const StatusCard = ({ status }: { status: LightragStatus | null }) => {
5
+ const { t } = useTranslation()
6
  if (!status) {
7
+ return <div className="text-muted-foreground text-sm">{t('graphPanel.statusCard.unavailable')}</div>
8
  }
9
 
10
  return (
11
  <div className="min-w-[300px] space-y-3 text-sm">
12
  <div className="space-y-1">
13
+ <h4 className="font-medium">{t('graphPanel.statusCard.storageInfo')}</h4>
14
  <div className="text-muted-foreground grid grid-cols-2 gap-1">
15
+ <span>{t('graphPanel.statusCard.workingDirectory')}:</span>
16
  <span className="truncate">{status.working_directory}</span>
17
+ <span>{t('graphPanel.statusCard.inputDirectory')}:</span>
18
  <span className="truncate">{status.input_directory}</span>
19
  </div>
20
  </div>
21
 
22
  <div className="space-y-1">
23
+ <h4 className="font-medium">{t('graphPanel.statusCard.llmConfig')}</h4>
24
  <div className="text-muted-foreground grid grid-cols-2 gap-1">
25
+ <span>{t('graphPanel.statusCard.llmBinding')}:</span>
26
  <span>{status.configuration.llm_binding}</span>
27
+ <span>{t('graphPanel.statusCard.llmBindingHost')}:</span>
28
  <span>{status.configuration.llm_binding_host}</span>
29
+ <span>{t('graphPanel.statusCard.llmModel')}:</span>
30
  <span>{status.configuration.llm_model}</span>
31
+ <span>{t('graphPanel.statusCard.maxTokens')}:</span>
32
  <span>{status.configuration.max_tokens}</span>
33
  </div>
34
  </div>
35
 
36
  <div className="space-y-1">
37
+ <h4 className="font-medium">{t('graphPanel.statusCard.embeddingConfig')}</h4>
38
  <div className="text-muted-foreground grid grid-cols-2 gap-1">
39
+ <span>{t('graphPanel.statusCard.embeddingBinding')}:</span>
40
  <span>{status.configuration.embedding_binding}</span>
41
+ <span>{t('graphPanel.statusCard.embeddingBindingHost')}:</span>
42
  <span>{status.configuration.embedding_binding_host}</span>
43
+ <span>{t('graphPanel.statusCard.embeddingModel')}:</span>
44
  <span>{status.configuration.embedding_model}</span>
45
  </div>
46
  </div>
47
 
48
  <div className="space-y-1">
49
+ <h4 className="font-medium">{t('graphPanel.statusCard.storageConfig')}</h4>
50
  <div className="text-muted-foreground grid grid-cols-2 gap-1">
51
+ <span>{t('graphPanel.statusCard.kvStorage')}:</span>
52
  <span>{status.configuration.kv_storage}</span>
53
+ <span>{t('graphPanel.statusCard.docStatusStorage')}:</span>
54
  <span>{status.configuration.doc_status_storage}</span>
55
+ <span>{t('graphPanel.statusCard.graphStorage')}:</span>
56
  <span>{status.configuration.graph_storage}</span>
57
+ <span>{t('graphPanel.statusCard.vectorStorage')}:</span>
58
  <span>{status.configuration.vector_storage}</span>
59
  </div>
60
  </div>
lightrag_webui/src/components/graph/StatusIndicator.tsx CHANGED
@@ -3,8 +3,10 @@ import { useBackendState } from '@/stores/state'
3
  import { useEffect, useState } from 'react'
4
  import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/Popover'
5
  import StatusCard from '@/components/graph/StatusCard'
 
6
 
7
  const StatusIndicator = () => {
 
8
  const health = useBackendState.use.health()
9
  const lastCheckTime = useBackendState.use.lastCheckTime()
10
  const status = useBackendState.use.status()
@@ -33,7 +35,7 @@ const StatusIndicator = () => {
33
  )}
34
  />
35
  <span className="text-muted-foreground text-xs">
36
- {health ? 'Connected' : 'Disconnected'}
37
  </span>
38
  </div>
39
  </PopoverTrigger>
 
3
  import { useEffect, useState } from 'react'
4
  import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/Popover'
5
  import StatusCard from '@/components/graph/StatusCard'
6
+ import { useTranslation } from 'react-i18next'
7
 
8
  const StatusIndicator = () => {
9
+ const { t } = useTranslation()
10
  const health = useBackendState.use.health()
11
  const lastCheckTime = useBackendState.use.lastCheckTime()
12
  const status = useBackendState.use.status()
 
35
  )}
36
  />
37
  <span className="text-muted-foreground text-xs">
38
+ {health ? t('graphPanel.statusIndicator.connected') : t('graphPanel.statusIndicator.disconnected')}
39
  </span>
40
  </div>
41
  </PopoverTrigger>
lightrag_webui/src/components/graph/ZoomControl.tsx CHANGED
@@ -3,12 +3,14 @@ import { useCallback } from 'react'
3
  import Button from '@/components/ui/Button'
4
  import { ZoomInIcon, ZoomOutIcon, FullscreenIcon } from 'lucide-react'
5
  import { controlButtonVariant } from '@/lib/constants'
 
6
 
7
  /**
8
  * Component that provides zoom controls for the graph viewer.
9
  */
10
  const ZoomControl = () => {
11
  const { zoomIn, zoomOut, reset } = useCamera({ duration: 200, factor: 1.5 })
 
12
 
13
  const handleZoomIn = useCallback(() => zoomIn(), [zoomIn])
14
  const handleZoomOut = useCallback(() => zoomOut(), [zoomOut])
@@ -16,16 +18,16 @@ const ZoomControl = () => {
16
 
17
  return (
18
  <>
19
- <Button variant={controlButtonVariant} onClick={handleZoomIn} tooltip="Zoom In" size="icon">
20
  <ZoomInIcon />
21
  </Button>
22
- <Button variant={controlButtonVariant} onClick={handleZoomOut} tooltip="Zoom Out" size="icon">
23
  <ZoomOutIcon />
24
  </Button>
25
  <Button
26
  variant={controlButtonVariant}
27
  onClick={handleResetZoom}
28
- tooltip="Reset Zoom"
29
  size="icon"
30
  >
31
  <FullscreenIcon />
 
3
  import Button from '@/components/ui/Button'
4
  import { ZoomInIcon, ZoomOutIcon, FullscreenIcon } from 'lucide-react'
5
  import { controlButtonVariant } from '@/lib/constants'
6
+ import { useTranslation } from "react-i18next";
7
 
8
  /**
9
  * Component that provides zoom controls for the graph viewer.
10
  */
11
  const ZoomControl = () => {
12
  const { zoomIn, zoomOut, reset } = useCamera({ duration: 200, factor: 1.5 })
13
+ const { t } = useTranslation();
14
 
15
  const handleZoomIn = useCallback(() => zoomIn(), [zoomIn])
16
  const handleZoomOut = useCallback(() => zoomOut(), [zoomOut])
 
18
 
19
  return (
20
  <>
21
+ <Button variant={controlButtonVariant} onClick={handleZoomIn} tooltip={t("graphPanel.sideBar.zoomControl.zoomIn")} size="icon">
22
  <ZoomInIcon />
23
  </Button>
24
+ <Button variant={controlButtonVariant} onClick={handleZoomOut} tooltip={t("graphPanel.sideBar.zoomControl.zoomOut")} size="icon">
25
  <ZoomOutIcon />
26
  </Button>
27
  <Button
28
  variant={controlButtonVariant}
29
  onClick={handleResetZoom}
30
+ tooltip={t("graphPanel.sideBar.zoomControl.resetZoom")}
31
  size="icon"
32
  >
33
  <FullscreenIcon />
lightrag_webui/src/components/retrieval/ChatMessage.tsx CHANGED
@@ -15,18 +15,21 @@ import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter'
15
  import { oneLight, oneDark } from 'react-syntax-highlighter/dist/cjs/styles/prism'
16
 
17
  import { LoaderIcon, CopyIcon } from 'lucide-react'
 
18
 
19
  export type MessageWithError = Message & {
20
  isError?: boolean
21
  }
22
 
23
  export const ChatMessage = ({ message }: { message: MessageWithError }) => {
 
 
24
  const handleCopyMarkdown = useCallback(async () => {
25
  if (message.content) {
26
  try {
27
  await navigator.clipboard.writeText(message.content)
28
  } catch (err) {
29
- console.error('Failed to copy:', err)
30
  }
31
  }
32
  }, [message])
@@ -57,7 +60,7 @@ export const ChatMessage = ({ message }: { message: MessageWithError }) => {
57
  <Button
58
  onClick={handleCopyMarkdown}
59
  className="absolute right-0 bottom-0 size-6 rounded-md opacity-20 transition-opacity hover:opacity-100"
60
- tooltip="Copy to clipboard"
61
  variant="default"
62
  size="icon"
63
  >
 
15
  import { oneLight, oneDark } from 'react-syntax-highlighter/dist/cjs/styles/prism'
16
 
17
  import { LoaderIcon, CopyIcon } from 'lucide-react'
18
+ import { useTranslation } from 'react-i18next'
19
 
20
  export type MessageWithError = Message & {
21
  isError?: boolean
22
  }
23
 
24
  export const ChatMessage = ({ message }: { message: MessageWithError }) => {
25
+ const { t } = useTranslation()
26
+
27
  const handleCopyMarkdown = useCallback(async () => {
28
  if (message.content) {
29
  try {
30
  await navigator.clipboard.writeText(message.content)
31
  } catch (err) {
32
+ console.error(t('chat.copyError'), err)
33
  }
34
  }
35
  }, [message])
 
60
  <Button
61
  onClick={handleCopyMarkdown}
62
  className="absolute right-0 bottom-0 size-6 rounded-md opacity-20 transition-opacity hover:opacity-100"
63
+ tooltip={t('retrievePanel.chatMessage.copyTooltip')}
64
  variant="default"
65
  size="icon"
66
  >
lightrag_webui/src/components/retrieval/QuerySettings.tsx CHANGED
@@ -14,8 +14,10 @@ import {
14
  SelectValue
15
  } from '@/components/ui/Select'
16
  import { useSettingsStore } from '@/stores/settings'
 
17
 
18
  export default function QuerySettings() {
 
19
  const querySettings = useSettingsStore((state) => state.querySettings)
20
 
21
  const handleChange = useCallback((key: keyof QueryRequest, value: any) => {
@@ -25,8 +27,8 @@ export default function QuerySettings() {
25
  return (
26
  <Card className="flex shrink-0 flex-col">
27
  <CardHeader className="px-4 pt-4 pb-2">
28
- <CardTitle>Parameters</CardTitle>
29
- <CardDescription>Configure your query parameters</CardDescription>
30
  </CardHeader>
31
  <CardContent className="m-0 flex grow flex-col p-0 text-xs">
32
  <div className="relative size-full">
@@ -35,8 +37,8 @@ export default function QuerySettings() {
35
  <>
36
  <Text
37
  className="ml-1"
38
- text="Query Mode"
39
- tooltip="Select the retrieval strategy:\n• Naive: Basic search without advanced techniques\n• Local: Context-dependent information retrieval\n• Global: Utilizes global knowledge base\n• Hybrid: Combines local and global retrieval\n• Mix: Integrates knowledge graph with vector retrieval"
40
  side="left"
41
  />
42
  <Select
@@ -48,11 +50,11 @@ export default function QuerySettings() {
48
  </SelectTrigger>
49
  <SelectContent>
50
  <SelectGroup>
51
- <SelectItem value="naive">Naive</SelectItem>
52
- <SelectItem value="local">Local</SelectItem>
53
- <SelectItem value="global">Global</SelectItem>
54
- <SelectItem value="hybrid">Hybrid</SelectItem>
55
- <SelectItem value="mix">Mix</SelectItem>
56
  </SelectGroup>
57
  </SelectContent>
58
  </Select>
@@ -62,8 +64,8 @@ export default function QuerySettings() {
62
  <>
63
  <Text
64
  className="ml-1"
65
- text="Response Format"
66
- tooltip="Defines the response format. Examples:\n• Multiple Paragraphs\n• Single Paragraph\n• Bullet Points"
67
  side="left"
68
  />
69
  <Select
@@ -75,9 +77,9 @@ export default function QuerySettings() {
75
  </SelectTrigger>
76
  <SelectContent>
77
  <SelectGroup>
78
- <SelectItem value="Multiple Paragraphs">Multiple Paragraphs</SelectItem>
79
- <SelectItem value="Single Paragraph">Single Paragraph</SelectItem>
80
- <SelectItem value="Bullet Points">Bullet Points</SelectItem>
81
  </SelectGroup>
82
  </SelectContent>
83
  </Select>
@@ -87,8 +89,8 @@ export default function QuerySettings() {
87
  <>
88
  <Text
89
  className="ml-1"
90
- text="Top K Results"
91
- tooltip="Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode"
92
  side="left"
93
  />
94
  <NumberInput
@@ -97,7 +99,7 @@ export default function QuerySettings() {
97
  value={querySettings.top_k}
98
  onValueChange={(v) => handleChange('top_k', v)}
99
  min={1}
100
- placeholder="Number of results"
101
  />
102
  </>
103
 
@@ -106,8 +108,8 @@ export default function QuerySettings() {
106
  <>
107
  <Text
108
  className="ml-1"
109
- text="Max Tokens for Text Unit"
110
- tooltip="Maximum number of tokens allowed for each retrieved text chunk"
111
  side="left"
112
  />
113
  <NumberInput
@@ -116,14 +118,14 @@ export default function QuerySettings() {
116
  value={querySettings.max_token_for_text_unit}
117
  onValueChange={(v) => handleChange('max_token_for_text_unit', v)}
118
  min={1}
119
- placeholder="Max tokens for text unit"
120
  />
121
  </>
122
 
123
  <>
124
  <Text
125
- text="Max Tokens for Global Context"
126
- tooltip="Maximum number of tokens allocated for relationship descriptions in global retrieval"
127
  side="left"
128
  />
129
  <NumberInput
@@ -132,15 +134,15 @@ export default function QuerySettings() {
132
  value={querySettings.max_token_for_global_context}
133
  onValueChange={(v) => handleChange('max_token_for_global_context', v)}
134
  min={1}
135
- placeholder="Max tokens for global context"
136
  />
137
  </>
138
 
139
  <>
140
  <Text
141
  className="ml-1"
142
- text="Max Tokens for Local Context"
143
- tooltip="Maximum number of tokens allocated for entity descriptions in local retrieval"
144
  side="left"
145
  />
146
  <NumberInput
@@ -149,7 +151,7 @@ export default function QuerySettings() {
149
  value={querySettings.max_token_for_local_context}
150
  onValueChange={(v) => handleChange('max_token_for_local_context', v)}
151
  min={1}
152
- placeholder="Max tokens for local context"
153
  />
154
  </>
155
  </>
@@ -158,8 +160,8 @@ export default function QuerySettings() {
158
  <>
159
  <Text
160
  className="ml-1"
161
- text="History Turns"
162
- tooltip="Number of complete conversation turns (user-assistant pairs) to consider in the response context"
163
  side="left"
164
  />
165
  <NumberInput
@@ -170,7 +172,7 @@ export default function QuerySettings() {
170
  value={querySettings.history_turns}
171
  onValueChange={(v) => handleChange('history_turns', v)}
172
  min={0}
173
- placeholder="Number of history turns"
174
  />
175
  </>
176
 
@@ -179,8 +181,8 @@ export default function QuerySettings() {
179
  <>
180
  <Text
181
  className="ml-1"
182
- text="High-Level Keywords"
183
- tooltip="List of high-level keywords to prioritize in retrieval. Separate with commas"
184
  side="left"
185
  />
186
  <Input
@@ -194,15 +196,15 @@ export default function QuerySettings() {
194
  .filter((k) => k !== '')
195
  handleChange('hl_keywords', keywords)
196
  }}
197
- placeholder="Enter keywords"
198
  />
199
  </>
200
 
201
  <>
202
  <Text
203
  className="ml-1"
204
- text="Low-Level Keywords"
205
- tooltip="List of low-level keywords to refine retrieval focus. Separate with commas"
206
  side="left"
207
  />
208
  <Input
@@ -216,7 +218,7 @@ export default function QuerySettings() {
216
  .filter((k) => k !== '')
217
  handleChange('ll_keywords', keywords)
218
  }}
219
- placeholder="Enter keywords"
220
  />
221
  </>
222
  </>
@@ -226,8 +228,8 @@ export default function QuerySettings() {
226
  <div className="flex items-center gap-2">
227
  <Text
228
  className="ml-1"
229
- text="Only Need Context"
230
- tooltip="If True, only returns the retrieved context without generating a response"
231
  side="left"
232
  />
233
  <div className="grow" />
@@ -242,8 +244,8 @@ export default function QuerySettings() {
242
  <div className="flex items-center gap-2">
243
  <Text
244
  className="ml-1"
245
- text="Only Need Prompt"
246
- tooltip="If True, only returns the generated prompt without producing a response"
247
  side="left"
248
  />
249
  <div className="grow" />
@@ -258,8 +260,8 @@ export default function QuerySettings() {
258
  <div className="flex items-center gap-2">
259
  <Text
260
  className="ml-1"
261
- text="Stream Response"
262
- tooltip="If True, enables streaming output for real-time responses"
263
  side="left"
264
  />
265
  <div className="grow" />
 
14
  SelectValue
15
  } from '@/components/ui/Select'
16
  import { useSettingsStore } from '@/stores/settings'
17
+ import { useTranslation } from 'react-i18next'
18
 
19
  export default function QuerySettings() {
20
+ const { t } = useTranslation()
21
  const querySettings = useSettingsStore((state) => state.querySettings)
22
 
23
  const handleChange = useCallback((key: keyof QueryRequest, value: any) => {
 
27
  return (
28
  <Card className="flex shrink-0 flex-col">
29
  <CardHeader className="px-4 pt-4 pb-2">
30
+ <CardTitle>{t('retrievePanel.querySettings.parametersTitle')}</CardTitle>
31
+ <CardDescription>{t('retrievePanel.querySettings.parametersDescription')}</CardDescription>
32
  </CardHeader>
33
  <CardContent className="m-0 flex grow flex-col p-0 text-xs">
34
  <div className="relative size-full">
 
37
  <>
38
  <Text
39
  className="ml-1"
40
+ text={t('retrievePanel.querySettings.queryMode')}
41
+ tooltip={t('retrievePanel.querySettings.queryModeTooltip')}
42
  side="left"
43
  />
44
  <Select
 
50
  </SelectTrigger>
51
  <SelectContent>
52
  <SelectGroup>
53
+ <SelectItem value="naive">{t('retrievePanel.querySettings.queryModeOptions.naive')}</SelectItem>
54
+ <SelectItem value="local">{t('retrievePanel.querySettings.queryModeOptions.local')}</SelectItem>
55
+ <SelectItem value="global">{t('retrievePanel.querySettings.queryModeOptions.global')}</SelectItem>
56
+ <SelectItem value="hybrid">{t('retrievePanel.querySettings.queryModeOptions.hybrid')}</SelectItem>
57
+ <SelectItem value="mix">{t('retrievePanel.querySettings.queryModeOptions.mix')}</SelectItem>
58
  </SelectGroup>
59
  </SelectContent>
60
  </Select>
 
64
  <>
65
  <Text
66
  className="ml-1"
67
+ text={t('retrievePanel.querySettings.responseFormat')}
68
+ tooltip={t('retrievePanel.querySettings.responseFormatTooltip')}
69
  side="left"
70
  />
71
  <Select
 
77
  </SelectTrigger>
78
  <SelectContent>
79
  <SelectGroup>
80
+ <SelectItem value="Multiple Paragraphs">{t('retrievePanel.querySettings.responseFormatOptions.multipleParagraphs')}</SelectItem>
81
+ <SelectItem value="Single Paragraph">{t('retrievePanel.querySettings.responseFormatOptions.singleParagraph')}</SelectItem>
82
+ <SelectItem value="Bullet Points">{t('retrievePanel.querySettings.responseFormatOptions.bulletPoints')}</SelectItem>
83
  </SelectGroup>
84
  </SelectContent>
85
  </Select>
 
89
  <>
90
  <Text
91
  className="ml-1"
92
+ text={t('retrievePanel.querySettings.topK')}
93
+ tooltip={t('retrievePanel.querySettings.topKTooltip')}
94
  side="left"
95
  />
96
  <NumberInput
 
99
  value={querySettings.top_k}
100
  onValueChange={(v) => handleChange('top_k', v)}
101
  min={1}
102
+ placeholder={t('retrievePanel.querySettings.topKPlaceholder')}
103
  />
104
  </>
105
 
 
108
  <>
109
  <Text
110
  className="ml-1"
111
+ text={t('retrievePanel.querySettings.maxTokensTextUnit')}
112
+ tooltip={t('retrievePanel.querySettings.maxTokensTextUnitTooltip')}
113
  side="left"
114
  />
115
  <NumberInput
 
118
  value={querySettings.max_token_for_text_unit}
119
  onValueChange={(v) => handleChange('max_token_for_text_unit', v)}
120
  min={1}
121
+ placeholder={t('retrievePanel.querySettings.maxTokensTextUnit')}
122
  />
123
  </>
124
 
125
  <>
126
  <Text
127
+ text={t('retrievePanel.querySettings.maxTokensGlobalContext')}
128
+ tooltip={t('retrievePanel.querySettings.maxTokensGlobalContextTooltip')}
129
  side="left"
130
  />
131
  <NumberInput
 
134
  value={querySettings.max_token_for_global_context}
135
  onValueChange={(v) => handleChange('max_token_for_global_context', v)}
136
  min={1}
137
+ placeholder={t('retrievePanel.querySettings.maxTokensGlobalContext')}
138
  />
139
  </>
140
 
141
  <>
142
  <Text
143
  className="ml-1"
144
+ text={t('retrievePanel.querySettings.maxTokensLocalContext')}
145
+ tooltip={t('retrievePanel.querySettings.maxTokensLocalContextTooltip')}
146
  side="left"
147
  />
148
  <NumberInput
 
151
  value={querySettings.max_token_for_local_context}
152
  onValueChange={(v) => handleChange('max_token_for_local_context', v)}
153
  min={1}
154
+ placeholder={t('retrievePanel.querySettings.maxTokensLocalContext')}
155
  />
156
  </>
157
  </>
 
160
  <>
161
  <Text
162
  className="ml-1"
163
+ text={t('retrievePanel.querySettings.historyTurns')}
164
+ tooltip={t('retrievePanel.querySettings.historyTurnsTooltip')}
165
  side="left"
166
  />
167
  <NumberInput
 
172
  value={querySettings.history_turns}
173
  onValueChange={(v) => handleChange('history_turns', v)}
174
  min={0}
175
+ placeholder={t('retrievePanel.querySettings.historyTurnsPlaceholder')}
176
  />
177
  </>
178
 
 
181
  <>
182
  <Text
183
  className="ml-1"
184
+ text={t('retrievePanel.querySettings.hlKeywords')}
185
+ tooltip={t('retrievePanel.querySettings.hlKeywordsTooltip')}
186
  side="left"
187
  />
188
  <Input
 
196
  .filter((k) => k !== '')
197
  handleChange('hl_keywords', keywords)
198
  }}
199
+ placeholder={t('retrievePanel.querySettings.hlkeywordsPlaceHolder')}
200
  />
201
  </>
202
 
203
  <>
204
  <Text
205
  className="ml-1"
206
+ text={t('retrievePanel.querySettings.llKeywords')}
207
+ tooltip={t('retrievePanel.querySettings.llKeywordsTooltip')}
208
  side="left"
209
  />
210
  <Input
 
218
  .filter((k) => k !== '')
219
  handleChange('ll_keywords', keywords)
220
  }}
221
+ placeholder={t('retrievePanel.querySettings.hlkeywordsPlaceHolder')}
222
  />
223
  </>
224
  </>
 
228
  <div className="flex items-center gap-2">
229
  <Text
230
  className="ml-1"
231
+ text={t('retrievePanel.querySettings.onlyNeedContext')}
232
+ tooltip={t('retrievePanel.querySettings.onlyNeedContextTooltip')}
233
  side="left"
234
  />
235
  <div className="grow" />
 
244
  <div className="flex items-center gap-2">
245
  <Text
246
  className="ml-1"
247
+ text={t('retrievePanel.querySettings.onlyNeedPrompt')}
248
+ tooltip={t('retrievePanel.querySettings.onlyNeedPromptTooltip')}
249
  side="left"
250
  />
251
  <div className="grow" />
 
260
  <div className="flex items-center gap-2">
261
  <Text
262
  className="ml-1"
263
+ text={t('retrievePanel.querySettings.streamResponse')}
264
+ tooltip={t('retrievePanel.querySettings.streamResponseTooltip')}
265
  side="left"
266
  />
267
  <div className="grow" />
lightrag_webui/src/features/DocumentManager.tsx CHANGED
@@ -1,4 +1,5 @@
1
  import { useState, useEffect, useCallback } from 'react'
 
2
  import Button from '@/components/ui/Button'
3
  import {
4
  Table,
@@ -22,6 +23,7 @@ import { useBackendState } from '@/stores/state'
22
  import { RefreshCwIcon } from 'lucide-react'
23
 
24
  export default function DocumentManager() {
 
25
  const health = useBackendState.use.health()
26
  const [docs, setDocs] = useState<DocsStatusesResponse | null>(null)
27
 
@@ -44,7 +46,7 @@ export default function DocumentManager() {
44
  setDocs(null)
45
  }
46
  } catch (err) {
47
- toast.error('Failed to load documents\n' + errorMessage(err))
48
  }
49
  }, [setDocs])
50
 
@@ -57,7 +59,7 @@ export default function DocumentManager() {
57
  const { status } = await scanNewDocuments()
58
  toast.message(status)
59
  } catch (err) {
60
- toast.error('Failed to load documents\n' + errorMessage(err))
61
  }
62
  }, [])
63
 
@@ -69,7 +71,7 @@ export default function DocumentManager() {
69
  try {
70
  await fetchDocuments()
71
  } catch (err) {
72
- toast.error('Failed to get scan progress\n' + errorMessage(err))
73
  }
74
  }, 5000)
75
  return () => clearInterval(interval)
@@ -78,7 +80,7 @@ export default function DocumentManager() {
78
  return (
79
  <Card className="!size-full !rounded-none !border-none">
80
  <CardHeader>
81
- <CardTitle className="text-lg">Document Management</CardTitle>
82
  </CardHeader>
83
  <CardContent className="space-y-4">
84
  <div className="flex gap-2">
@@ -86,10 +88,10 @@ export default function DocumentManager() {
86
  variant="outline"
87
  onClick={scanDocuments}
88
  side="bottom"
89
- tooltip="Scan documents"
90
  size="sm"
91
  >
92
- <RefreshCwIcon /> Scan
93
  </Button>
94
  <div className="flex-1" />
95
  <ClearDocumentsDialog />
@@ -98,29 +100,29 @@ export default function DocumentManager() {
98
 
99
  <Card>
100
  <CardHeader>
101
- <CardTitle>Uploaded documents</CardTitle>
102
- <CardDescription>view the uploaded documents here</CardDescription>
103
  </CardHeader>
104
 
105
  <CardContent>
106
  {!docs && (
107
  <EmptyCard
108
- title="No documents uploaded"
109
- description="upload documents to see them here"
110
  />
111
  )}
112
  {docs && (
113
  <Table>
114
  <TableHeader>
115
  <TableRow>
116
- <TableHead>ID</TableHead>
117
- <TableHead>Summary</TableHead>
118
- <TableHead>Status</TableHead>
119
- <TableHead>Length</TableHead>
120
- <TableHead>Chunks</TableHead>
121
- <TableHead>Created</TableHead>
122
- <TableHead>Updated</TableHead>
123
- <TableHead>Metadata</TableHead>
124
  </TableRow>
125
  </TableHeader>
126
  <TableBody className="text-sm">
@@ -137,13 +139,13 @@ export default function DocumentManager() {
137
  </TableCell>
138
  <TableCell>
139
  {status === 'processed' && (
140
- <span className="text-green-600">Completed</span>
141
  )}
142
  {status === 'processing' && (
143
- <span className="text-blue-600">Processing</span>
144
  )}
145
- {status === 'pending' && <span className="text-yellow-600">Pending</span>}
146
- {status === 'failed' && <span className="text-red-600">Failed</span>}
147
  {doc.error && (
148
  <span className="ml-2 text-red-500" title={doc.error}>
149
  ⚠️
 
1
  import { useState, useEffect, useCallback } from 'react'
2
+ import { useTranslation } from 'react-i18next'
3
  import Button from '@/components/ui/Button'
4
  import {
5
  Table,
 
23
  import { RefreshCwIcon } from 'lucide-react'
24
 
25
  export default function DocumentManager() {
26
+ const { t } = useTranslation()
27
  const health = useBackendState.use.health()
28
  const [docs, setDocs] = useState<DocsStatusesResponse | null>(null)
29
 
 
46
  setDocs(null)
47
  }
48
  } catch (err) {
49
+ toast.error(t('documentPanel.documentManager.errors.loadFailed', { error: errorMessage(err) }))
50
  }
51
  }, [setDocs])
52
 
 
59
  const { status } = await scanNewDocuments()
60
  toast.message(status)
61
  } catch (err) {
62
+ toast.error(t('documentPanel.documentManager.errors.scanFailed', { error: errorMessage(err) }))
63
  }
64
  }, [])
65
 
 
71
  try {
72
  await fetchDocuments()
73
  } catch (err) {
74
+ toast.error(t('documentPanel.documentManager.errors.scanProgressFailed', { error: errorMessage(err) }))
75
  }
76
  }, 5000)
77
  return () => clearInterval(interval)
 
80
  return (
81
  <Card className="!size-full !rounded-none !border-none">
82
  <CardHeader>
83
+ <CardTitle className="text-lg">{t('documentPanel.documentManager.title')}</CardTitle>
84
  </CardHeader>
85
  <CardContent className="space-y-4">
86
  <div className="flex gap-2">
 
88
  variant="outline"
89
  onClick={scanDocuments}
90
  side="bottom"
91
+ tooltip={t('documentPanel.documentManager.scanTooltip')}
92
  size="sm"
93
  >
94
+ <RefreshCwIcon /> {t('documentPanel.documentManager.scanButton')}
95
  </Button>
96
  <div className="flex-1" />
97
  <ClearDocumentsDialog />
 
100
 
101
  <Card>
102
  <CardHeader>
103
+ <CardTitle>{t('documentPanel.documentManager.uploadedTitle')}</CardTitle>
104
+ <CardDescription>{t('documentPanel.documentManager.uploadedDescription')}</CardDescription>
105
  </CardHeader>
106
 
107
  <CardContent>
108
  {!docs && (
109
  <EmptyCard
110
+ title={t('documentPanel.documentManager.emptyTitle')}
111
+ description={t('documentPanel.documentManager.emptyDescription')}
112
  />
113
  )}
114
  {docs && (
115
  <Table>
116
  <TableHeader>
117
  <TableRow>
118
+ <TableHead>{t('documentPanel.documentManager.columns.id')}</TableHead>
119
+ <TableHead>{t('documentPanel.documentManager.columns.summary')}</TableHead>
120
+ <TableHead>{t('documentPanel.documentManager.columns.status')}</TableHead>
121
+ <TableHead>{t('documentPanel.documentManager.columns.length')}</TableHead>
122
+ <TableHead>{t('documentPanel.documentManager.columns.chunks')}</TableHead>
123
+ <TableHead>{t('documentPanel.documentManager.columns.created')}</TableHead>
124
+ <TableHead>{t('documentPanel.documentManager.columns.updated')}</TableHead>
125
+ <TableHead>{t('documentPanel.documentManager.columns.metadata')}</TableHead>
126
  </TableRow>
127
  </TableHeader>
128
  <TableBody className="text-sm">
 
139
  </TableCell>
140
  <TableCell>
141
  {status === 'processed' && (
142
+ <span className="text-green-600">{t('documentPanel.documentManager.status.completed')}</span>
143
  )}
144
  {status === 'processing' && (
145
+ <span className="text-blue-600">{t('documentPanel.documentManager.status.processing')}</span>
146
  )}
147
+ {status === 'pending' && <span className="text-yellow-600">{t('documentPanel.documentManager.status.pending')}</span>}
148
+ {status === 'failed' && <span className="text-red-600">{t('documentPanel.documentManager.status.failed')}</span>}
149
  {doc.error && (
150
  <span className="ml-2 text-red-500" title={doc.error}>
151
  ⚠️
lightrag_webui/src/features/RetrievalTesting.tsx CHANGED
@@ -8,8 +8,10 @@ import { useDebounce } from '@/hooks/useDebounce'
8
  import QuerySettings from '@/components/retrieval/QuerySettings'
9
  import { ChatMessage, MessageWithError } from '@/components/retrieval/ChatMessage'
10
  import { EraserIcon, SendIcon } from 'lucide-react'
 
11
 
12
  export default function RetrievalTesting() {
 
13
  const [messages, setMessages] = useState<MessageWithError[]>(
14
  () => useSettingsStore.getState().retrievalHistory || []
15
  )
@@ -89,7 +91,7 @@ export default function RetrievalTesting() {
89
  }
90
  } catch (err) {
91
  // Handle error
92
- updateAssistantMessage(`Error: Failed to get response\n${errorMessage(err)}`, true)
93
  } finally {
94
  // Clear loading and add messages to state
95
  setIsLoading(false)
@@ -98,7 +100,7 @@ export default function RetrievalTesting() {
98
  .setRetrievalHistory([...prevMessages, userMessage, assistantMessage])
99
  }
100
  },
101
- [inputValue, isLoading, messages, setMessages]
102
  )
103
 
104
  const debouncedMessages = useDebounce(messages, 100)
@@ -117,7 +119,7 @@ export default function RetrievalTesting() {
117
  <div className="flex min-h-0 flex-1 flex-col gap-2">
118
  {messages.length === 0 ? (
119
  <div className="text-muted-foreground flex h-full items-center justify-center text-lg">
120
- Start a retrieval by typing your query below
121
  </div>
122
  ) : (
123
  messages.map((message, idx) => (
@@ -143,18 +145,18 @@ export default function RetrievalTesting() {
143
  size="sm"
144
  >
145
  <EraserIcon />
146
- Clear
147
  </Button>
148
  <Input
149
  className="flex-1"
150
  value={inputValue}
151
  onChange={(e) => setInputValue(e.target.value)}
152
- placeholder="Type your query..."
153
  disabled={isLoading}
154
  />
155
  <Button type="submit" variant="default" disabled={isLoading} size="sm">
156
  <SendIcon />
157
- Send
158
  </Button>
159
  </form>
160
  </div>
 
8
  import QuerySettings from '@/components/retrieval/QuerySettings'
9
  import { ChatMessage, MessageWithError } from '@/components/retrieval/ChatMessage'
10
  import { EraserIcon, SendIcon } from 'lucide-react'
11
+ import { useTranslation } from 'react-i18next'
12
 
13
  export default function RetrievalTesting() {
14
+ const { t } = useTranslation()
15
  const [messages, setMessages] = useState<MessageWithError[]>(
16
  () => useSettingsStore.getState().retrievalHistory || []
17
  )
 
91
  }
92
  } catch (err) {
93
  // Handle error
94
+ updateAssistantMessage(`${t('retrievePanel.retrieval.error')}\n${errorMessage(err)}`, true)
95
  } finally {
96
  // Clear loading and add messages to state
97
  setIsLoading(false)
 
100
  .setRetrievalHistory([...prevMessages, userMessage, assistantMessage])
101
  }
102
  },
103
+ [inputValue, isLoading, messages, setMessages, t]
104
  )
105
 
106
  const debouncedMessages = useDebounce(messages, 100)
 
119
  <div className="flex min-h-0 flex-1 flex-col gap-2">
120
  {messages.length === 0 ? (
121
  <div className="text-muted-foreground flex h-full items-center justify-center text-lg">
122
+ {t('retrievePanel.retrieval.startPrompt')}
123
  </div>
124
  ) : (
125
  messages.map((message, idx) => (
 
145
  size="sm"
146
  >
147
  <EraserIcon />
148
+ {t('retrievePanel.retrieval.clear')}
149
  </Button>
150
  <Input
151
  className="flex-1"
152
  value={inputValue}
153
  onChange={(e) => setInputValue(e.target.value)}
154
+ placeholder={t('retrievePanel.retrieval.placeholder')}
155
  disabled={isLoading}
156
  />
157
  <Button type="submit" variant="default" disabled={isLoading} size="sm">
158
  <SendIcon />
159
+ {t('retrievePanel.retrieval.send')}
160
  </Button>
161
  </form>
162
  </div>
lightrag_webui/src/features/SiteHeader.tsx CHANGED
@@ -4,6 +4,7 @@ import ThemeToggle from '@/components/ThemeToggle'
4
  import { TabsList, TabsTrigger } from '@/components/ui/Tabs'
5
  import { useSettingsStore } from '@/stores/settings'
6
  import { cn } from '@/lib/utils'
 
7
 
8
  import { ZapIcon, GithubIcon } from 'lucide-react'
9
 
@@ -29,21 +30,22 @@ function NavigationTab({ value, currentTab, children }: NavigationTabProps) {
29
 
30
  function TabsNavigation() {
31
  const currentTab = useSettingsStore.use.currentTab()
 
32
 
33
  return (
34
  <div className="flex h-8 self-center">
35
  <TabsList className="h-full gap-2">
36
  <NavigationTab value="documents" currentTab={currentTab}>
37
- Documents
38
  </NavigationTab>
39
  <NavigationTab value="knowledge-graph" currentTab={currentTab}>
40
- Knowledge Graph
41
  </NavigationTab>
42
  <NavigationTab value="retrieval" currentTab={currentTab}>
43
- Retrieval
44
  </NavigationTab>
45
  <NavigationTab value="api" currentTab={currentTab}>
46
- API
47
  </NavigationTab>
48
  </TabsList>
49
  </div>
@@ -51,6 +53,7 @@ function TabsNavigation() {
51
  }
52
 
53
  export default function SiteHeader() {
 
54
  return (
55
  <header className="border-border/40 bg-background/95 supports-[backdrop-filter]:bg-background/60 sticky top-0 z-50 flex h-10 w-full border-b px-4 backdrop-blur">
56
  <a href="/" className="mr-6 flex items-center gap-2">
@@ -64,7 +67,7 @@ export default function SiteHeader() {
64
  </div>
65
 
66
  <nav className="flex items-center">
67
- <Button variant="ghost" size="icon" side="bottom" tooltip="Project Repository">
68
  <a href={SiteInfo.github} target="_blank" rel="noopener noreferrer">
69
  <GithubIcon className="size-4" aria-hidden="true" />
70
  </a>
 
4
  import { TabsList, TabsTrigger } from '@/components/ui/Tabs'
5
  import { useSettingsStore } from '@/stores/settings'
6
  import { cn } from '@/lib/utils'
7
+ import { useTranslation } from 'react-i18next'
8
 
9
  import { ZapIcon, GithubIcon } from 'lucide-react'
10
 
 
30
 
31
  function TabsNavigation() {
32
  const currentTab = useSettingsStore.use.currentTab()
33
+ const { t } = useTranslation()
34
 
35
  return (
36
  <div className="flex h-8 self-center">
37
  <TabsList className="h-full gap-2">
38
  <NavigationTab value="documents" currentTab={currentTab}>
39
+ {t('header.documents')}
40
  </NavigationTab>
41
  <NavigationTab value="knowledge-graph" currentTab={currentTab}>
42
+ {t('header.knowledgeGraph')}
43
  </NavigationTab>
44
  <NavigationTab value="retrieval" currentTab={currentTab}>
45
+ {t('header.retrieval')}
46
  </NavigationTab>
47
  <NavigationTab value="api" currentTab={currentTab}>
48
+ {t('header.api')}
49
  </NavigationTab>
50
  </TabsList>
51
  </div>
 
53
  }
54
 
55
  export default function SiteHeader() {
56
+ const { t } = useTranslation()
57
  return (
58
  <header className="border-border/40 bg-background/95 supports-[backdrop-filter]:bg-background/60 sticky top-0 z-50 flex h-10 w-full border-b px-4 backdrop-blur">
59
  <a href="/" className="mr-6 flex items-center gap-2">
 
67
  </div>
68
 
69
  <nav className="flex items-center">
70
+ <Button variant="ghost" size="icon" side="bottom" tooltip={t('header.projectRepository')}>
71
  <a href={SiteInfo.github} target="_blank" rel="noopener noreferrer">
72
  <GithubIcon className="size-4" aria-hidden="true" />
73
  </a>
lightrag_webui/src/i18n.js ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// i18n bootstrap: registers the bundled locale tables with i18next and
// plugs it into react-i18next so components can call useTranslation().
import i18n from "i18next";
import { initReactI18next } from "react-i18next";

import en from "./locales/en.json";
import zh from "./locales/zh.json";

// Bundled translation resources, keyed by language code.
const resources = {
  en: { translation: en },
  zh: { translation: zh }
};

i18n.use(initReactI18next).init({
  resources,
  lng: "en", // default
  fallbackLng: "en",
  interpolation: {
    escapeValue: false // React already escapes rendered values
  }
});

export default i18n;
lightrag_webui/src/locales/en.json ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "header": {
3
+ "documents": "Documents",
4
+ "knowledgeGraph": "Knowledge Graph",
5
+ "retrieval": "Retrieval",
6
+ "api": "API",
7
+ "projectRepository": "Project Repository",
8
+ "themeToggle": {
9
+ "switchToLight": "Switch to light theme",
10
+ "switchToDark": "Switch to dark theme"
11
+ }
12
+ },
13
+ "documentPanel": {
14
+ "clearDocuments": {
15
+ "button": "Clear",
16
+ "tooltip": "Clear documents",
17
+ "title": "Clear Documents",
18
+ "confirm": "Do you really want to clear all documents?",
19
+ "confirmButton": "YES",
20
+ "success": "Documents cleared successfully",
21
+ "failed": "Clear Documents Failed:\n{{message}}",
22
+ "error": "Clear Documents Failed:\n{{error}}"
23
+ },
24
+ "uploadDocuments": {
25
+ "button": "Upload",
26
+ "tooltip": "Upload documents",
27
+ "title": "Upload Documents",
28
+ "description": "Drag and drop your documents here or click to browse.",
29
+ "uploading": "Uploading {{name}}: {{percent}}%",
30
+ "success": "Upload Success:\n{{name}} uploaded successfully",
31
+ "failed": "Upload Failed:\n{{name}}\n{{message}}",
32
+ "error": "Upload Failed:\n{{name}}\n{{error}}",
33
+ "generalError": "Upload Failed\n{{error}}",
34
+ "fileTypes": "Supported types: TXT, MD, DOCX, PDF, PPTX, RTF, ODT, EPUB, HTML, HTM, TEX, JSON, XML, YAML, YML, CSV, LOG, CONF, INI, PROPERTIES, SQL, BAT, SH, C, CPP, PY, JAVA, JS, TS, SWIFT, GO, RB, PHP, CSS, SCSS, LESS"
35
+ },
36
+ "documentManager": {
37
+ "title": "Document Management",
38
+ "scanButton": "Scan",
39
+ "scanTooltip": "Scan documents",
40
+ "uploadedTitle": "Uploaded Documents",
41
+ "uploadedDescription": "List of uploaded documents and their statuses.",
42
+ "emptyTitle": "No Documents",
43
+ "emptyDescription": "There are no uploaded documents yet.",
44
+ "columns": {
45
+ "id": "ID",
46
+ "summary": "Summary",
47
+ "status": "Status",
48
+ "length": "Length",
49
+ "chunks": "Chunks",
50
+ "created": "Created",
51
+ "updated": "Updated",
52
+ "metadata": "Metadata"
53
+ },
54
+ "status": {
55
+ "completed": "Completed",
56
+ "processing": "Processing",
57
+ "pending": "Pending",
58
+ "failed": "Failed"
59
+ },
60
+ "errors": {
61
+ "loadFailed": "Failed to load documents\n{{error}}",
62
+ "scanFailed": "Failed to scan documents\n{{error}}",
63
+ "scanProgressFailed": "Failed to get scan progress\n{{error}}"
64
+ }
65
+ }
66
+ },
67
+ "graphPanel": {
68
+ "sideBar": {
69
+ "settings": {
70
+ "settings": "Settings",
71
+ "healthCheck": "Health Check",
72
+ "showPropertyPanel": "Show Property Panel",
73
+ "showSearchBar": "Show Search Bar",
74
+ "showNodeLabel": "Show Node Label",
75
+ "nodeDraggable": "Node Draggable",
76
+ "showEdgeLabel": "Show Edge Label",
77
+ "hideUnselectedEdges": "Hide Unselected Edges",
78
+ "edgeEvents": "Edge Events",
79
+ "maxQueryDepth": "Max Query Depth",
80
+ "minDegree": "Minimum Degree",
81
+ "maxLayoutIterations": "Max Layout Iterations",
82
+ "apiKey": "API Key",
83
+ "enterYourAPIkey": "Enter your API key",
84
+ "save": "Save"
85
+ },
86
+
87
+ "zoomControl": {
88
+ "zoomIn": "Zoom In",
89
+ "zoomOut": "Zoom Out",
90
+ "resetZoom": "Reset Zoom"
91
+ },
92
+
93
+ "layoutsControl": {
94
+ "startAnimation": "Start the layout animation",
95
+ "stopAnimation": "Stop the layout animation",
96
+ "layoutGraph": "Layout Graph",
97
+ "layouts": {
98
+ "Circular": "Circular",
99
+ "Circlepack": "Circlepack",
100
+ "Random": "Random",
101
+ "Noverlaps": "Noverlaps",
102
+ "Force Directed": "Force Directed",
103
+ "Force Atlas": "Force Atlas"
104
+ }
105
+ },
106
+
107
+ "fullScreenControl": {
108
+ "fullScreen": "Full Screen",
109
+ "windowed": "Windowed"
110
+ }
111
+ },
112
+ "statusIndicator": {
113
+ "connected": "Connected",
114
+ "disconnected": "Disconnected"
115
+ },
116
+ "statusCard": {
117
+ "unavailable": "Status information unavailable",
118
+ "storageInfo": "Storage Info",
119
+ "workingDirectory": "Working Directory",
120
+ "inputDirectory": "Input Directory",
121
+ "llmConfig": "LLM Configuration",
122
+ "llmBinding": "LLM Binding",
123
+ "llmBindingHost": "LLM Binding Host",
124
+ "llmModel": "LLM Model",
125
+ "maxTokens": "Max Tokens",
126
+ "embeddingConfig": "Embedding Configuration",
127
+ "embeddingBinding": "Embedding Binding",
128
+ "embeddingBindingHost": "Embedding Binding Host",
129
+ "embeddingModel": "Embedding Model",
130
+ "storageConfig": "Storage Configuration",
131
+ "kvStorage": "KV Storage",
132
+ "docStatusStorage": "Doc Status Storage",
133
+ "graphStorage": "Graph Storage",
134
+ "vectorStorage": "Vector Storage"
135
+ },
136
+ "propertiesView": {
137
+ "node": {
138
+ "title": "Node",
139
+ "id": "ID",
140
+ "labels": "Labels",
141
+ "degree": "Degree",
142
+ "properties": "Properties",
143
+ "relationships": "Relationships"
144
+ },
145
+ "edge": {
146
+ "title": "Relationship",
147
+ "id": "ID",
148
+ "type": "Type",
149
+ "source": "Source",
150
+ "target": "Target",
151
+ "properties": "Properties"
152
+ }
153
+ },
154
+ "search": {
155
+ "placeholder": "Search nodes...",
156
+ "message": "And {{count}} others"
157
+ },
158
+ "graphLabels": {
159
+ "selectTooltip": "Select query label",
160
+ "noLabels": "No labels found",
161
+ "label": "Label",
162
+ "placeholder": "Search labels...",
163
+ "andOthers": "And {{count}} others"
164
+ }
165
+ },
166
+ "retrievePanel": {
167
+ "chatMessage": {
168
+ "copyTooltip": "Copy to clipboard",
169
+ "copyError": "Failed to copy text to clipboard"
170
+ },
171
+ "retrieval": {
172
+ "startPrompt": "Start a retrieval by typing your query below",
173
+ "clear": "Clear",
174
+ "send": "Send",
175
+ "placeholder": "Type your query...",
176
+ "error": "Error: Failed to get response"
177
+ },
178
+ "querySettings": {
179
+ "parametersTitle": "Parameters",
180
+ "parametersDescription": "Configure your query parameters",
181
+
182
+ "queryMode": "Query Mode",
183
+ "queryModeTooltip": "Select the retrieval strategy:\n• Naive: Basic search without advanced techniques\n• Local: Context-dependent information retrieval\n• Global: Utilizes global knowledge base\n• Hybrid: Combines local and global retrieval\n• Mix: Integrates knowledge graph with vector retrieval",
184
+ "queryModeOptions": {
185
+ "naive": "Naive",
186
+ "local": "Local",
187
+ "global": "Global",
188
+ "hybrid": "Hybrid",
189
+ "mix": "Mix"
190
+ },
191
+
192
+ "responseFormat": "Response Format",
193
+ "responseFormatTooltip": "Defines the response format. Examples:\n• Multiple Paragraphs\n• Single Paragraph\n• Bullet Points",
194
+ "responseFormatOptions": {
195
+ "multipleParagraphs": "Multiple Paragraphs",
196
+ "singleParagraph": "Single Paragraph",
197
+ "bulletPoints": "Bullet Points"
198
+ },
199
+
200
+ "topK": "Top K Results",
201
+ "topKTooltip": "Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode",
202
+ "topKPlaceholder": "Number of results",
203
+
204
+ "maxTokensTextUnit": "Max Tokens for Text Unit",
205
+ "maxTokensTextUnitTooltip": "Maximum number of tokens allowed for each retrieved text chunk",
206
+
207
+ "maxTokensGlobalContext": "Max Tokens for Global Context",
208
+ "maxTokensGlobalContextTooltip": "Maximum number of tokens allocated for relationship descriptions in global retrieval",
209
+
210
+ "maxTokensLocalContext": "Max Tokens for Local Context",
211
+ "maxTokensLocalContextTooltip": "Maximum number of tokens allocated for entity descriptions in local retrieval",
212
+
213
+ "historyTurns": "History Turns",
214
+ "historyTurnsTooltip": "Number of complete conversation turns (user-assistant pairs) to consider in the response context",
215
+ "historyTurnsPlaceholder": "Number of history turns",
216
+
217
+ "hlKeywords": "High-Level Keywords",
218
+ "hlKeywordsTooltip": "List of high-level keywords to prioritize in retrieval. Separate with commas",
219
+ "hlkeywordsPlaceHolder": "Enter keywords",
220
+
221
+ "llKeywords": "Low-Level Keywords",
222
+ "llKeywordsTooltip": "List of low-level keywords to refine retrieval focus. Separate with commas",
223
+
224
+ "onlyNeedContext": "Only Need Context",
225
+ "onlyNeedContextTooltip": "If True, only returns the retrieved context without generating a response",
226
+
227
+ "onlyNeedPrompt": "Only Need Prompt",
228
+ "onlyNeedPromptTooltip": "If True, only returns the generated prompt without producing a response",
229
+
230
+ "streamResponse": "Stream Response",
231
+ "streamResponseTooltip": "If True, enables streaming output for real-time responses"
232
+ }
233
+ }
234
+ }
lightrag_webui/src/locales/zh.json ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "header": {
3
+ "documents": "文档",
4
+ "knowledgeGraph": "知识图谱",
5
+ "retrieval": "检索",
6
+ "api": "API",
7
+ "projectRepository": "项目仓库",
8
+ "themeToggle": {
9
+ "switchToLight": "切换到亮色主题",
10
+ "switchToDark": "切换到暗色主题"
11
+ }
12
+ },
13
+ "documentPanel": {
14
+ "clearDocuments": {
15
+ "button": "清除",
16
+ "tooltip": "清除文档",
17
+ "title": "清除文档",
18
+ "confirm": "您确定要清除所有文档吗?",
19
+ "confirmButton": "确定",
20
+ "success": "文档已成功清除",
21
+ "failed": "清除文档失败:\n{{message}}",
22
+ "error": "清除文档失败:\n{{error}}"
23
+ },
24
+ "uploadDocuments": {
25
+ "button": "上传",
26
+ "tooltip": "上传文档",
27
+ "title": "上传文档",
28
+ "description": "拖放文档到此处或点击浏览。",
29
+ "uploading": "正在上传 {{name}}: {{percent}}%",
30
+ "success": "上传成功:\n{{name}} 上传成功",
31
+ "failed": "上传失败:\n{{name}}\n{{message}}",
32
+ "error": "上传失败:\n{{name}}\n{{error}}",
33
+ "generalError": "上传失败\n{{error}}",
34
+ "fileTypes": "支持的文件类型: TXT, MD, DOCX, PDF, PPTX, RTF, ODT, EPUB, HTML, HTM, TEX, JSON, XML, YAML, YML, CSV, LOG, CONF, INI, PROPERTIES, SQL, BAT, SH, C, CPP, PY, JAVA, JS, TS, SWIFT, GO, RB, PHP, CSS, SCSS, LESS"
35
+ },
36
+ "documentManager": {
37
+ "title": "文档管理",
38
+ "scanButton": "扫描",
39
+ "scanTooltip": "扫描文档",
40
+ "uploadedTitle": "已上传文档",
41
+ "uploadedDescription": "已上传文档及其状态列表。",
42
+ "emptyTitle": "暂无文档",
43
+ "emptyDescription": "尚未上传任何文档。",
44
+ "columns": {
45
+ "id": "ID",
46
+ "summary": "摘要",
47
+ "status": "状态",
48
+ "length": "长度",
49
+ "chunks": "分块",
50
+ "created": "创建时间",
51
+ "updated": "更新时间",
52
+ "metadata": "元数据"
53
+ },
54
+ "status": {
55
+ "completed": "已完成",
56
+ "processing": "处理中",
57
+ "pending": "待处理",
58
+ "failed": "失败"
59
+ },
60
+ "errors": {
61
+ "loadFailed": "加载文档失败\n{{error}}",
62
+ "scanFailed": "扫描文档失败\n{{error}}",
63
+ "scanProgressFailed": "获取扫描进度失败\n{{error}}"
64
+ }
65
+ }
66
+ },
67
+ "graphPanel": {
68
+ "sideBar": {
69
+ "settings": {
70
+ "settings": "设置",
71
+ "healthCheck": "健康检查",
72
+ "showPropertyPanel": "显示属性面板",
73
+ "showSearchBar": "显示搜索栏",
74
+ "showNodeLabel": "显示节点标签",
75
+ "nodeDraggable": "节点可拖动",
76
+ "showEdgeLabel": "显示边标签",
77
+ "hideUnselectedEdges": "隐藏未选中边",
78
+ "edgeEvents": "边事件",
79
+ "maxQueryDepth": "最大查询深度",
80
+ "minDegree": "最小度数",
81
+ "maxLayoutIterations": "最大布局迭代次数",
82
+ "apiKey": "API 密钥",
83
+ "enterYourAPIkey": "输入您的 API 密钥",
84
+ "save": "保存"
85
+ },
86
+
87
+ "zoomControl": {
88
+ "zoomIn": "放大",
89
+ "zoomOut": "缩小",
90
+ "resetZoom": "重置缩放"
91
+ },
92
+
93
+ "layoutsControl": {
94
+ "startAnimation": "开始布局动画",
95
+ "stopAnimation": "停止布局动画",
96
+ "layoutGraph": "布局图",
97
+ "layouts": {
98
+ "Circular": "环形布局",
99
+ "Circlepack": "圆形打包布局",
100
+ "Random": "随机布局",
101
+ "Noverlaps": "无重叠布局",
102
+ "Force Directed": "力导向布局",
103
+ "Force Atlas": "力导向图谱布局"
104
+ }
105
+ },
106
+
107
+ "fullScreenControl": {
108
+ "fullScreen": "全屏",
109
+ "windowed": "窗口模式"
110
+ }
111
+ },
112
+ "statusIndicator": {
113
+ "connected": "已连接",
114
+ "disconnected": "未连接"
115
+ },
116
+ "statusCard": {
117
+ "unavailable": "状态信息不可用",
118
+ "storageInfo": "存储信息",
119
+ "workingDirectory": "工作目录",
120
+ "inputDirectory": "输入目录",
121
+ "llmConfig": "LLM 配置",
122
+ "llmBinding": "LLM 绑定",
123
+ "llmBindingHost": "LLM 绑定主机",
124
+ "llmModel": "LLM 模型",
125
+ "maxTokens": "最大 Token 数",
126
+ "embeddingConfig": "嵌入配置",
127
+ "embeddingBinding": "嵌入绑定",
128
+ "embeddingBindingHost": "嵌入绑定主机",
129
+ "embeddingModel": "嵌入模型",
130
+ "storageConfig": "存储配置",
131
+ "kvStorage": "KV 存储",
132
+ "docStatusStorage": "文档状态存储",
133
+ "graphStorage": "图存储",
134
+ "vectorStorage": "向量存储"
135
+ },
136
+ "propertiesView": {
137
+ "node": {
138
+ "title": "节点",
139
+ "id": "ID",
140
+ "labels": "标签",
141
+ "degree": "度数",
142
+ "properties": "属性",
143
+ "relationships": "关系"
144
+ },
145
+ "edge": {
146
+ "title": "关系",
147
+ "id": "ID",
148
+ "type": "类型",
149
+ "source": "源",
150
+ "target": "目标",
151
+ "properties": "属性"
152
+ }
153
+ },
154
+ "search": {
155
+ "placeholder": "搜索节点...",
156
+ "message": "以及其它 {count} 项"
157
+ },
158
+ "graphLabels": {
159
+ "selectTooltip": "选择查询标签",
160
+ "noLabels": "未找到标签",
161
+ "label": "标签",
162
+ "placeholder": "搜索标签...",
163
+ "andOthers": "以及其它 {count} 个"
164
+ }
165
+ },
166
+ "retrievePanel": {
167
+ "chatMessage": {
168
+ "copyTooltip": "复制到剪贴板",
169
+ "copyError": "无法复制文本到剪贴板"
170
+ },
171
+
172
+ "retrieval": {
173
+ "startPrompt": "在下面输入您的查询以开始检索",
174
+ "clear": "清除",
175
+ "send": "发送",
176
+ "placeholder": "输入您的查询...",
177
+ "error": "错误:无法获取响应"
178
+ },
179
+ "querySettings": {
180
+ "parametersTitle": "参数设置",
181
+ "parametersDescription": "配置查询参数",
182
+
183
+ "queryMode": "查询模式",
184
+ "queryModeTooltip": "选择检索策略:\n• 朴素:不使用高级技术的基本搜索\n• 本地:基于上下文的信息检索\n• 全局:利用全局知识库\n• 混合:结合本地和全局检索\n• 综合:集成知识图谱与向量检索",
185
+ "queryModeOptions": {
186
+ "naive": "朴素",
187
+ "local": "本地",
188
+ "global": "全局",
189
+ "hybrid": "混合",
190
+ "mix": "综合"
191
+ },
192
+
193
+ "responseFormat": "响应格式",
194
+ "responseFormatTooltip": "定义响应格式。例如:\n• 多个段落\n• 单个段落\n• 项目符号",
195
+ "responseFormatOptions": {
196
+ "multipleParagraphs": "多个段落",
197
+ "singleParagraph": "单个段落",
198
+ "bulletPoints": "项目符号"
199
+ },
200
+
201
+ "topK": "Top K 结果数",
202
+ "topKTooltip": "要检索的前 K 个项目数量。在“本地”模式下表示实体,在“全局”模式下表示关系",
203
+ "topKPlaceholder": "结果数",
204
+
205
+ "maxTokensTextUnit": "文本单元最大 Token 数",
206
+ "maxTokensTextUnitTooltip": "每个检索到的文本块允许的最大 Token 数",
207
+
208
+ "maxTokensGlobalContext": "全局上下文最大 Token 数",
209
+ "maxTokensGlobalContextTooltip": "在全局检索中为关系描述分配的最大 Token 数",
210
+
211
+ "maxTokensLocalContext": "本地上下文最大 Token 数",
212
+ "maxTokensLocalContextTooltip": "在本地检索中为实体描述分配的最大 Token 数",
213
+
214
+ "historyTurns": "历史轮次",
215
+ "historyTurnsTooltip": "在响应上下文中考虑的完整对话轮次(用户-助手对)",
216
+ "historyTurnsPlaceholder": "历史轮次的数量",
217
+
218
+ "hlKeywords": "高级关键词",
219
+ "hlKeywordsTooltip": "检索时优先考虑的高级关键词。请用逗号分隔",
220
+ "hlkeywordsPlaceHolder": "输入关键词",
221
+
222
+ "llKeywords": "低级关键词",
223
+ "llKeywordsTooltip": "用于优化检索焦点的低级关键词。请用逗号分隔",
224
+
225
+ "onlyNeedContext": "仅需要上下文",
226
+ "onlyNeedContextTooltip": "如果为 True,则仅返回检索到的上下文,而不会生成回复",
227
+
228
+ "onlyNeedPrompt": "仅需要提示",
229
+ "onlyNeedPromptTooltip": "如果为 True,则仅返回生成的提示,而不会生成回复",
230
+
231
+ "streamResponse": "流式响应",
232
+ "streamResponseTooltip": "如果为 True,则启用流式输出以获得实时响应"
233
+ }
234
+ }
235
+ }
lightrag_webui/src/main.tsx CHANGED
@@ -2,6 +2,8 @@ import { StrictMode } from 'react'
2
  import { createRoot } from 'react-dom/client'
3
  import './index.css'
4
  import App from './App.tsx'
 
 
5
 
6
  createRoot(document.getElementById('root')!).render(
7
  <StrictMode>
 
2
  import { createRoot } from 'react-dom/client'
3
  import './index.css'
4
  import App from './App.tsx'
5
+ import "./i18n";
6
+
7
 
8
  createRoot(document.getElementById('root')!).render(
9
  <StrictMode>