Daniel.y committed
Commit 979410d · unverified · 2 Parent(s): cf4f38f 2a4661a

Merge pull request #1318 from danielaskdd/main


Add ENABLE_LLM_CACHE env support and fix mix_kg_vector_query return value error when only_need_context is enabled

env.example CHANGED
@@ -40,7 +40,6 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
 # MAX_TOKEN_ENTITY_DESC=4000
 
 ### Settings for document indexing
-ENABLE_LLM_CACHE_FOR_EXTRACT=true
 SUMMARY_LANGUAGE=English
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
@@ -64,6 +63,8 @@ TEMPERATURE=0.5
 MAX_ASYNC=4
 ### Max tokens send to LLM (less than context size of the model)
 MAX_TOKENS=32768
+ENABLE_LLM_CACHE=true
+ENABLE_LLM_CACHE_FOR_EXTRACT=true
 
 ### Ollama example (For local services installed with docker, you can use host.docker.internal as host)
 LLM_BINDING=ollama
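
For quick verification, here is a minimal sketch of reading the two cache flags from a .env built from env.example. It assumes python-dotenv is installed; the LightRAG server uses its own loader, so this is illustrative only.

# Illustrative only: load the new flags from a local .env based on env.example.
import os
from dotenv import load_dotenv

load_dotenv(".env")  # reads key=value pairs into the process environment
print("ENABLE_LLM_CACHE =", os.getenv("ENABLE_LLM_CACHE", "true"))
print("ENABLE_LLM_CACHE_FOR_EXTRACT =", os.getenv("ENABLE_LLM_CACHE_FOR_EXTRACT", "true"))
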
lightrag/api/config.py CHANGED
@@ -297,6 +297,7 @@ def parse_args() -> argparse.Namespace:
     args.enable_llm_cache_for_extract = get_env_value(
         "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
     )
+    args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)
 
     # Inject LLM temperature configuration
     args.temperature = get_env_value("TEMPERATURE", 0.5, float)
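
The real get_env_value helper is defined elsewhere in the repo; as a rough sketch of what a bool conversion like the one above has to handle (environment values arrive as strings such as "true"/"false"), a hypothetical stand-in might look like this:

# Hypothetical stand-in for get_env_value(name, default, bool); not the repo's implementation.
import os

def get_env_value_sketch(name: str, default, value_type=str):
    raw = os.environ.get(name)
    if raw is None:
        return default
    if value_type is bool:
        # Accept the usual truthy spellings; anything else counts as False.
        return raw.strip().lower() in ("true", "1", "yes", "on")
    return value_type(raw)

# Mirrors the new line in parse_args():
enable_llm_cache = get_env_value_sketch("ENABLE_LLM_CACHE", True, bool)
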
lightrag/api/lightrag_server.py CHANGED
@@ -316,6 +316,7 @@ def create_app(args):
             "cosine_better_than_threshold": args.cosine_threshold
         },
         enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
+        enable_llm_cache=args.enable_llm_cache,
         embedding_cache_config={
             "enabled": True,
             "similarity_threshold": 0.95,
@@ -347,6 +348,7 @@ def create_app(args):
             "cosine_better_than_threshold": args.cosine_threshold
         },
         enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
+        enable_llm_cache=args.enable_llm_cache,
         embedding_cache_config={
             "enabled": True,
             "similarity_threshold": 0.95,
@@ -469,6 +471,7 @@ def create_app(args):
             "graph_storage": args.graph_storage,
             "vector_storage": args.vector_storage,
             "enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
+            "enable_llm_cache": args.enable_llm_cache,
         },
         "auth_mode": auth_mode,
         "pipeline_busy": pipeline_status.get("busy", False),
lightrag/api/utils_api.py CHANGED
@@ -229,8 +229,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.max_async}")
     ASCIIColors.white(" ├─ Max Tokens: ", end="")
     ASCIIColors.yellow(f"{args.max_tokens}")
-    ASCIIColors.white(" └─ Timeout: ", end="")
+    ASCIIColors.white(" ├─ Timeout: ", end="")
     ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
+    ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="")
+    ASCIIColors.yellow(f"{args.enable_llm_cache}")
+    ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
+    ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
 
     # Embedding Configuration
     ASCIIColors.magenta("\n📊 Embedding Configuration:")
@@ -257,10 +261,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.chunk_overlap_size}")
     ASCIIColors.white(" ├─ Cosine Threshold: ", end="")
     ASCIIColors.yellow(f"{args.cosine_threshold}")
-    ASCIIColors.white(" ├─ Top-K: ", end="")
+    ASCIIColors.white(" └─ Top-K: ", end="")
     ASCIIColors.yellow(f"{args.top_k}")
-    ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
-    ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
 
     # System Configuration
     ASCIIColors.magenta("\n💾 Storage Configuration:")
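
The splash screen renders each group as a tree, so only the last item uses └─ and all others use ├─; that is why the Timeout line flips from └─ to ├─ once the two cache lines are appended, and Top-K becomes the last item of its group after the extraction-cache line moves out. A hypothetical helper (not part of the repo) that captures the pattern:

# Hypothetical helper: print (label, value) pairs as a tree, ├─ for all but the last, └─ for the last.
def print_tree(items: list[tuple[str, object]]) -> None:
    for i, (label, value) in enumerate(items):
        branch = "└─" if i == len(items) - 1 else "├─"
        print(f" {branch} {label}: {value}")

print_tree([
    ("Timeout", "None (infinite)"),
    ("LLM Cache Enabled", True),
    ("LLM Cache for Extraction Enabled", True),
])
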
lightrag/operate.py CHANGED
@@ -1072,7 +1072,14 @@ async def mix_kg_vector_query(
         return PROMPTS["fail_response"]
 
     if query_param.only_need_context:
-        return {"kg_context": kg_context, "vector_context": vector_context}
+        context_str = f"""
+-----Knowledge Graph Context-----
+{kg_context if kg_context else "No relevant knowledge graph information found"}
+
+-----Vector Context-----
+{vector_context if vector_context else "No relevant text information found"}
+""".strip()
+        return context_str
 
     # 5. Construct hybrid prompt
     sys_prompt = (
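
With this fix, only_need_context in mix mode returns a single formatted string with labelled KG and vector sections instead of a dict, which lines up with the string-style context returned by the other query modes. A hedged usage sketch, assuming an initialized LightRAG instance named rag:

# Sketch only: retrieve the combined context for a mix-mode query without LLM generation.
from lightrag import QueryParam

context = rag.query(
    "What does ENABLE_LLM_CACHE control?",
    param=QueryParam(mode="mix", only_need_context=True),
)
# Before this change the mix mode returned {"kg_context": ..., "vector_context": ...};
# now it returns one string containing the two labelled sections.
print(context)
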