Merge pull request #1318 from danielaskdd/main
Add ENABLE_LLM_CACHE env support and fix mix_kg_vector_query return value error when only_need_context is enabled
- env.example +2 -1
- lightrag/api/config.py +1 -0
- lightrag/api/lightrag_server.py +3 -0
- lightrag/api/utils_api.py +6 -4
- lightrag/operate.py +8 -1

env.example (CHANGED)

@@ -40,7 +40,6 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
 # MAX_TOKEN_ENTITY_DESC=4000
 
 ### Settings for document indexing
-ENABLE_LLM_CACHE_FOR_EXTRACT=true
 SUMMARY_LANGUAGE=English
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
@@ -64,6 +63,8 @@ TEMPERATURE=0.5
 MAX_ASYNC=4
 ### Max tokens send to LLM (less than context size of the model)
 MAX_TOKENS=32768
+ENABLE_LLM_CACHE=true
+ENABLE_LLM_CACHE_FOR_EXTRACT=true
 
 ### Ollama example (For local services installed with docker, you can use host.docker.internal as host)
 LLM_BINDING=ollama
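
Both flags default to true in the sample environment: ENABLE_LLM_CACHE covers query-time LLM responses, while ENABLE_LLM_CACHE_FOR_EXTRACT (now listed with the other LLM settings rather than the indexing settings) covers entity-extraction calls. A minimal sketch of reading the sample file outside the API server, assuming python-dotenv is available; the server itself parses these through its own config module, shown in the next file:

```python
import os

from dotenv import load_dotenv  # assumption: python-dotenv is installed

load_dotenv()  # loads variables from ./.env into the process environment

# os.getenv returns strings, so compare explicitly; bool("false") would be True.
enable_llm_cache = os.getenv("ENABLE_LLM_CACHE", "true").lower() == "true"
enable_llm_cache_for_extract = os.getenv("ENABLE_LLM_CACHE_FOR_EXTRACT", "true").lower() == "true"

print(enable_llm_cache, enable_llm_cache_for_extract)
```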

lightrag/api/config.py (CHANGED)

@@ -297,6 +297,7 @@ def parse_args() -> argparse.Namespace:
     args.enable_llm_cache_for_extract = get_env_value(
         "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
     )
+    args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)
 
     # Inject LLM temperature configuration
     args.temperature = get_env_value("TEMPERATURE", 0.5, float)
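
The new flag reuses the same get_env_value(name, default, type) pattern as the extraction-cache flag. The helper's actual implementation is not part of this diff; the stand-in below only illustrates the expected coercion, in particular that boolean values need explicit string matching rather than bool(...):

```python
import os
from typing import Any, Callable

def get_env_value_sketch(name: str, default: Any, value_type: Callable = str) -> Any:
    # Stand-in for lightrag.api.config.get_env_value, not the real implementation.
    raw = os.environ.get(name)
    if raw is None:
        return default
    if value_type is bool:
        # bool("false") is True, so map common truthy strings explicitly.
        return raw.strip().lower() in ("true", "1", "yes")
    return value_type(raw)

os.environ["ENABLE_LLM_CACHE"] = "false"
print(get_env_value_sketch("ENABLE_LLM_CACHE", True, bool))             # False
print(get_env_value_sketch("ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool))  # True (unset, falls back to default)
```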

lightrag/api/lightrag_server.py (CHANGED)

@@ -316,6 +316,7 @@ def create_app(args):
                "cosine_better_than_threshold": args.cosine_threshold
            },
            enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
+           enable_llm_cache=args.enable_llm_cache,
            embedding_cache_config={
                "enabled": True,
                "similarity_threshold": 0.95,
@@ -347,6 +348,7 @@ def create_app(args):
                "cosine_better_than_threshold": args.cosine_threshold
            },
            enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
+           enable_llm_cache=args.enable_llm_cache,
            embedding_cache_config={
                "enabled": True,
                "similarity_threshold": 0.95,
@@ -469,6 +471,7 @@ def create_app(args):
                "graph_storage": args.graph_storage,
                "vector_storage": args.vector_storage,
                "enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
+               "enable_llm_cache": args.enable_llm_cache,
            },
            "auth_mode": auth_mode,
            "pipeline_busy": pipeline_status.get("busy", False),
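
create_app now forwards the flag to both LightRAG constructions and reports it in the server's configuration status block. A rough sketch of the equivalent direct wiring, assuming the LightRAG constructor accepts enable_llm_cache as the diff indicates; LLM and embedding functions are left to the caller and must be supplied in a real deployment:

```python
from lightrag import LightRAG
from lightrag.utils import EmbeddingFunc  # assumption: embedding wrapper used in the repo's examples

def build_rag(enable_llm_cache: bool, llm_model_func, embedding_func: EmbeddingFunc) -> LightRAG:
    # Sketch of the keyword arguments create_app forwards; the server configures
    # llm_model_func and embedding_func from its own bindings.
    return LightRAG(
        working_dir="./rag_storage",                       # illustrative path
        llm_model_func=llm_model_func,
        embedding_func=embedding_func,
        enable_llm_cache=enable_llm_cache,                 # new flag from this PR
        enable_llm_cache_for_entity_extract=True,          # existing extraction-cache flag
        embedding_cache_config={"enabled": True, "similarity_threshold": 0.95},
    )
```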

lightrag/api/utils_api.py (CHANGED)

@@ -229,8 +229,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.max_async}")
     ASCIIColors.white("    ├─ Max Tokens: ", end="")
     ASCIIColors.yellow(f"{args.max_tokens}")
-    ASCIIColors.white("    └─ Timeout: ", end="")
+    ASCIIColors.white("    ├─ Timeout: ", end="")
     ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
+    ASCIIColors.white("    ├─ LLM Cache Enabled: ", end="")
+    ASCIIColors.yellow(f"{args.enable_llm_cache}")
+    ASCIIColors.white("    └─ LLM Cache for Extraction Enabled: ", end="")
+    ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
 
     # Embedding Configuration
     ASCIIColors.magenta("\n📊 Embedding Configuration:")
@@ -257,10 +261,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.chunk_overlap_size}")
     ASCIIColors.white("    ├─ Cosine Threshold: ", end="")
     ASCIIColors.yellow(f"{args.cosine_threshold}")
-    ASCIIColors.white("    ├─ Top-K: ", end="")
+    ASCIIColors.white("    └─ Top-K: ", end="")
     ASCIIColors.yellow(f"{args.top_k}")
-    ASCIIColors.white("    └─ LLM Cache for Extraction Enabled: ", end="")
-    ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
 
     # System Configuration
     ASCIIColors.magenta("\n💾 Storage Configuration:")

lightrag/operate.py (CHANGED)

@@ -1072,7 +1072,14 @@ async def mix_kg_vector_query(
         return PROMPTS["fail_response"]
 
     if query_param.only_need_context:
-
+        context_str = f"""
+-----Knowledge Graph Context-----
+{kg_context if kg_context else "No relevant knowledge graph information found"}
+
+-----Vector Context-----
+{vector_context if vector_context else "No relevant text information found"}
+""".strip()
+        return context_str
 
     # 5. Construct hybrid prompt
     sys_prompt = (