yangdx committed
Commit · 5bdce1f
Parent(s): 16b9161

Fix cache persistence bugs

- lightrag/operate.py +76 -69
- lightrag/utils.py +13 -14
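In short: the `enable_llm_cache` check moves out of `save_to_cache` and up to each call site, so callers decide explicitly whether a response gets persisted and `save_to_cache` itself only validates its inputs. A minimal sketch of the before/after pattern, using simplified stand-in names (`kv`, `data`) rather than the project's actual signatures:

# Minimal sketch of the guard-at-call-site pattern this commit adopts.
# `kv` and `data` are simplified stand-ins, not LightRAG's real objects.

# Before: the helper silently no-ops whenever the query-cache flag is off,
# even for callers governed by a different flag.
async def save_to_cache_before(kv, data):
    if not kv.global_config.get("enable_llm_cache"):
        return
    await kv.upsert({data.mode: {data.args_hash: data.content}})

# After: the helper only rejects invalid input; each caller checks the
# flag that actually applies to its own code path.
async def save_to_cache_after(kv, data):
    if kv is None or not data.content:
        return
    await kv.upsert({data.mode: {data.args_hash: data.content}})

async def caller(kv, data):
    if kv.global_config.get("enable_llm_cache"):
        await save_to_cache_after(kv, data)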
lightrag/operate.py
CHANGED

@@ -859,20 +859,22 @@ async def kg_query(
         .strip()
     )
 
-    # Save to cache
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # Save to cache
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
+
     return response
 
 
@@ -989,19 +991,21 @@ async def extract_keywords_only(
         "high_level_keywords": hl_keywords,
         "low_level_keywords": ll_keywords,
     }
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=json.dumps(cache_data),
-            prompt=text,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=param.mode,
-            cache_type="keywords",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=json.dumps(cache_data),
+                prompt=text,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=param.mode,
+                cache_type="keywords",
+            ),
+        )
+
     return hl_keywords, ll_keywords
 
 
@@ -1205,20 +1209,21 @@ async def mix_kg_vector_query(
         .strip()
     )
 
-    # 7. Save cache - Only cache after collecting complete response
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode="mix",
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # 7. Save cache - Only cache after collecting complete response
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode="mix",
+                cache_type="query",
+            ),
+        )
 
     return response
 
 
@@ -1945,20 +1950,21 @@ async def naive_query(
         .strip()
     )
 
-    # Save to cache
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # Save to cache
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
 
     return response
 
 
@@ -2093,20 +2099,21 @@ async def kg_query_with_keywords(
         .strip()
     )
 
-    # 7. Save cache - Only cache after collecting the complete response
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # 7. Save cache - Only cache after collecting the complete response
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
 
     return response
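The operate.py hunks above add the query-path guards; the utils.py diff below shows why they had to move. `save_to_cache` previously returned early whenever `enable_llm_cache` was off, which also discarded entity-extraction cache writes governed by the separate `enable_llm_cache_for_entity_extract` flag. A hedged illustration of the configuration that presumably triggered the persistence bug (flag names are from the diff; the values are assumed):

# Assumed config illustrating the persistence bug this commit fixes:
global_config = {
    "enable_llm_cache": False,                    # query cache disabled
    "enable_llm_cache_for_entity_extract": True,  # extraction cache enabled
}
# Old code: save_to_cache saw enable_llm_cache == False and returned early,
# so extraction results were never persisted despite their own flag being on.
# New code: use_llm_func_with_cache checks its own flag before calling
# save_to_cache, which no longer inspects enable_llm_cache at all.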
lightrag/utils.py
CHANGED

@@ -731,9 +731,6 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
         hashing_kv: The key-value storage for caching
         cache_data: The cache data to save
     """
-    if not hashing_kv.global_config.get("enable_llm_cache"):
-        return
-
     # Skip if storage is None or content is a streaming response
     if hashing_kv is None or not cache_data.content:
         return
 
@@ -776,6 +773,8 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
         "original_prompt": cache_data.prompt,
     }
 
+    logger.info(f" == LLM cache == saving {cache_data.mode}: {cache_data.args_hash}")
+
     # Only upsert if there's actual new content
     await hashing_kv.upsert({cache_data.mode: mode_cache})
 
@@ -1314,17 +1313,17 @@ async def use_llm_func_with_cache(
 
         res: str = await use_llm_func(input_text, **kwargs)
 
-        # Save to cache
-        await save_to_cache(
-            llm_response_cache,
-            CacheData(
-                args_hash=arg_hash,
-                content=res,
-                prompt=_prompt,
-                cache_type=cache_type,
-            ),
-        )
-
+        if llm_response_cache.global_config.get("enable_llm_cache_for_entity_extract"):
+            await save_to_cache(
+                llm_response_cache,
+                CacheData(
+                    args_hash=arg_hash,
+                    content=res,
+                    prompt=_prompt,
+                    cache_type=cache_type,
+                ),
+            )
+
         return res
 
     # When cache is disabled, directly call LLM
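Finally, the new `logger.info` call in `save_to_cache` makes every persisted entry observable: each write now emits a line of the form ` == LLM cache == saving <mode>: <args_hash>`, so it is easy to confirm from the logs that cache persistence is actually happening after this fix.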