yangdx committed
Commit d71c66a · Parent(s): 4c55d5d

Fix cache persistence bugs

Files changed:
- lightrag/operate.py +76 -69
- lightrag/utils.py +13 -14
lightrag/operate.py CHANGED

@@ -859,20 +859,22 @@ async def kg_query(
         .strip()
     )

-    # Save to cache
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # Save to cache
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
+
     return response


@@ -989,19 +991,21 @@ async def extract_keywords_only(
         "high_level_keywords": hl_keywords,
         "low_level_keywords": ll_keywords,
     }
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=json.dumps(cache_data),
-            prompt=text,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=param.mode,
-            cache_type="keywords",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=json.dumps(cache_data),
+                prompt=text,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=param.mode,
+                cache_type="keywords",
+            ),
+        )
+
     return hl_keywords, ll_keywords


@@ -1205,20 +1209,21 @@ async def mix_kg_vector_query(
         .strip()
     )

-    # 7. Save cache - Only cache after collecting complete response
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode="mix",
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # 7. Save cache - Only cache after collecting complete response
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode="mix",
+                cache_type="query",
+            ),
+        )

     return response


@@ -1973,20 +1978,21 @@ async def naive_query(
         .strip()
     )

-    # Save to cache
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # Save to cache
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )

     return response


@@ -2121,20 +2127,21 @@ async def kg_query_with_keywords(
         .strip()
     )

-    # 7. Save cache - Only cache after collecting the complete response
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # 7. Save cache - Only cache after collecting the complete response
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )

     return response
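All five operate.py hunks make the same change: the `enable_llm_cache` check moves out of `save_to_cache` and into each call site (`kg_query`, `extract_keywords_only`, `mix_kg_vector_query`, `naive_query`, `kg_query_with_keywords`). Below is a minimal runnable sketch of the resulting pattern; only `save_to_cache`, `CacheData`, `global_config`, and the flag name are taken from the diff, while `StubKV`, `kg_query_sketch`, and the hash value are invented for illustration.

```python
import asyncio
from dataclasses import dataclass


@dataclass
class CacheData:
    # Trimmed-down stand-in for lightrag.utils.CacheData; the real class
    # also carries the quantized/min_val/max_val embedding fields.
    args_hash: str
    content: str
    prompt: str
    mode: str = "default"
    cache_type: str = "query"


class StubKV:
    # Invented in-memory stand-in for the hashing_kv storage in the diff.
    def __init__(self, global_config: dict):
        self.global_config = global_config
        self.data: dict = {}

    async def upsert(self, payload: dict) -> None:
        self.data.update(payload)


async def save_to_cache(hashing_kv, cache_data: CacheData) -> None:
    # Post-commit shape: no enable_llm_cache check in here any more,
    # only the None/empty-content guard kept by the utils.py hunk below.
    if hashing_kv is None or not cache_data.content:
        return
    await hashing_kv.upsert(
        {cache_data.mode: {cache_data.args_hash: cache_data.content}}
    )


async def kg_query_sketch(query: str, response: str, hashing_kv) -> str:
    # Call-site gating, as added in each operate.py hunk above.
    if hashing_kv.global_config.get("enable_llm_cache"):
        await save_to_cache(
            hashing_kv,
            CacheData(args_hash="h1", content=response, prompt=query),
        )
    return response


async def main() -> None:
    kv = StubKV({"enable_llm_cache": True})
    await kg_query_sketch("What is LightRAG?", "An answer.", kv)
    print(kv.data)  # {'default': {'h1': 'An answer.'}}


asyncio.run(main())
```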
lightrag/utils.py CHANGED

@@ -731,9 +731,6 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
         hashing_kv: The key-value storage for caching
         cache_data: The cache data to save
     """
-    if not hashing_kv.global_config.get("enable_llm_cache"):
-        return
-
     # Skip if storage is None or content is a streaming response
     if hashing_kv is None or not cache_data.content:
         return

@@ -776,6 +773,8 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
         "original_prompt": cache_data.prompt,
     }

+    logger.info(f" == LLM cache == saving {cache_data.mode}: {cache_data.args_hash}")
+
     # Only upsert if there's actual new content
     await hashing_kv.upsert({cache_data.mode: mode_cache})

@@ -1314,17 +1313,17 @@ async def use_llm_func_with_cache(

         res: str = await use_llm_func(input_text, **kwargs)

-        # Save to cache
-        await save_to_cache(
-            llm_response_cache,
-            CacheData(
-                args_hash=arg_hash,
-                content=res,
-                prompt=_prompt,
-                cache_type=cache_type,
-            ),
-        )
-
+        if llm_response_cache.global_config.get("enable_llm_cache_for_entity_extract"):
+            await save_to_cache(
+                llm_response_cache,
+                CacheData(
+                    args_hash=arg_hash,
+                    content=res,
+                    prompt=_prompt,
+                    cache_type=cache_type,
+                ),
+            )
+
         return res

     # When cache is disabled, directly call LLM
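Taken together, the utils.py hunks suggest what the commit title calls a persistence bug: because `save_to_cache` returned early whenever `enable_llm_cache` was false, callers governed by the separate `enable_llm_cache_for_entity_extract` flag could never persist anything. A self-contained before/after sketch follows; the two flag names and the removed early return come from the diff, `StubKV` and the `save_old`/`save_new` helpers are invented, and reading this flag interaction as the bug is an inference from the diff, not a statement from the author.

```python
import asyncio
from dataclasses import dataclass, field


@dataclass
class StubKV:
    # Same invented in-memory storage as in the earlier sketch.
    global_config: dict
    data: dict = field(default_factory=dict)

    async def upsert(self, payload: dict) -> None:
        self.data.update(payload)


async def save_old(kv: StubKV, mode: str, key: str, content: str) -> None:
    # Pre-commit save_to_cache: gated on the global query-cache flag,
    # which also starved the entity-extraction path of its cache.
    if not kv.global_config.get("enable_llm_cache"):
        return
    await kv.upsert({mode: {key: content}})


async def save_new(kv: StubKV, mode: str, key: str, content: str) -> None:
    # Post-commit save_to_cache: flag-agnostic; each caller decides.
    await kv.upsert({mode: {key: content}})


async def main() -> None:
    config = {
        "enable_llm_cache": False,                    # query caching off
        "enable_llm_cache_for_entity_extract": True,  # extraction caching on
    }
    kv_old, kv_new = StubKV(config), StubKV(config)

    # Entity-extraction path (use_llm_func_with_cache), gated by its own flag:
    if config["enable_llm_cache_for_entity_extract"]:
        await save_old(kv_old, "extract", "h2", "entities")  # dropped pre-commit
        await save_new(kv_new, "extract", "h2", "entities")  # persisted post-commit

    print(kv_old.data)  # {} — the persistence bug
    print(kv_new.data)  # {'extract': {'h2': 'entities'}}


asyncio.run(main())
```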
|