yangdx committed
Commit d71c66a · 1 Parent(s): 4c55d5d

Fix cache persistence bugs

Files changed (2)
  1. lightrag/operate.py +76 -69
  2. lightrag/utils.py +13 -14
lightrag/operate.py CHANGED
@@ -859,20 +859,22 @@ async def kg_query(
             .strip()
         )
 
-    # Save to cache
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # Save to cache
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
+
     return response
 
 
@@ -989,19 +991,21 @@ async def extract_keywords_only(
         "high_level_keywords": hl_keywords,
         "low_level_keywords": ll_keywords,
     }
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=json.dumps(cache_data),
-            prompt=text,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=param.mode,
-            cache_type="keywords",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=json.dumps(cache_data),
+                prompt=text,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=param.mode,
+                cache_type="keywords",
+            ),
+        )
+
     return hl_keywords, ll_keywords
 
 
@@ -1205,20 +1209,21 @@ async def mix_kg_vector_query(
             .strip()
         )
 
-    # 7. Save cache - Only cache after collecting complete response
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode="mix",
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # 7. Save cache - Only cache after collecting complete response
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode="mix",
+                cache_type="query",
+            ),
+        )
 
     return response
 
@@ -1973,20 +1978,21 @@ async def naive_query(
             .strip()
         )
 
-    # Save to cache
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # Save to cache
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
 
     return response
 
@@ -2121,20 +2127,21 @@ async def kg_query_with_keywords(
             .strip()
         )
 
-    # 7. Save cache - Only cache after the complete response has been collected
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # 7. Save cache - Only cache after the complete response has been collected
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
 
     return response
 
lightrag/utils.py CHANGED
@@ -731,9 +731,6 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
         hashing_kv: The key-value storage for caching
         cache_data: The cache data to save
     """
-    if not hashing_kv.global_config.get("enable_llm_cache"):
-        return
-
     # Skip if storage is None or content is a streaming response
     if hashing_kv is None or not cache_data.content:
         return
@@ -776,6 +773,8 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
         "original_prompt": cache_data.prompt,
     }
 
+    logger.info(f" == LLM cache == saving {cache_data.mode}: {cache_data.args_hash}")
+
     # Only upsert if there's actual new content
     await hashing_kv.upsert({cache_data.mode: mode_cache})
 
@@ -1314,17 +1313,17 @@ async def use_llm_func_with_cache(
 
         res: str = await use_llm_func(input_text, **kwargs)
 
-        # Save to cache
-        logger.info(f" == LLM cache == saving {arg_hash}")
-        await save_to_cache(
-            llm_response_cache,
-            CacheData(
-                args_hash=arg_hash,
-                content=res,
-                prompt=_prompt,
-                cache_type=cache_type,
-            ),
-        )
+        if llm_response_cache.global_config.get("enable_llm_cache_for_entity_extract"):
+            await save_to_cache(
+                llm_response_cache,
+                CacheData(
+                    args_hash=arg_hash,
+                    content=res,
+                    prompt=_prompt,
+                    cache_type=cache_type,
+                ),
+            )
+
         return res
 
     # When cache is disabled, directly call LLM
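
The recurring pattern in this commit moves the `enable_llm_cache` check out of `save_to_cache` and into every call site, so that `use_llm_func_with_cache` can gate on its own separate flag, `enable_llm_cache_for_entity_extract`. Below is a minimal, self-contained sketch of that call-site gating pattern; `SimpleKV` and the trimmed-down `CacheData` are illustrative stand-ins, not the real LightRAG classes, which carry more fields.

import asyncio
from dataclasses import dataclass


@dataclass
class CacheData:
    # Trimmed-down stand-in for lightrag.utils.CacheData (illustrative only)
    args_hash: str
    content: str
    prompt: str
    mode: str = "default"
    cache_type: str = "query"


class SimpleKV:
    # Hypothetical in-memory stand-in for the hashing_kv storage
    def __init__(self, enable_llm_cache: bool):
        self.global_config = {"enable_llm_cache": enable_llm_cache}
        self.store: dict = {}

    async def upsert(self, data: dict) -> None:
        self.store.update(data)


async def save_to_cache(hashing_kv, cache_data: CacheData) -> None:
    # After this commit, save_to_cache no longer consults the config flag;
    # it only guards against a missing store or empty (streaming) content.
    if hashing_kv is None or not cache_data.content:
        return
    await hashing_kv.upsert(
        {cache_data.mode: {cache_data.args_hash: {"return": cache_data.content}}}
    )


async def query_like_call_site(hashing_kv, response: str) -> str:
    # The flag is now checked at each call site, mirroring the hunks in
    # kg_query, naive_query, mix_kg_vector_query, etc.
    if hashing_kv.global_config.get("enable_llm_cache"):
        await save_to_cache(
            hashing_kv,
            CacheData(args_hash="abc123", content=response, prompt="q"),
        )
    return response


asyncio.run(query_like_call_site(SimpleKV(enable_llm_cache=True), "answer"))

Pushing the check to the callers also means a direct call to save_to_cache always persists, which is why the logging moved into save_to_cache itself (now keyed by mode and args_hash) rather than being duplicated at one call site.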