yangdx committed
Commit 5bdce1f · 1 Parent(s): 16b9161

Fix cache persistence bugs

Files changed (2)
  1. lightrag/operate.py +76 -69
  2. lightrag/utils.py +13 -14
lightrag/operate.py CHANGED

@@ -859,20 +859,22 @@ async def kg_query(
             .strip()
         )
 
-    # Save to cache
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # Save to cache
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
+
     return response
 
 
@@ -989,19 +991,21 @@ async def extract_keywords_only(
         "high_level_keywords": hl_keywords,
         "low_level_keywords": ll_keywords,
     }
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=json.dumps(cache_data),
-            prompt=text,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=param.mode,
-            cache_type="keywords",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=json.dumps(cache_data),
+                prompt=text,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=param.mode,
+                cache_type="keywords",
+            ),
+        )
+
     return hl_keywords, ll_keywords
 
 
@@ -1205,20 +1209,21 @@ async def mix_kg_vector_query(
             .strip()
         )
 
-    # 7. Save cache - Only cache after collecting complete response
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode="mix",
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # 7. Save cache - Only cache after collecting complete response
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode="mix",
+                cache_type="query",
+            ),
+        )
 
     return response
 
 
@@ -1945,20 +1950,21 @@ async def naive_query(
             .strip()
         )
 
-    # Save to cache
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # Save to cache
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
 
     return response
 
 
@@ -2093,20 +2099,21 @@ async def kg_query_with_keywords(
             .strip()
         )
 
-    # 7. Save cache - Only cache after collecting the complete response
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # 7. Save cache - Only cache after collecting the complete response
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
 
     return response
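The operate.py half of the fix is one pattern applied at five call sites (kg_query, extract_keywords_only, mix_kg_vector_query, naive_query, kg_query_with_keywords): each caller now checks enable_llm_cache in global_config before calling save_to_cache, instead of relying on save_to_cache to enforce it internally. Below is a minimal runnable sketch of that pattern; FakeKV, query_path, and the trimmed-down CacheData and save_to_cache are hypothetical stand-ins for illustration, not LightRAG's real classes.

import asyncio
from dataclasses import dataclass


@dataclass
class CacheData:
    # Simplified stand-in; the real lightrag.utils.CacheData carries more fields
    # (quantized, min_val, max_val, ...), as the diff above shows.
    args_hash: str
    content: str
    prompt: str
    mode: str = "default"
    cache_type: str = "query"


class FakeKV:
    # Hypothetical stand-in for hashing_kv: a dict store plus a global_config mapping.
    def __init__(self, enable_llm_cache: bool):
        self.global_config = {"enable_llm_cache": enable_llm_cache}
        self.store: dict = {}

    async def upsert(self, data: dict) -> None:
        self.store.update(data)


async def save_to_cache(hashing_kv, cache_data: CacheData) -> None:
    # Post-commit behavior: persist unconditionally; callers decide whether caching is on.
    if hashing_kv is None or not cache_data.content:
        return
    await hashing_kv.upsert({cache_data.mode: {cache_data.args_hash: cache_data.content}})


async def query_path(hashing_kv: FakeKV, query: str, response: str) -> str:
    # Caller-side gate, mirroring the new code in kg_query and the other four functions.
    if hashing_kv.global_config.get("enable_llm_cache"):
        await save_to_cache(
            hashing_kv,
            CacheData(args_hash="h1", content=response, prompt=query),
        )
    return response


async def main() -> None:
    on = FakeKV(enable_llm_cache=True)
    off = FakeKV(enable_llm_cache=False)
    await query_path(on, "q", "answer")
    await query_path(off, "q", "answer")
    assert "default" in on.store and not off.store  # cached only when the flag is on


asyncio.run(main())

The design point is that save_to_cache becomes a plain persistence helper, so a caller governed by a different flag (see utils.py below) can reuse it without being vetoed by enable_llm_cache.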
lightrag/utils.py CHANGED

@@ -731,9 +731,6 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
         hashing_kv: The key-value storage for caching
         cache_data: The cache data to save
     """
-    if not hashing_kv.global_config.get("enable_llm_cache"):
-        return
-
     # Skip if storage is None or content is a streaming response
     if hashing_kv is None or not cache_data.content:
         return
 
@@ -776,6 +773,8 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
         "original_prompt": cache_data.prompt,
     }
 
+    logger.info(f" == LLM cache == saving {cache_data.mode}: {cache_data.args_hash}")
+
     # Only upsert if there's actual new content
     await hashing_kv.upsert({cache_data.mode: mode_cache})
 
@@ -1314,17 +1313,17 @@ async def use_llm_func_with_cache(
 
         res: str = await use_llm_func(input_text, **kwargs)
 
-        # Save to cache
-        logger.info(f" == LLM cache == saving {arg_hash}")
-        await save_to_cache(
-            llm_response_cache,
-            CacheData(
-                args_hash=arg_hash,
-                content=res,
-                prompt=_prompt,
-                cache_type=cache_type,
-            ),
-        )
+        if llm_response_cache.global_config.get("enable_llm_cache_for_entity_extract"):
+            await save_to_cache(
+                llm_response_cache,
+                CacheData(
+                    args_hash=arg_hash,
+                    content=res,
+                    prompt=_prompt,
+                    cache_type=cache_type,
+                ),
+            )
+
         return res
 
     # When cache is disabled, directly call LLM
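The utils.py half shows why the check had to move: use_llm_func_with_cache (used during entity extraction) is governed by its own flag, enable_llm_cache_for_entity_extract, and the old unconditional enable_llm_cache check inside save_to_cache would drop those writes whenever the general query cache was off. A minimal sketch of the corrected interaction follows; FakeKV, cached_extract, and the trimmed-down save_to_cache are hypothetical stand-ins, not the real implementations.

import asyncio


class FakeKV:
    # Hypothetical stand-in for llm_response_cache: dict store plus global_config.
    def __init__(self, **flags: bool):
        self.global_config = dict(flags)
        self.store: dict = {}

    async def upsert(self, data: dict) -> None:
        self.store.update(data)


async def save_to_cache(kv: FakeKV, key: str, content: str) -> None:
    # Post-commit: no enable_llm_cache early-return inside the helper.
    if kv is None or not content:
        return
    await kv.upsert({key: content})


async def cached_extract(kv: FakeKV, arg_hash: str, res: str) -> str:
    # Mirrors the new gate in use_llm_func_with_cache: extraction caching
    # follows its own flag, independent of the general query-cache flag.
    if kv.global_config.get("enable_llm_cache_for_entity_extract"):
        await save_to_cache(kv, arg_hash, res)
    return res


async def main() -> None:
    kv = FakeKV(enable_llm_cache=False, enable_llm_cache_for_entity_extract=True)
    await cached_extract(kv, "hash-1", "extracted entities")
    # With the old in-helper check, enable_llm_cache=False would have
    # silently discarded this write; after the commit it persists.
    assert kv.store == {"hash-1": "extracted entities"}


asyncio.run(main())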