Spaces:

rm-lht
/

lightrag

Configuration error

App Files Files Community

yangdx committed on Mar 9

Commit

d1215f8

1 Parent(s): 2e31f26

Fix linting

Browse files

Files changed (6) hide show

lightrag/api/utils_api.py +2 -4
lightrag/kg/json_doc_status_impl.py +6 -2
lightrag/kg/json_kv_impl.py +22 -11
lightrag/kg/shared_storage.py +2 -0
lightrag/lightrag.py +3 -1
lightrag/utils.py +9 -7

lightrag/api/utils_api.py CHANGED Viewed

@@ -359,12 +359,10 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
     # Inject chunk configuration
     args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
     args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int)
     # Inject LLM cache configuration
     args.enable_llm_cache_for_extract = get_env_value(
-        "ENABLE_LLM_CACHE_FOR_EXTRACT",
-        False,
-        bool
     )
     ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name

     # Inject chunk configuration
     args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
     args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int)
     # Inject LLM cache configuration
     args.enable_llm_cache_for_extract = get_env_value(
+        "ENABLE_LLM_CACHE_FOR_EXTRACT", False, bool
     )
     ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name

lightrag/kg/json_doc_status_impl.py CHANGED Viewed

@@ -96,11 +96,15 @@ class JsonDocStatusStorage(DocStatusStorage):
     async def index_done_callback(self) -> None:
         async with self._storage_lock:
-            if (is_multiprocess and self.storage_updated.value) or (not is_multiprocess and self.storage_updated):
                 data_dict = (
                     dict(self._data) if hasattr(self._data, "_getvalue") else self._data
                 )
-                logger.info(f"Process {os.getpid()} doc status writting {len(data_dict)} records to {self.namespace}")
                 write_json(data_dict, self._file_name)
                 await clear_all_update_flags(self.namespace)

     async def index_done_callback(self) -> None:
         async with self._storage_lock:
+            if (is_multiprocess and self.storage_updated.value) or (
+                not is_multiprocess and self.storage_updated
+            ):
                 data_dict = (
                     dict(self._data) if hasattr(self._data, "_getvalue") else self._data
                 )
+                logger.info(
+                    f"Process {os.getpid()} doc status writting {len(data_dict)} records to {self.namespace}"
+                )
                 write_json(data_dict, self._file_name)
                 await clear_all_update_flags(self.namespace)

lightrag/kg/json_kv_impl.py CHANGED Viewed

@@ -44,21 +44,28 @@ class JsonKVStorage(BaseKVStorage):
                 loaded_data = load_json(self._file_name) or {}
                 async with self._storage_lock:
                     self._data.update(loaded_data)
                     # Calculate data count based on namespace
                     if self.namespace.endswith("cache"):
                         # For cache namespaces, sum the cache entries across all cache types
-                        data_count = sum(len(first_level_dict) for first_level_dict in loaded_data.values()
-                                        if isinstance(first_level_dict, dict))
                     else:
                         # For non-cache namespaces, use the original count method
                         data_count = len(loaded_data)
-                    logger.info(f"Process {os.getpid()} KV load {self.namespace} with {data_count} records")
     async def index_done_callback(self) -> None:
         async with self._storage_lock:
-            if (is_multiprocess and self.storage_updated.value) or (not is_multiprocess and self.storage_updated):
                 data_dict = (
                     dict(self._data) if hasattr(self._data, "_getvalue") else self._data
                 )
@@ -66,17 +73,21 @@ class JsonKVStorage(BaseKVStorage):
                 # Calculate data count based on namespace
                 if self.namespace.endswith("cache"):
                     # # For cache namespaces, sum the cache entries across all cache types
-                    data_count = sum(len(first_level_dict) for first_level_dict in data_dict.values()
-                                    if isinstance(first_level_dict, dict))
                 else:
                     # For non-cache namespaces, use the original count method
                     data_count = len(data_dict)
-                logger.info(f"Process {os.getpid()} KV writting {data_count} records to {self.namespace}")
                 write_json(data_dict, self._file_name)
                 await clear_all_update_flags(self.namespace)
     async def get_all(self) -> dict[str, Any]:
         """Get all data from storage

                 loaded_data = load_json(self._file_name) or {}
                 async with self._storage_lock:
                     self._data.update(loaded_data)
                     # Calculate data count based on namespace
                     if self.namespace.endswith("cache"):
                         # For cache namespaces, sum the cache entries across all cache types
+                        data_count = sum(
+                            len(first_level_dict)
+                            for first_level_dict in loaded_data.values()
+                            if isinstance(first_level_dict, dict)
+                        )
                     else:
                         # For non-cache namespaces, use the original count method
                         data_count = len(loaded_data)
+                    logger.info(
+                        f"Process {os.getpid()} KV load {self.namespace} with {data_count} records"
+                    )
     async def index_done_callback(self) -> None:
         async with self._storage_lock:
+            if (is_multiprocess and self.storage_updated.value) or (
+                not is_multiprocess and self.storage_updated
+            ):
                 data_dict = (
                     dict(self._data) if hasattr(self._data, "_getvalue") else self._data
                 )
                 # Calculate data count based on namespace
                 if self.namespace.endswith("cache"):
                     # # For cache namespaces, sum the cache entries across all cache types
+                    data_count = sum(
+                        len(first_level_dict)
+                        for first_level_dict in data_dict.values()
+                        if isinstance(first_level_dict, dict)
+                    )
                 else:
                     # For non-cache namespaces, use the original count method
                     data_count = len(data_dict)
+                logger.info(
+                    f"Process {os.getpid()} KV writting {data_count} records to {self.namespace}"
+                )
                 write_json(data_dict, self._file_name)
                 await clear_all_update_flags(self.namespace)
     async def get_all(self) -> dict[str, Any]:
         """Get all data from storage

lightrag/kg/shared_storage.py CHANGED Viewed

@@ -344,6 +344,7 @@ async def set_all_update_flags(namespace: str):
             else:
                 _update_flags[namespace][i] = True
 async def clear_all_update_flags(namespace: str):
     """Clear all update flag of namespace indicating all workers need to reload data from files"""
     global _update_flags
@@ -360,6 +361,7 @@ async def clear_all_update_flags(namespace: str):
             else:
                 _update_flags[namespace][i] = False
 async def get_all_update_flags_status() -> Dict[str, list]:
     """
     Get update flags status for all namespaces.

             else:
                 _update_flags[namespace][i] = True
 async def clear_all_update_flags(namespace: str):
     """Clear all update flag of namespace indicating all workers need to reload data from files"""
     global _update_flags
             else:
                 _update_flags[namespace][i] = False
 async def get_all_update_flags_status() -> Dict[str, list]:
     """
     Get update flags status for all namespaces.

lightrag/lightrag.py CHANGED Viewed

@@ -354,7 +354,9 @@ class LightRAG:
             namespace=make_namespace(
                 self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
             ),
-            global_config=asdict(self),  # Add global_config to ensure cache works properly
             embedding_func=self.embedding_func,
         )

             namespace=make_namespace(
                 self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
             ),
+            global_config=asdict(
+                self
+            ),  # Add global_config to ensure cache works properly
             embedding_func=self.embedding_func,
         )

lightrag/utils.py CHANGED Viewed

@@ -706,7 +706,7 @@ class CacheData:
 async def save_to_cache(hashing_kv, cache_data: CacheData):
     """Save data to cache, with improved handling for streaming responses and duplicate content.
     Args:
         hashing_kv: The key-value storage for caching
         cache_data: The cache data to save
@@ -714,12 +714,12 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
     # Skip if storage is None or content is a streaming response
     if hashing_kv is None or not cache_data.content:
         return
     # If content is a streaming response, don't cache it
     if hasattr(cache_data.content, "__aiter__"):
         logger.debug("Streaming response detected, skipping cache")
         return
     # Get existing cache data
     if exists_func(hashing_kv, "get_by_mode_and_id"):
         mode_cache = (
@@ -728,14 +728,16 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
         )
     else:
         mode_cache = await hashing_kv.get_by_id(cache_data.mode) or {}
     # Check if we already have identical content cached
     if cache_data.args_hash in mode_cache:
         existing_content = mode_cache[cache_data.args_hash].get("return")
         if existing_content == cache_data.content:
-            logger.info(f"Cache content unchanged for {cache_data.args_hash}, skipping update")
             return
     # Update cache with new content
     mode_cache[cache_data.args_hash] = {
         "return": cache_data.content,
@@ -750,7 +752,7 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
         "embedding_max": cache_data.max_val,
         "original_prompt": cache_data.prompt,
     }
     # Only upsert if there's actual new content
     await hashing_kv.upsert({cache_data.mode: mode_cache})

 async def save_to_cache(hashing_kv, cache_data: CacheData):
     """Save data to cache, with improved handling for streaming responses and duplicate content.
     Args:
         hashing_kv: The key-value storage for caching
         cache_data: The cache data to save
     # Skip if storage is None or content is a streaming response
     if hashing_kv is None or not cache_data.content:
         return
     # If content is a streaming response, don't cache it
     if hasattr(cache_data.content, "__aiter__"):
         logger.debug("Streaming response detected, skipping cache")
         return
     # Get existing cache data
     if exists_func(hashing_kv, "get_by_mode_and_id"):
         mode_cache = (
         )
     else:
         mode_cache = await hashing_kv.get_by_id(cache_data.mode) or {}
     # Check if we already have identical content cached
     if cache_data.args_hash in mode_cache:
         existing_content = mode_cache[cache_data.args_hash].get("return")
         if existing_content == cache_data.content:
+            logger.info(
+                f"Cache content unchanged for {cache_data.args_hash}, skipping update"
+            )
             return
     # Update cache with new content
     mode_cache[cache_data.args_hash] = {
         "return": cache_data.content,
         "embedding_max": cache_data.max_val,
         "original_prompt": cache_data.prompt,
     }
     # Only upsert if there's actual new content
     await hashing_kv.upsert({cache_data.mode: mode_cache})