yangdx committed on
Commit
e05be8f
·
1 Parent(s): 02bb176

Files are now processed in batches during auto scan

Browse files
lightrag/api/routers/document_routes.py CHANGED
@@ -472,11 +472,30 @@ async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager):
472
  total_files = len(new_files)
473
  logger.info(f"Found {total_files} new files to index.")
474
 
475
- if new_files:
476
- await pipeline_index_files(rag, new_files)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
477
 
478
  except Exception as e:
479
  logger.error(f"Error during scanning process: {str(e)}")
 
480
 
481
 
482
  def create_document_routes(
 
472
  total_files = len(new_files)
473
  logger.info(f"Found {total_files} new files to index.")
474
 
475
+ if not new_files:
476
+ return
477
+
478
+ # Get MAX_PARALLEL_INSERT from global_args
479
+ max_parallel = global_args["max_parallel_insert"]
480
+ # Calculate batch size as 2 * MAX_PARALLEL_INSERT
481
+ batch_size = 2 * max_parallel
482
+
483
+ # Process files in batches
484
+ for i in range(0, total_files, batch_size):
485
+ batch_files = new_files[i:i+batch_size]
486
+ batch_num = i // batch_size + 1
487
+ total_batches = (total_files + batch_size - 1) // batch_size
488
+
489
+ logger.info(f"Processing batch {batch_num}/{total_batches} with {len(batch_files)} files")
490
+ await pipeline_index_files(rag, batch_files)
491
+
492
+ # Log progress
493
+ processed = min(i + batch_size, total_files)
494
+ logger.info(f"Processed {processed}/{total_files} files ({processed/total_files*100:.1f}%)")
495
 
496
  except Exception as e:
497
  logger.error(f"Error during scanning process: {str(e)}")
498
+ logger.error(traceback.format_exc())
499
 
500
 
501
  def create_document_routes(
lightrag/api/utils_api.py CHANGED
@@ -365,6 +365,9 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
365
  "LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE
366
  )
367
 
 
 
 
368
  # Handle openai-ollama special case
369
  if args.llm_binding == "openai-ollama":
370
  args.llm_binding = "openai"
 
365
  "LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE
366
  )
367
 
368
+ # Get MAX_PARALLEL_INSERT from environment
369
+ global_args["max_parallel_insert"] = get_env_value("MAX_PARALLEL_INSERT", 2, int)
370
+
371
  # Handle openai-ollama special case
372
  if args.llm_binding == "openai-ollama":
373
  args.llm_binding = "openai"