yangdx committed on
Commit
e05be8f
·
1 Parent(s): 02bb176

Files are now processed in batches during auto scan

Browse files
lightrag/api/routers/document_routes.py CHANGED
@@ -472,11 +472,30 @@ async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager):
472
  total_files = len(new_files)
473
  logger.info(f"Found {total_files} new files to index.")
474
 
475
- if new_files:
476
- await pipeline_index_files(rag, new_files)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
477
 
478
  except Exception as e:
479
  logger.error(f"Error during scanning process: {str(e)}")
 
480
 
481
 
482
  def create_document_routes(
 
472
  total_files = len(new_files)
473
  logger.info(f"Found {total_files} new files to index.")
474
 
475
+ if not new_files:
476
+ return
477
+
478
+ # Get MAX_PARALLEL_INSERT from global_args
479
+ max_parallel = global_args["max_parallel_insert"]
480
+ # Calculate batch size as 2 * MAX_PARALLEL_INSERT
481
+ batch_size = 2 * max_parallel
482
+
483
+ # Process files in batches
484
+ for i in range(0, total_files, batch_size):
485
+ batch_files = new_files[i:i+batch_size]
486
+ batch_num = i // batch_size + 1
487
+ total_batches = (total_files + batch_size - 1) // batch_size
488
+
489
+ logger.info(f"Processing batch {batch_num}/{total_batches} with {len(batch_files)} files")
490
+ await pipeline_index_files(rag, batch_files)
491
+
492
+ # Log progress
493
+ processed = min(i + batch_size, total_files)
494
+ logger.info(f"Processed {processed}/{total_files} files ({processed/total_files*100:.1f}%)")
495
 
496
  except Exception as e:
497
  logger.error(f"Error during scanning process: {str(e)}")
498
+ logger.error(traceback.format_exc())
499
 
500
 
501
  def create_document_routes(
lightrag/api/utils_api.py CHANGED
@@ -365,6 +365,9 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
365
  "LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE
366
  )
367
 
 
 
 
368
  # Handle openai-ollama special case
369
  if args.llm_binding == "openai-ollama":
370
  args.llm_binding = "openai"
 
365
  "LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE
366
  )
367
 
368
+ # Get MAX_PARALLEL_INSERT from environment
369
+ global_args["max_parallel_insert"] = get_env_value("MAX_PARALLEL_INSERT", 2, int)
370
+
371
  # Handle openai-ollama special case
372
  if args.llm_binding == "openai-ollama":
373
  args.llm_binding = "openai"