yangdx
committed on
Commit
·
e05be8f
1
Parent(s):
02bb176
Files are now processed in batches in auto scan
Browse files
lightrag/api/routers/document_routes.py
CHANGED
@@ -472,11 +472,30 @@ async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager):
|
|
472 |
total_files = len(new_files)
|
473 |
logger.info(f"Found {total_files} new files to index.")
|
474 |
|
475 |
-
if new_files:
|
476 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
477 |
|
478 |
except Exception as e:
|
479 |
logger.error(f"Error during scanning process: {str(e)}")
|
|
|
480 |
|
481 |
|
482 |
def create_document_routes(
|
|
|
472 |
total_files = len(new_files)
|
473 |
logger.info(f"Found {total_files} new files to index.")
|
474 |
|
475 |
+
if not new_files:
|
476 |
+
return
|
477 |
+
|
478 |
+
# Get MAX_PARALLEL_INSERT from global_args
|
479 |
+
max_parallel = global_args["max_parallel_insert"]
|
480 |
+
# Calculate batch size as 2 * MAX_PARALLEL_INSERT
|
481 |
+
batch_size = 2 * max_parallel
|
482 |
+
|
483 |
+
# Process files in batches
|
484 |
+
for i in range(0, total_files, batch_size):
|
485 |
+
batch_files = new_files[i:i+batch_size]
|
486 |
+
batch_num = i // batch_size + 1
|
487 |
+
total_batches = (total_files + batch_size - 1) // batch_size
|
488 |
+
|
489 |
+
logger.info(f"Processing batch {batch_num}/{total_batches} with {len(batch_files)} files")
|
490 |
+
await pipeline_index_files(rag, batch_files)
|
491 |
+
|
492 |
+
# Log progress
|
493 |
+
processed = min(i + batch_size, total_files)
|
494 |
+
logger.info(f"Processed {processed}/{total_files} files ({processed/total_files*100:.1f}%)")
|
495 |
|
496 |
except Exception as e:
|
497 |
logger.error(f"Error during scanning process: {str(e)}")
|
498 |
+
logger.error(traceback.format_exc())
|
499 |
|
500 |
|
501 |
def create_document_routes(
|
lightrag/api/utils_api.py
CHANGED
@@ -365,6 +365,9 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
|
|
365 |
"LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE
|
366 |
)
|
367 |
|
|
|
|
|
|
|
368 |
# Handle openai-ollama special case
|
369 |
if args.llm_binding == "openai-ollama":
|
370 |
args.llm_binding = "openai"
|
|
|
365 |
"LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE
|
366 |
)
|
367 |
|
368 |
+
# Get MAX_PARALLEL_INSERT from environment
|
369 |
+
global_args["max_parallel_insert"] = get_env_value("MAX_PARALLEL_INSERT", 2, int)
|
370 |
+
|
371 |
# Handle openai-ollama special case
|
372 |
if args.llm_binding == "openai-ollama":
|
373 |
args.llm_binding = "openai"
|