Feat: Add document deletion by ID API endpoint
- New DELETE endpoint for document removal
- Implements doc_id-based deletion
- Handles pipeline status during operation
- Includes proper error handling
- Updates pipeline status messages
lightrag/api/routers/document_routes.py
CHANGED
@@ -12,7 +12,14 @@ import pipmaster as pm
|
|
12 |
from datetime import datetime, timezone
|
13 |
from pathlib import Path
|
14 |
from typing import Dict, List, Optional, Any, Literal
|
15 |
-
from fastapi import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
from pydantic import BaseModel, Field, field_validator
|
17 |
|
18 |
from lightrag import LightRAG
|
@@ -252,6 +259,10 @@ Attributes:
|
|
252 |
"""
|
253 |
|
254 |
|
|
|
|
|
|
|
|
|
255 |
class DocStatusResponse(BaseModel):
|
256 |
id: str = Field(description="Document identifier")
|
257 |
content_summary: str = Field(description="Summary of document content")
|
@@ -1318,6 +1329,100 @@ def create_document_routes(
|
|
1318 |
logger.error(traceback.format_exc())
|
1319 |
raise HTTPException(status_code=500, detail=str(e))
|
1320 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1321 |
@router.post(
|
1322 |
"/clear_cache",
|
1323 |
response_model=ClearCacheResponse,
|
@@ -1372,3 +1477,4 @@ def create_document_routes(
|
|
1372 |
raise HTTPException(status_code=500, detail=str(e))
|
1373 |
|
1374 |
return router
|
|
|
|
12 |
from datetime import datetime, timezone
|
13 |
from pathlib import Path
|
14 |
from typing import Dict, List, Optional, Any, Literal
|
15 |
+
from fastapi import (
|
16 |
+
APIRouter,
|
17 |
+
BackgroundTasks,
|
18 |
+
Depends,
|
19 |
+
File,
|
20 |
+
HTTPException,
|
21 |
+
UploadFile,
|
22 |
+
)
|
23 |
from pydantic import BaseModel, Field, field_validator
|
24 |
|
25 |
from lightrag import LightRAG
|
|
|
259 |
"""
|
260 |
|
261 |
|
262 |
+
class DeleteDocRequest(BaseModel):
    """Request body naming the single document to delete.

    Attributes:
        doc_id: The ID of the document to delete (required).
    """

    # No default is supplied, so the field remains required.
    doc_id: str = Field(description="The ID of the document to delete.")
|
264 |
+
|
265 |
+
|
266 |
class DocStatusResponse(BaseModel):
|
267 |
id: str = Field(description="Document identifier")
|
268 |
content_summary: str = Field(description="Summary of document content")
|
|
|
1329 |
logger.error(traceback.format_exc())
|
1330 |
raise HTTPException(status_code=500, detail=str(e))
|
1331 |
|
1332 |
+
class DeleteDocByIdResponse(BaseModel):
    """Response model for single document deletion operation.

    Attributes:
        status: Outcome of the deletion; one of "success", "fail",
            "not_found" or "busy".
        message: Message describing the operation result.
        doc_id: The ID of the document; defaults to None.
    """

    status: Literal["success", "fail", "not_found", "busy"] = Field(
        description="Status of the deletion operation",
    )
    message: str = Field(
        description="Message describing the operation result",
    )
    doc_id: Optional[str] = Field(
        default=None,
        description="The ID of the document.",
    )
|
1340 |
+
|
1341 |
+
@router.delete(
    "/delete_by_doc_id",
    response_model=DeleteDocByIdResponse,
    dependencies=[Depends(combined_auth)],
    summary="Delete a document and all its associated data by its ID.",
)
async def delete_by_doc_id(
    delete_request: DeleteDocRequest,
) -> DeleteDocByIdResponse:
    """
    Deletes a specific document and all its associated data, including its status,
    text chunks, vector embeddings, and any related graph data.

    This operation is irreversible and will interact with the pipeline status:
    the pipeline is marked busy for the duration of the deletion and is always
    released again, whether the deletion succeeds or fails.

    Args:
        delete_request (DeleteDocRequest): The request containing the document ID.

    Returns:
        DeleteDocByIdResponse: The result of the deletion operation.
            - status="success": The document was successfully deleted.
            - status="not_found": The document with the specified ID was not found.
            - status="fail": The deletion operation failed.
            - status="busy": The pipeline is busy with another operation.

    Raises:
        HTTPException:
            - 500: If an unexpected internal error occurs.
    """
    from lightrag.kg.shared_storage import (
        get_namespace_data,
        get_pipeline_status_lock,
    )

    doc_id = delete_request.doc_id
    pipeline_status = await get_namespace_data("pipeline_status")
    pipeline_status_lock = get_pipeline_status_lock()

    # Check and claim the busy flag under the lock so the check-then-set is
    # not interleaved with another holder of the same lock.
    async with pipeline_status_lock:
        if pipeline_status.get("busy", False):
            return DeleteDocByIdResponse(
                status="busy",
                message="Cannot delete document while pipeline is busy",
                doc_id=doc_id,
            )
        pipeline_status.update(
            {
                "busy": True,
                "job_name": f"Deleting Document: {doc_id}",
                "job_start": datetime.now().isoformat(),
                "latest_message": "Starting document deletion process",
            }
        )
        # Guard the history list exactly like the later accesses do: an
        # unguarded lookup here would raise KeyError *after* "busy" was set
        # but *before* the try/finally below, leaving the pipeline stuck busy.
        if "history_messages" in pipeline_status:
            # Use slice assignment to clear the list in place
            pipeline_status["history_messages"][:] = [
                f"Starting deletion for doc_id: {doc_id}"
            ]

    try:
        result = await rag.adelete_by_doc_id(doc_id)
        response_data = {
            "doc_id": result.doc_id,
            "message": result.message,
            "status": result.status,
        }
        if "history_messages" in pipeline_status:
            pipeline_status["history_messages"].append(result.message)
        return DeleteDocByIdResponse(**response_data)

    except Exception as e:
        error_msg = f"Error deleting document {doc_id}: {str(e)}"
        logger.error(error_msg)
        logger.error(traceback.format_exc())
        if "history_messages" in pipeline_status:
            pipeline_status["history_messages"].append(error_msg)
        # Re-raise as HTTPException for consistent error handling by FastAPI;
        # chain the original exception so the cause stays attached.
        raise HTTPException(status_code=500, detail=error_msg) from e
    finally:
        # Always release the busy flag, on success and on failure alike.
        async with pipeline_status_lock:
            pipeline_status["busy"] = False
            completion_msg = f"Document deletion process for {doc_id} completed."
            pipeline_status["latest_message"] = completion_msg
            if "history_messages" in pipeline_status:
                pipeline_status["history_messages"].append(completion_msg)
|
1425 |
+
|
1426 |
@router.post(
|
1427 |
"/clear_cache",
|
1428 |
response_model=ClearCacheResponse,
|
|
|
1477 |
raise HTTPException(status_code=500, detail=str(e))
|
1478 |
|
1479 |
return router
|
1480 |
+
|