gzdaniel committed on
Commit
79cddc1
·
1 Parent(s): d89aefe

Feat: Add document deletion by ID API endpoint

Browse files

- New DELETE endpoint for document removal
- Implements doc_id-based deletion
- Handles pipeline status during operation
- Includes proper error handling
- Updates pipeline status messages

lightrag/api/routers/document_routes.py CHANGED
@@ -12,7 +12,14 @@ import pipmaster as pm
12
  from datetime import datetime, timezone
13
  from pathlib import Path
14
  from typing import Dict, List, Optional, Any, Literal
15
- from fastapi import APIRouter, BackgroundTasks, Depends, File, HTTPException, UploadFile
 
 
 
 
 
 
 
16
  from pydantic import BaseModel, Field, field_validator
17
 
18
  from lightrag import LightRAG
@@ -252,6 +259,10 @@ Attributes:
252
  """
253
 
254
 
 
 
 
 
255
  class DocStatusResponse(BaseModel):
256
  id: str = Field(description="Document identifier")
257
  content_summary: str = Field(description="Summary of document content")
@@ -1318,6 +1329,100 @@ def create_document_routes(
1318
  logger.error(traceback.format_exc())
1319
  raise HTTPException(status_code=500, detail=str(e))
1320
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1321
  @router.post(
1322
  "/clear_cache",
1323
  response_model=ClearCacheResponse,
@@ -1372,3 +1477,4 @@ def create_document_routes(
1372
  raise HTTPException(status_code=500, detail=str(e))
1373
 
1374
  return router
 
 
12
  from datetime import datetime, timezone
13
  from pathlib import Path
14
  from typing import Dict, List, Optional, Any, Literal
15
+ from fastapi import (
16
+ APIRouter,
17
+ BackgroundTasks,
18
+ Depends,
19
+ File,
20
+ HTTPException,
21
+ UploadFile,
22
+ )
23
  from pydantic import BaseModel, Field, field_validator
24
 
25
  from lightrag import LightRAG
 
259
  """
260
 
261
 
262
class DeleteDocRequest(BaseModel):
    # Request payload for the delete-by-ID endpoint: carries the single
    # document identifier to remove.

    # Required field: an Ellipsis default marks it as having no default value.
    doc_id: str = Field(
        default=...,
        description="The ID of the document to delete.",
    )
264
+
265
+
266
  class DocStatusResponse(BaseModel):
267
  id: str = Field(description="Document identifier")
268
  content_summary: str = Field(description="Summary of document content")
 
1329
  logger.error(traceback.format_exc())
1330
  raise HTTPException(status_code=500, detail=str(e))
1331
 
1332
class DeleteDocByIdResponse(BaseModel):
    """Response model for single document deletion operation."""

    # Outcome of the request; constrained to the four literal values below.
    status: Literal["success", "fail", "not_found", "busy"] = Field(
        description="Status of the deletion operation",
    )

    # Human-readable explanation accompanying the status.
    message: str = Field(
        description="Message describing the operation result",
    )

    # Identifier the operation referred to; omitted (None) when not supplied.
    doc_id: Optional[str] = Field(
        None,
        description="The ID of the document.",
    )
1340
+
1341
@router.delete(
    "/delete_by_doc_id",
    response_model=DeleteDocByIdResponse,
    dependencies=[Depends(combined_auth)],
    summary="Delete a document and all its associated data by its ID.",
)
async def delete_by_doc_id(
    delete_request: DeleteDocRequest,
) -> DeleteDocByIdResponse:
    """
    Deletes a specific document and all its associated data, including its status,
    text chunks, vector embeddings, and any related graph data.

    This operation is irreversible and will interact with the pipeline status:
    the shared "pipeline_status" namespace is marked busy for the duration of
    the deletion and is always reset to not-busy afterwards, whether the
    deletion succeeded or raised.

    Args:
        delete_request (DeleteDocRequest): The request containing the document ID.

    Returns:
        DeleteDocByIdResponse: The result of the deletion operation.
            - status="success": The document was successfully deleted.
            - status="not_found": The document with the specified ID was not found.
            - status="fail": The deletion operation failed.
            - status="busy": The pipeline is busy with another operation.

    Raises:
        HTTPException:
            - 500: If an unexpected internal error occurs.
    """
    from lightrag.kg.shared_storage import (
        get_namespace_data,
        get_pipeline_status_lock,
    )

    doc_id = delete_request.doc_id
    pipeline_status = await get_namespace_data("pipeline_status")
    pipeline_status_lock = get_pipeline_status_lock()

    # Check-and-claim the busy flag under the lock so two requests cannot both
    # observe "not busy" and start overlapping jobs.
    async with pipeline_status_lock:
        if pipeline_status.get("busy", False):
            return DeleteDocByIdResponse(
                status="busy",
                message="Cannot delete document while pipeline is busy",
                doc_id=doc_id,
            )
        pipeline_status.update(
            {
                "busy": True,
                "job_name": f"Deleting Document: {doc_id}",
                "job_start": datetime.now().isoformat(),
                "latest_message": "Starting document deletion process",
            }
        )
        # Guard the key like every later access does: an unguarded lookup here
        # would raise KeyError *after* busy was set and *before* the
        # try/finally below, leaving the pipeline marked busy with nothing to
        # reset it.
        if "history_messages" in pipeline_status:
            # Use slice assignment to clear the list in place
            pipeline_status["history_messages"][:] = [
                f"Starting deletion for doc_id: {doc_id}"
            ]

    try:
        result = await rag.adelete_by_doc_id(doc_id)
        response_data = {
            "doc_id": result.doc_id,
            "message": result.message,
            "status": result.status,
        }
        if "history_messages" in pipeline_status:
            pipeline_status["history_messages"].append(result.message)
        return DeleteDocByIdResponse(**response_data)

    except Exception as e:
        error_msg = f"Error deleting document {doc_id}: {str(e)}"
        logger.error(error_msg)
        logger.error(traceback.format_exc())
        if "history_messages" in pipeline_status:
            pipeline_status["history_messages"].append(error_msg)
        # Re-raise as HTTPException for consistent error handling by FastAPI;
        # chain the original exception so the cause is preserved in tracebacks.
        raise HTTPException(status_code=500, detail=error_msg) from e
    finally:
        # Always release the busy flag, on success and on failure alike.
        async with pipeline_status_lock:
            pipeline_status["busy"] = False
            completion_msg = f"Document deletion process for {doc_id} completed."
            pipeline_status["latest_message"] = completion_msg
            if "history_messages" in pipeline_status:
                pipeline_status["history_messages"].append(completion_msg)
1425
+
1426
  @router.post(
1427
  "/clear_cache",
1428
  response_model=ClearCacheResponse,
 
1477
  raise HTTPException(status_code=500, detail=str(e))
1478
 
1479
  return router
1480
+