Benjamin L
committed on
Commit
·
7861013
1
Parent(s):
5400ea0
Adding file_source.s as optional attribute to text.s requests
Browse files
lightrag/api/routers/document_routes.py
CHANGED
@@ -84,22 +84,33 @@ class InsertTextRequest(BaseModel):
|
|
84 |
|
85 |
Attributes:
|
86 |
text: The text content to be inserted into the RAG system
|
|
|
87 |
"""
|
88 |
|
89 |
text: str = Field(
|
90 |
min_length=1,
|
91 |
description="The text to insert",
|
92 |
)
|
93 |
-
|
|
|
|
|
|
|
|
|
94 |
@field_validator("text", mode="after")
|
95 |
@classmethod
|
96 |
def strip_after(cls, text: str) -> str:
|
97 |
return text.strip()
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
99 |
class Config:
|
100 |
json_schema_extra = {
|
101 |
"example": {
|
102 |
-
"text": "This is a sample text to be inserted into the RAG system."
|
|
|
103 |
}
|
104 |
}
|
105 |
|
@@ -109,24 +120,37 @@ class InsertTextsRequest(BaseModel):
|
|
109 |
|
110 |
Attributes:
|
111 |
texts: List of text contents to be inserted into the RAG system
|
|
|
112 |
"""
|
113 |
|
114 |
texts: list[str] = Field(
|
115 |
min_length=1,
|
116 |
description="The texts to insert",
|
117 |
)
|
118 |
-
|
|
|
|
|
|
|
|
|
|
|
119 |
@field_validator("texts", mode="after")
|
120 |
@classmethod
|
121 |
def strip_after(cls, texts: list[str]) -> list[str]:
|
122 |
return [text.strip() for text in texts]
|
123 |
-
|
|
|
|
|
|
|
|
|
124 |
class Config:
|
125 |
json_schema_extra = {
|
126 |
"example": {
|
127 |
"texts": [
|
128 |
"This is the first text to be inserted.",
|
129 |
"This is the second text to be inserted.",
|
|
|
|
|
|
|
130 |
]
|
131 |
}
|
132 |
}
|
@@ -656,16 +680,20 @@ async def pipeline_index_files(rag: LightRAG, file_paths: List[Path]):
|
|
656 |
logger.error(traceback.format_exc())
|
657 |
|
658 |
|
659 |
-
async def pipeline_index_texts(rag: LightRAG, texts: List[str]):
|
660 |
"""Index a list of texts
|
661 |
|
662 |
Args:
|
663 |
rag: LightRAG instance
|
664 |
texts: The texts to index
|
|
|
665 |
"""
|
666 |
if not texts:
|
667 |
return
|
668 |
-
|
|
|
|
|
|
|
669 |
await rag.apipeline_process_enqueue_documents()
|
670 |
|
671 |
|
@@ -816,7 +844,7 @@ def create_document_routes(
|
|
816 |
HTTPException: If an error occurs during text processing (500).
|
817 |
"""
|
818 |
try:
|
819 |
-
background_tasks.add_task(pipeline_index_texts, rag, [request.text])
|
820 |
return InsertResponse(
|
821 |
status="success",
|
822 |
message="Text successfully received. Processing will continue in background.",
|
@@ -851,7 +879,7 @@ def create_document_routes(
|
|
851 |
HTTPException: If an error occurs during text processing (500).
|
852 |
"""
|
853 |
try:
|
854 |
-
background_tasks.add_task(pipeline_index_texts, rag, request.texts)
|
855 |
return InsertResponse(
|
856 |
status="success",
|
857 |
message="Text successfully received. Processing will continue in background.",
|
|
|
84 |
|
85 |
Attributes:
|
86 |
text: The text content to be inserted into the RAG system
|
87 |
+
file_source: Source of the text (optional)
|
88 |
"""
|
89 |
|
90 |
text: str = Field(
|
91 |
min_length=1,
|
92 |
description="The text to insert",
|
93 |
)
|
94 |
+
file_source: str = Field(
|
95 |
+
default=None,
|
96 |
+
min_length=0,
|
97 |
+
description="File Source"
|
98 |
+
)
|
99 |
@field_validator("text", mode="after")
|
100 |
@classmethod
|
101 |
def strip_after(cls, text: str) -> str:
|
102 |
return text.strip()
|
103 |
+
|
104 |
+
@field_validator("file_source", mode="after")
|
105 |
+
@classmethod
|
106 |
+
def strip_after(cls, file_source: str) -> str:
|
107 |
+
return file_source.strip()
|
108 |
+
|
109 |
class Config:
|
110 |
json_schema_extra = {
|
111 |
"example": {
|
112 |
+
"text": "This is a sample text to be inserted into the RAG system.",
|
113 |
+
"file_source": "Source of the text (optional)"
|
114 |
}
|
115 |
}
|
116 |
|
|
|
120 |
|
121 |
Attributes:
|
122 |
texts: List of text contents to be inserted into the RAG system
|
123 |
+
file_sources: Sources of the texts (optional)
|
124 |
"""
|
125 |
|
126 |
texts: list[str] = Field(
|
127 |
min_length=1,
|
128 |
description="The texts to insert",
|
129 |
)
|
130 |
+
file_sources: list[str] = Field(
|
131 |
+
default=None,
|
132 |
+
min_length=0,
|
133 |
+
description="Sources of the texts"
|
134 |
+
)
|
135 |
+
|
136 |
@field_validator("texts", mode="after")
|
137 |
@classmethod
|
138 |
def strip_after(cls, texts: list[str]) -> list[str]:
|
139 |
return [text.strip() for text in texts]
|
140 |
+
@field_validator("file_sources", mode="after")
|
141 |
+
@classmethod
|
142 |
+
def strip_after(cls, file_sources: list[str]) -> list[str]:
|
143 |
+
return [file_source.strip() for file_source in file_sources]
|
144 |
+
|
145 |
class Config:
|
146 |
json_schema_extra = {
|
147 |
"example": {
|
148 |
"texts": [
|
149 |
"This is the first text to be inserted.",
|
150 |
"This is the second text to be inserted.",
|
151 |
+
],
|
152 |
+
"file_sources": [
|
153 |
+
"First file source (optional)",
|
154 |
]
|
155 |
}
|
156 |
}
|
|
|
680 |
logger.error(traceback.format_exc())
|
681 |
|
682 |
|
683 |
+
async def pipeline_index_texts(rag: LightRAG, texts: List[str],file_sources: List[str]=None):
|
684 |
"""Index a list of texts
|
685 |
|
686 |
Args:
|
687 |
rag: LightRAG instance
|
688 |
texts: The texts to index
|
689 |
+
file_sources: Sources of the texts
|
690 |
"""
|
691 |
if not texts:
|
692 |
return
|
693 |
+
if file_sources is not None:
|
694 |
+
if len(file_sources) != 0 and len(file_sources) != len(texts):
|
695 |
+
[file_sources.append("unknown_source") for _ in range(len(file_sources),len(texts))]
|
696 |
+
await rag.apipeline_enqueue_documents(input=texts,file_paths=file_sources)
|
697 |
await rag.apipeline_process_enqueue_documents()
|
698 |
|
699 |
|
|
|
844 |
HTTPException: If an error occurs during text processing (500).
|
845 |
"""
|
846 |
try:
|
847 |
+
background_tasks.add_task(pipeline_index_texts, rag, [request.text],file_sources=[request.file_source])
|
848 |
return InsertResponse(
|
849 |
status="success",
|
850 |
message="Text successfully received. Processing will continue in background.",
|
|
|
879 |
HTTPException: If an error occurs during text processing (500).
|
880 |
"""
|
881 |
try:
|
882 |
+
background_tasks.add_task(pipeline_index_texts, rag, request.texts,file_sources=request.file_sources)
|
883 |
return InsertResponse(
|
884 |
status="success",
|
885 |
message="Text successfully received. Processing will continue in background.",
|