Benjamin L committed on
Commit
7861013
·
1 Parent(s): 5400ea0

Adding file_source(s) as an optional attribute to text(s) requests

Browse files
lightrag/api/routers/document_routes.py CHANGED
@@ -84,22 +84,33 @@ class InsertTextRequest(BaseModel):
84
 
85
  Attributes:
86
  text: The text content to be inserted into the RAG system
 
87
  """
88
 
89
  text: str = Field(
90
  min_length=1,
91
  description="The text to insert",
92
  )
93
-
 
 
 
 
94
  @field_validator("text", mode="after")
95
  @classmethod
96
  def strip_after(cls, text: str) -> str:
97
  return text.strip()
98
-
 
 
 
 
 
99
  class Config:
100
  json_schema_extra = {
101
  "example": {
102
- "text": "This is a sample text to be inserted into the RAG system."
 
103
  }
104
  }
105
 
@@ -109,24 +120,37 @@ class InsertTextsRequest(BaseModel):
109
 
110
  Attributes:
111
  texts: List of text contents to be inserted into the RAG system
 
112
  """
113
 
114
  texts: list[str] = Field(
115
  min_length=1,
116
  description="The texts to insert",
117
  )
118
-
 
 
 
 
 
119
  @field_validator("texts", mode="after")
120
  @classmethod
121
  def strip_after(cls, texts: list[str]) -> list[str]:
122
  return [text.strip() for text in texts]
123
-
 
 
 
 
124
  class Config:
125
  json_schema_extra = {
126
  "example": {
127
  "texts": [
128
  "This is the first text to be inserted.",
129
  "This is the second text to be inserted.",
 
 
 
130
  ]
131
  }
132
  }
@@ -656,16 +680,20 @@ async def pipeline_index_files(rag: LightRAG, file_paths: List[Path]):
656
  logger.error(traceback.format_exc())
657
 
658
 
659
- async def pipeline_index_texts(rag: LightRAG, texts: List[str]):
660
  """Index a list of texts
661
 
662
  Args:
663
  rag: LightRAG instance
664
  texts: The texts to index
 
665
  """
666
  if not texts:
667
  return
668
- await rag.apipeline_enqueue_documents(texts)
 
 
 
669
  await rag.apipeline_process_enqueue_documents()
670
 
671
 
@@ -816,7 +844,7 @@ def create_document_routes(
816
  HTTPException: If an error occurs during text processing (500).
817
  """
818
  try:
819
- background_tasks.add_task(pipeline_index_texts, rag, [request.text])
820
  return InsertResponse(
821
  status="success",
822
  message="Text successfully received. Processing will continue in background.",
@@ -851,7 +879,7 @@ def create_document_routes(
851
  HTTPException: If an error occurs during text processing (500).
852
  """
853
  try:
854
- background_tasks.add_task(pipeline_index_texts, rag, request.texts)
855
  return InsertResponse(
856
  status="success",
857
  message="Text successfully received. Processing will continue in background.",
 
84
 
85
  Attributes:
86
  text: The text content to be inserted into the RAG system
87
+ file_source: Source of the text (optional)
88
  """
89
 
90
  text: str = Field(
91
  min_length=1,
92
  description="The text to insert",
93
  )
94
+ file_source: str = Field(
95
+ default=None,
96
+ min_length=0,
97
+ description="File Source"
98
+ )
99
  @field_validator("text", mode="after")
100
  @classmethod
101
  def strip_after(cls, text: str) -> str:
102
  return text.strip()
103
+
104
@field_validator("file_source", mode="after")
@classmethod
def strip_source_after(cls, file_source: str) -> str:
    """Return ``file_source`` with leading and trailing whitespace removed.

    The method deliberately does NOT share the name ``strip_after`` with the
    ``text`` validator of this model: two functions with the same name in
    one class body rebind the same class attribute, so only the last one
    would remain and the ``text`` field would silently lose its validator.

    Args:
        file_source: The validated source label of the text.

    Returns:
        The source label without surrounding whitespace.
    """
    return file_source.strip()
108
+
109
  class Config:
110
  json_schema_extra = {
111
  "example": {
112
+ "text": "This is a sample text to be inserted into the RAG system.",
113
+ "file_source": "Source of the text (optional)"
114
  }
115
  }
116
 
 
120
 
121
  Attributes:
122
  texts: List of text contents to be inserted into the RAG system
123
+ file_sources: Sources of the texts (optional)
124
  """
125
 
126
  texts: list[str] = Field(
127
  min_length=1,
128
  description="The texts to insert",
129
  )
130
+ file_sources: list[str] = Field(
131
+ default=None,
132
+ min_length=0,
133
+ description="Sources of the texts"
134
+ )
135
+
136
  @field_validator("texts", mode="after")
137
  @classmethod
138
  def strip_after(cls, texts: list[str]) -> list[str]:
139
  return [text.strip() for text in texts]
140
@field_validator("file_sources", mode="after")
@classmethod
def strip_sources_after(cls, file_sources: list[str]) -> list[str]:
    """Return ``file_sources`` with every entry stripped of whitespace.

    The method deliberately does NOT share the name ``strip_after`` with the
    ``texts`` validator of this model: two functions with the same name in
    one class body rebind the same class attribute, so only the last one
    would remain and the ``texts`` field would silently lose its validator.

    Args:
        file_sources: The validated source labels, one per text.

    Returns:
        A new list with the stripped source labels in the original order.
    """
    return [file_source.strip() for file_source in file_sources]
144
+
145
  class Config:
146
  json_schema_extra = {
147
  "example": {
148
  "texts": [
149
  "This is the first text to be inserted.",
150
  "This is the second text to be inserted.",
151
+ ],
152
+ "file_sources": [
153
+ "First file source (optional)",
154
  ]
155
  }
156
  }
 
680
  logger.error(traceback.format_exc())
681
 
682
 
683
async def pipeline_index_texts(
    rag: LightRAG, texts: List[str], file_sources: List[str] = None
):
    """Index a list of texts

    Enqueues ``texts`` on the RAG pipeline together with their optional
    sources, then triggers processing of the queue.

    Args:
        rag: LightRAG instance
        texts: The texts to index
        file_sources: Sources of the texts (optional). ``None``, an empty
            list, or a list holding only empty/``None`` entries is treated
            as "no sources" and ``file_paths=None`` is passed on. Otherwise
            the sources are aligned with ``texts``: missing or blank entries
            become ``"unknown_source"`` and surplus entries are dropped.
    """
    if not texts:
        return

    file_paths = None
    if file_sources and any(file_sources):
        # Work on a copy: the list handed in belongs to the caller (the
        # request model) and must not be modified from a background task.
        aligned = list(file_sources[: len(texts)])
        aligned.extend(["unknown_source"] * (len(texts) - len(aligned)))
        # A blank entry carries no information; use the same placeholder
        # as for entries that were not supplied at all.
        file_paths = [source or "unknown_source" for source in aligned]

    await rag.apipeline_enqueue_documents(input=texts, file_paths=file_paths)
    await rag.apipeline_process_enqueue_documents()
698
 
699
 
 
844
  HTTPException: If an error occurs during text processing (500).
845
  """
846
  try:
847
+ background_tasks.add_task(pipeline_index_texts, rag, [request.text],file_sources=[request.file_source])
848
  return InsertResponse(
849
  status="success",
850
  message="Text successfully received. Processing will continue in background.",
 
879
  HTTPException: If an error occurs during text processing (500).
880
  """
881
  try:
882
+ background_tasks.add_task(pipeline_index_texts, rag, request.texts,file_sources=request.file_sources)
883
  return InsertResponse(
884
  status="success",
885
  message="Text successfully received. Processing will continue in background.",