yangdx committed on
Commit
1eb4cbb
·
1 Parent(s): 0009218

Sync modifications from main branch

Browse files
lightrag/api/routers/document_routes.py CHANGED
@@ -117,6 +117,37 @@ class DocumentManager:
117
  ".docx",
118
  ".pptx",
119
  ".xlsx",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  ),
121
  ):
122
  self.input_dir = Path(input_dir)
@@ -170,7 +201,41 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
170
 
171
  # Process based on file type
172
  match ext:
173
- case ".txt" | ".md":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  content = file.decode("utf-8")
175
  case ".pdf":
176
  if not pm.is_installed("pypdf2"):
 
117
  ".docx",
118
  ".pptx",
119
  ".xlsx",
120
+ ".rtf", # Rich Text Format
121
+ ".odt", # OpenDocument Text
122
+ ".tex", # LaTeX
123
+ ".epub", # Electronic Publication
124
+ ".html", # HyperText Markup Language
125
+ ".htm", # HyperText Markup Language
126
+ ".csv", # Comma-Separated Values
127
+ ".json", # JavaScript Object Notation
128
+ ".xml", # eXtensible Markup Language
129
+ ".yaml", # YAML Ain't Markup Language
130
+ ".yml", # YAML
131
+ ".log", # Log files
132
+ ".conf", # Configuration files
133
+ ".ini", # Initialization files
134
+ ".properties", # Java properties files
135
+ ".sql", # SQL scripts
136
+ ".bat", # Batch files
137
+ ".sh", # Shell scripts
138
+ ".c", # C source code
139
+ ".cpp", # C++ source code
140
+ ".py", # Python source code
141
+ ".java", # Java source code
142
+ ".js", # JavaScript source code
143
+ ".ts", # TypeScript source code
144
+ ".swift", # Swift source code
145
+ ".go", # Go source code
146
+ ".rb", # Ruby source code
147
+ ".php", # PHP source code
148
+ ".css", # Cascading Style Sheets
149
+ ".scss", # Sassy CSS
150
+ ".less", # LESS CSS
151
  ),
152
  ):
153
  self.input_dir = Path(input_dir)
 
201
 
202
  # Process based on file type
203
  match ext:
204
+ case (
205
+ ".txt"
206
+ | ".md"
207
+ | ".html"
208
+ | ".htm"
209
+ | ".tex"
210
+ | ".json"
211
+ | ".xml"
212
+ | ".yaml"
213
+ | ".yml"
214
+ | ".rtf"
215
+ | ".odt"
216
+ | ".epub"
217
+ | ".csv"
218
+ | ".log"
219
+ | ".conf"
220
+ | ".ini"
221
+ | ".properties"
222
+ | ".sql"
223
+ | ".bat"
224
+ | ".sh"
225
+ | ".c"
226
+ | ".cpp"
227
+ | ".py"
228
+ | ".java"
229
+ | ".js"
230
+ | ".ts"
231
+ | ".swift"
232
+ | ".go"
233
+ | ".rb"
234
+ | ".php"
235
+ | ".css"
236
+ | ".scss"
237
+ | ".less"
238
+ ):
239
  content = file.decode("utf-8")
240
  case ".pdf":
241
  if not pm.is_installed("pypdf2"):