alexkueck committed on
Commit
35124cf
1 Parent(s): b6feb0c

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +9 -5
utils.py CHANGED
@@ -282,7 +282,7 @@ def access_pdf(self, filename):
282
  return temp_path
283
 
284
  # Besseren Directory-Loader als CustomLoader definieren, der den Inhalt des Dokuments, die Seitenzahlen, die Überschriften und die Pfade zu den Dokumenten extrahiert
285
- def create_directory_loader(file_type, directory_path):
286
  loaders = {
287
  '.pdf': load_pdf_with_metadata,
288
  '.word': load_word_with_metadata,
@@ -307,9 +307,10 @@ def create_directory_loader(file_type, directory_path):
307
 
308
  return CustomLoader(directory_path, file_type, loaders[file_type])
309
  """
310
- def __init__(self, directory_path, file_type, loader_func):
311
- self.directory_path = directory_path
312
  self.file_type = file_type
 
313
  self.loader_func = loader_func
314
 
315
  def load(self):
@@ -437,8 +438,11 @@ def document_loading_splitting():
437
 
438
 
439
  # Erstellen von DirectoryLoader für jeden Dateityp
440
- pdf_loader = create_directory_loader('.pdf', CHROMA_PDF)
441
- word_loader = create_directory_loader('.word', CHROMA_WORD)
 
 
 
442
 
443
 
444
 
 
282
  return temp_path
283
 
284
  # Besseren Directory-Loader als CustomLoader definieren, der den Inhalt des Dokuments, die Seitenzahlen, die Überschriften und die Pfade zu den Dokumenten extrahiert
285
+ def create_custom_loader(file_type, file_list): #create_directory_loader(file_type, directory_path):
286
  loaders = {
287
  '.pdf': load_pdf_with_metadata,
288
  '.word': load_word_with_metadata,
 
307
 
308
  return CustomLoader(directory_path, file_type, loaders[file_type])
309
  """
310
+
311
+ def __init__(self, file_type, file_list, loader_func):
312
  self.file_type = file_type
313
+ self.file_list = file_list
314
  self.loader_func = loader_func
315
 
316
  def load(self):
 
438
 
439
 
440
  # Erstellen von DirectoryLoader für jeden Dateityp
441
+ # pdf_loader = create_directory_loader('.pdf', CHROMA_PDF)
442
+ #word_loader = create_directory_loader('.word', CHROMA_WORD)
443
+
444
+ pdf_loader = create_custom_loader('.pdf', pdf_files)
445
+ word_loader = create_custom_loader('.docx', word_files)
446
 
447
 
448