YchKhan committed on
Commit
dff91aa
1 Parent(s): 209c0f9

Update split_files_to_excel.py

Browse files
Files changed (1) hide show
  1. split_files_to_excel.py +2 -2
split_files_to_excel.py CHANGED
@@ -470,7 +470,7 @@ def split_doc_in_chunks(input_folder, base_folders):
470
  print("Treatment of pdf file", path)
471
  raw_chunks = split_pdf(path, input_folder)
472
  for raw_chunk in raw_chunks:
473
- #print(f"BASE zzzzz LIST : {base_folders} = i = {i}")
474
  raw_chunk.metadata["Base Folder"] = base_folders[i]
475
  chunks = group_chunks_by_section(raw_chunks)
476
  print(f"Document splitted in {len(chunks)} chunks")
@@ -585,7 +585,7 @@ def split_in_df(files):
585
  else:
586
  processed_files.append(file_path)
587
  base_folders.append("")
588
- #print(f"BASE FOLDERS LIST : {base_folders}")
589
  print("Finished processing zip files\nSplitting files into chunks...")
590
  documents = split_doc_in_chunks(processed_files, base_folders)
591
  re_docs = resplit_by_end_of_sentence(documents, 1000, 100, 1500)
 
470
  print("Treatment of pdf file", path)
471
  raw_chunks = split_pdf(path, input_folder)
472
  for raw_chunk in raw_chunks:
473
+ print(f"BASE zzzzz LIST : {base_folders} = i = {i}")
474
  raw_chunk.metadata["Base Folder"] = base_folders[i]
475
  chunks = group_chunks_by_section(raw_chunks)
476
  print(f"Document splitted in {len(chunks)} chunks")
 
585
  else:
586
  processed_files.append(file_path)
587
  base_folders.append("")
588
+ print(f"BASE FOLDERS LIST : {base_folders}")
589
  print("Finished processing zip files\nSplitting files into chunks...")
590
  documents = split_doc_in_chunks(processed_files, base_folders)
591
  re_docs = resplit_by_end_of_sentence(documents, 1000, 100, 1500)