File size: 533 Bytes
417c52d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
from file_converter import convert_pdfs_to_txt
from chunker import chunk_files_in_directory
def preprocess(
    raw_dir: str = "data/raw",
    converted_dir: str = "data/converted",
    chunked_dir: str = "data/chunked",
) -> None:
    """Run the two-stage preprocessing pipeline over a directory of inputs.

    Stage 1 hands ``raw_dir`` and ``converted_dir`` to
    ``convert_pdfs_to_txt``; stage 2 hands ``converted_dir`` and
    ``chunked_dir`` to ``chunk_files_in_directory``. Progress is reported
    on stdout before the first stage and after the second.

    Calling ``preprocess()`` with no arguments uses the same directories
    the function previously hard-coded, so existing callers are unaffected.

    Args:
        raw_dir: Directory passed to the converter as its input location.
        converted_dir: Directory passed to the converter as its output
            location and to the chunker as its input location.
        chunked_dir: Directory passed to the chunker as its output location.

    NOTE(review): whether the helpers create missing directories, and what
    they raise on bad input, is defined in ``file_converter`` / ``chunker``
    and is not visible here -- confirm before relying on it.
    """
    print("[INFO] Preprocessing PDF files...")

    # Order matters: the chunker reads from the directory the converter
    # was just told to write to.
    convert_pdfs_to_txt(raw_dir, converted_dir)
    chunk_files_in_directory(converted_dir, chunked_dir)

    print(f"[INFO] Preprocessing complete. Converted files saved in [{converted_dir}], chunked files saved in [{chunked_dir}].")
# Run the pipeline only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    preprocess()