File size: 533 Bytes
417c52d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from file_converter import convert_pdfs_to_txt
from chunker import chunk_files_in_directory

def preprocess(
    raw_dir: str = "data/raw",
    converted_dir: str = "data/converted",
    chunked_dir: str = "data/chunked",
) -> None:
    """Run the preprocessing pipeline: PDF conversion followed by chunking.

    Calls ``convert_pdfs_to_txt(raw_dir, converted_dir)`` and then
    ``chunk_files_in_directory(converted_dir, chunked_dir)``, printing a
    status line before the first step and after the second.

    The directory arguments default to the locations this script has always
    used, so calling ``preprocess()`` with no arguments behaves exactly as
    before; pass other paths to run the pipeline on a different data root.

    Args:
        raw_dir: Source directory handed to the converter (presumably the
            location of the input PDFs -- confirm against ``file_converter``).
        converted_dir: Output directory of the converter and input directory
            of the chunker.
        chunked_dir: Output directory handed to the chunker.

    Any exception raised by either helper propagates to the caller; the
    completion message is printed only when both calls return normally.
    """
    print("[INFO] Preprocessing PDF files...")

    # Order matters: the chunker reads from the directory the converter
    # has just been asked to write to.
    convert_pdfs_to_txt(raw_dir, converted_dir)
    chunk_files_in_directory(converted_dir, chunked_dir)

    print(f"[INFO] Preprocessing complete. Converted files saved in [{converted_dir}], chunked files saved in [{chunked_dir}].")

# Script entry point: run the preprocessing pipeline only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    preprocess()