mixtral-8x22b / split.py
endomorphosis's picture
Upload 2 files
8b4c074 verified
raw
history blame
1.24 kB
import os
import math
import json
CHUNK_SIZE = 2 * 1024**3 # 40GB
CHUNK_PATHS_FILE = "chunk_paths.json"
def split(filepath, chunk_size=CHUNK_SIZE):
basename = os.path.basename(filepath)
dirname = os.path.dirname(filepath)
extension = basename.split(".")[-1]
filename_no_ext = basename.split(".")[-2]
file_size = os.path.getsize(filepath)
num_chunks = math.ceil(file_size / chunk_size)
digit_count = len(str(num_chunks))
chunk_paths = []
for i in range(1, num_chunks+1):
start = (i-1) * chunk_size
chunk_filename = f"{filename_no_ext}-{str(i).zfill(digit_count)}-of-{str(num_chunks).zfill(digit_count)}.{extension}"
split_path = os.path.join(dirname, chunk_filename)
with open(filepath, "rb") as f_in:
f_in.seek(start)
chunk = f_in.read(chunk_size)
with open(split_path, "wb") as f_out:
f_out.write(chunk)
chunk_paths.append(split_path)
with open(CHUNK_PATHS_FILE, 'w') as f:
json.dump(chunk_paths, f)
return chunk_paths
main_filepath = "consolidated.safetensors" # File to be split
chunk_paths = split(main_filepath)