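"""Split the formatted OSLO_COMET_CT volumes into train/validation/test subsets.

All .h5 files are first copied into the ``train`` directory; the case indices
listed in ``For_validation.txt`` and ``For_testing.txt`` are then moved into
``validation`` and ``test``, respectively.
"""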
from shutil import move, copy2
import os
OR_DIR = '/mnt/EncryptedData1/Users/javier/ext_datasets/COMET_dataset/OSLO_COMET_CT/Formatted_128x128x128'
val_split = '/mnt/EncryptedData1/Users/javier/ext_datasets/COMET_dataset/OSLO_COMET_CT/For_validation.txt'
test_split = '/mnt/EncryptedData1/Users/javier/ext_datasets/COMET_dataset/OSLO_COMET_CT/For_testing.txt'
# Create out dirs
os.makedirs(os.path.join(OR_DIR, 'train'), exist_ok=True)
os.makedirs(os.path.join(OR_DIR, 'validation'), exist_ok=True)
os.makedirs(os.path.join(OR_DIR, 'test'), exist_ok=True)
# Copy all to train and then split into validation and test
list_of_files = [os.path.join(OR_DIR, f) for f in os.listdir(OR_DIR) if f.endswith('.h5')]
list_of_files.sort()
for f in list_of_files:
    copy2(f, os.path.join(OR_DIR, 'train'))
# Get the indices for the validation and test subsets
with open(val_split, 'r') as f:
    val_idcs = f.readlines()[0]
val_idcs = [int(e) for e in val_idcs.split(',')]
with open(test_split, 'r') as f:
    test_indcs = f.readlines()[0]
test_indcs = [int(e) for e in test_indcs.split(',')]
# move the files from train to validation and test
for i in val_idcs:
    move(os.path.join(OR_DIR, 'train', '{:05d}_CT.h5'.format(i)), os.path.join(OR_DIR, 'validation'))
print('Done moving the validation subset.')
for i in test_indcs:
    move(os.path.join(OR_DIR, 'train', '{:05d}_CT.h5'.format(i)), os.path.join(OR_DIR, 'test'))
print('Done moving the test subset.')
print('Done splitting the data.')
print('Training samples: '+str(len(os.listdir(os.path.join(OR_DIR, 'train')))))
print('Validation samples: '+str(len(val_idcs)))
print('Test samples: '+str(len(test_indcs)))