File size: 1,147 Bytes
0140c70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os
import shutil

def find_all_files(path, suffix=".jpg"):
    """Recursively collect the files under *path* whose name ends with *suffix*.

    Symbolic links to directories are followed during the walk.

    Args:
        path: Root directory to search. A directory that does not exist
            simply yields no files.
        suffix: Required file-name ending, passed to ``str.endswith`` (so a
            tuple of endings is accepted as well). The match is
            case-sensitive.

    Returns:
        A sorted list of matching paths, each one the walked directory joined
        with the file name.
    """
    target_files = []
    for cur_dir, _, names in os.walk(path, followlinks=True):
        target_files.extend(
            os.path.join(cur_dir, name) for name in names if name.endswith(suffix)
        )
    # os.walk reports entries in whatever order the filesystem returns them.
    # Sorting gives callers a stable order, so a seeded shuffle of the result
    # produces the same permutation on every machine.
    target_files.sort()
    print(f'find {len(target_files)} files...')
    return target_files

import random

# Split the .jpg files found under "archive" into train / valid / test
# folders below "archive_split". Files are MOVED, not copied, and every file
# lands directly in its split folder under its bare file name.
split_root = "archive_split"

all_files = find_all_files('archive')
os.makedirs(split_root, exist_ok=True)
for split_name in ("train", "valid", "test"):
    os.makedirs(os.path.join(split_root, split_name), exist_ok=True)

# Fixed seed: the same input list always yields the same permutation.
random.seed(2023)
random.shuffle(all_files)

# Consecutive, non-overlapping slices of the shuffled list. Slicing never
# raises, so with fewer than 10000 files the later splits are simply shorter
# (possibly empty).
train = all_files[:8000]
valid = all_files[8000:8000 + 500]
test = all_files[8000 + 500:8000 + 500 + 1500]

# Plan every move before touching the disk. The split folders are flat, so two
# sources with the same file name would land on the same destination, and
# shutil.move may silently overwrite an existing file. Abort up front instead
# of losing data halfway through.
planned_moves = []
claimed_targets = set()
for split_name, split_files in (("train", train), ("valid", valid), ("test", test)):
    dest_dir = os.path.join(split_root, split_name)
    for file in split_files:
        # os.path.basename honours the platform's path separator, unlike
        # splitting on a literal "/".
        target = os.path.join(dest_dir, os.path.basename(file))
        if target in claimed_targets or os.path.exists(target):
            raise FileExistsError(
                f"refusing to overwrite {target!r} while moving {file!r}"
            )
        claimed_targets.add(target)
        planned_moves.append((split_name, file, target))

current_split = None
for split_name, file, target in planned_moves:
    if split_name != current_split:
        # Announce each split once, right before its first file is moved.
        print(f"building {split_name}")
        current_split = split_name
    shutil.move(file, target)
print("done")