glenn-jocher committed on
Commit
194f168
1 Parent(s): ea7e78c

Multi-threaded image caching

Browse files
data/scripts/get_voc.sh CHANGED
@@ -25,7 +25,7 @@ end=$(date +%s)
25
  runtime=$((end - start))
26
  echo "Completed in" $runtime "seconds"
27
 
28
- echo "Spliting dataset..."
29
  python3 - "$@" <<END
30
  import xml.etree.ElementTree as ET
31
  import pickle
 
25
  runtime=$((end - start))
26
  echo "Completed in" $runtime "seconds"
27
 
28
+ echo "Splitting dataset..."
29
  python3 - "$@" <<END
30
  import xml.etree.ElementTree as ET
31
  import pickle
requirements.txt CHANGED
@@ -20,7 +20,6 @@ tqdm>=4.41.0
20
  # pycocotools>=2.0
21
 
22
  # export --------------------------------------
23
- # packaging # for coremltools
24
  # coremltools==4.0
25
  # onnx>=1.7.0
26
  # scikit-learn==0.19.2 # for coreml quantization
 
20
  # pycocotools>=2.0
21
 
22
  # export --------------------------------------
 
23
  # coremltools==4.0
24
  # onnx>=1.7.0
25
  # scikit-learn==0.19.2 # for coreml quantization
utils/datasets.py CHANGED
@@ -1,13 +1,15 @@
1
  import glob
 
2
  import os
3
  import random
4
  import shutil
5
  import time
 
 
6
  from pathlib import Path
7
  from threading import Thread
8
 
9
  import cv2
10
- import math
11
  import numpy as np
12
  import torch
13
  from PIL import Image, ExifTags
@@ -474,10 +476,11 @@ class LoadImagesAndLabels(Dataset): # for training/testing
474
  self.imgs = [None] * n
475
  if cache_images:
476
  gb = 0 # Gigabytes of cached images
477
- pbar = tqdm(range(len(self.img_files)), desc='Caching images')
478
  self.img_hw0, self.img_hw = [None] * n, [None] * n
479
- for i in pbar: # max 10k images
480
- self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i) # img, hw_original, hw_resized
 
 
481
  gb += self.imgs[i].nbytes
482
  pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)
483
 
 
1
  import glob
2
+ import math
3
  import os
4
  import random
5
  import shutil
6
  import time
7
+ from itertools import repeat
8
+ from multiprocessing.pool import ThreadPool
9
  from pathlib import Path
10
  from threading import Thread
11
 
12
  import cv2
 
13
  import numpy as np
14
  import torch
15
  from PIL import Image, ExifTags
 
476
  self.imgs = [None] * n
477
  if cache_images:
478
  gb = 0 # Gigabytes of cached images
 
479
  self.img_hw0, self.img_hw = [None] * n, [None] * n
480
+ results = ThreadPool(8).imap_unordered(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads
481
+ pbar = tqdm(enumerate(results), total=n)
482
+ for i, x in pbar:
483
+ self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i)
484
  gb += self.imgs[i].nbytes
485
  pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)
486