glenn-jocher committed on
Commit
a64a4c8
1 Parent(s): 1492632

Replace `os.system('unzip file.zip')` -> `ZipFile.extractall()` (#4919)

Browse files

* Replace `os.system('unzip file.zip')` -> `ZipFile.extractall()`

* Cleanup

Files changed (3) hide show
  1. utils/datasets.py +3 -2
  2. utils/downloads.py +3 -2
  3. utils/general.py +10 -8
utils/datasets.py CHANGED
@@ -15,6 +15,7 @@ from itertools import repeat
15
  from multiprocessing.pool import ThreadPool, Pool
16
  from pathlib import Path
17
  from threading import Thread
 
18
 
19
  import cv2
20
  import numpy as np
@@ -928,8 +929,8 @@ def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profil
928
  # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/'
929
  if str(path).endswith('.zip'): # path is data.zip
930
  assert Path(path).is_file(), f'Error unzipping {path}, file not found'
931
- assert os.system(f'unzip -q {path} -d {path.parent}') == 0, f'Error unzipping {path}'
932
- dir = path.with_suffix('') # dataset directory
933
  return True, str(dir), next(dir.rglob('*.yaml')) # zipped, data_dir, yaml_path
934
  else: # path is data.yaml
935
  return False, None, path
 
15
  from multiprocessing.pool import ThreadPool, Pool
16
  from pathlib import Path
17
  from threading import Thread
18
+ from zipfile import ZipFile
19
 
20
  import cv2
21
  import numpy as np
 
929
  # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/'
930
  if str(path).endswith('.zip'): # path is data.zip
931
  assert Path(path).is_file(), f'Error unzipping {path}, file not found'
932
+ ZipFile(path).extractall(path=path.parent) # unzip
933
+ dir = path.with_suffix('') # dataset directory == zip name
934
  return True, str(dir), next(dir.rglob('*.yaml')) # zipped, data_dir, yaml_path
935
  else: # path is data.yaml
936
  return False, None, path
utils/downloads.py CHANGED
@@ -9,6 +9,7 @@ import subprocess
9
  import time
10
  import urllib
11
  from pathlib import Path
 
12
 
13
  import requests
14
  import torch
@@ -104,8 +105,8 @@ def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
104
  # Unzip if archive
105
  if file.suffix == '.zip':
106
  print('unzipping... ', end='')
107
- os.system(f'unzip -q {file}') # unzip
108
- file.unlink() # remove zip to free space
109
 
110
  print(f'Done ({time.time() - t:.1f}s)')
111
  return r
 
9
  import time
10
  import urllib
11
  from pathlib import Path
12
+ from zipfile import ZipFile
13
 
14
  import requests
15
  import torch
 
105
  # Unzip if archive
106
  if file.suffix == '.zip':
107
  print('unzipping... ', end='')
108
+ ZipFile(file).extractall(path=file.parent) # unzip
109
+ file.unlink() # remove zip
110
 
111
  print(f'Done ({time.time() - t:.1f}s)')
112
  return r
utils/general.py CHANGED
@@ -18,6 +18,7 @@ from itertools import repeat
18
  from multiprocessing.pool import ThreadPool
19
  from pathlib import Path
20
  from subprocess import check_output
 
21
 
22
  import cv2
23
  import numpy as np
@@ -353,17 +354,19 @@ def check_dataset(data, autodownload=True):
353
  if s and autodownload: # download script
354
  if s.startswith('http') and s.endswith('.zip'): # URL
355
  f = Path(s).name # filename
356
- print(f'Downloading {s} ...')
357
  torch.hub.download_url_to_file(s, f)
358
  root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
359
  Path(root).mkdir(parents=True, exist_ok=True) # create root
360
- r = os.system(f'unzip -q {f} -d {root} && rm {f}') # unzip
 
 
361
  elif s.startswith('bash '): # bash script
362
  print(f'Running {s} ...')
363
  r = os.system(s)
364
  else: # python script
365
  r = exec(s, {'yaml': data}) # return None
366
- print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure')) # print result
367
  else:
368
  raise Exception('Dataset not found.')
369
 
@@ -393,12 +396,11 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
393
  if unzip and f.suffix in ('.zip', '.gz'):
394
  print(f'Unzipping {f}...')
395
  if f.suffix == '.zip':
396
- s = f'unzip -qo {f} -d {dir}' # unzip -quiet -overwrite
397
  elif f.suffix == '.gz':
398
- s = f'tar xfz {f} --directory {f.parent}' # unzip
399
- if delete: # delete zip file after unzip
400
- s += f' && rm {f}'
401
- os.system(s)
402
 
403
  dir = Path(dir)
404
  dir.mkdir(parents=True, exist_ok=True) # make directory
 
18
  from multiprocessing.pool import ThreadPool
19
  from pathlib import Path
20
  from subprocess import check_output
21
+ from zipfile import ZipFile
22
 
23
  import cv2
24
  import numpy as np
 
354
  if s and autodownload: # download script
355
  if s.startswith('http') and s.endswith('.zip'): # URL
356
  f = Path(s).name # filename
357
+ print(f'Downloading {s} to {f}...')
358
  torch.hub.download_url_to_file(s, f)
359
  root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
360
  Path(root).mkdir(parents=True, exist_ok=True) # create root
361
+ ZipFile(f).extractall(path=root) # unzip
362
+ Path(f).unlink() # remove zip
363
+ r = None # success
364
  elif s.startswith('bash '): # bash script
365
  print(f'Running {s} ...')
366
  r = os.system(s)
367
  else: # python script
368
  r = exec(s, {'yaml': data}) # return None
369
+ print(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}")
370
  else:
371
  raise Exception('Dataset not found.')
372
 
 
396
  if unzip and f.suffix in ('.zip', '.gz'):
397
  print(f'Unzipping {f}...')
398
  if f.suffix == '.zip':
399
+ ZipFile(f).extractall(path=dir) # unzip
400
  elif f.suffix == '.gz':
401
+ os.system(f'tar xfz {f} --directory {f.parent}') # unzip
402
+ if delete:
403
+ f.unlink() # remove zip
 
404
 
405
  dir = Path(dir)
406
  dir.mkdir(parents=True, exist_ok=True) # make directory