Spaces:

xiang-wuu
/

yolov5

Runtime error

App Files Files Community

glenn-jocher commited on May 24, 2022

Commit

c215878

•

1 Parent(s): 27911dc

YOLOv5 Apple Metal Performance Shader (MPS) support (#7878)

Browse files

* Apple Metal Performance Shader (MPS) device support

Following https://pytorch.org/blog/introducing-accelerated-pytorch-training-on-mac/

Should work with Apple M1 devices with PyTorch nightly installed with command `--device mps`. Usage examples:
```bash
python train.py --device mps
python detect.py --device mps
python val.py --device mps
```

* Update device strategy to fix MPS issue

Files changed (5) hide show

export.py +1 -1
models/common.py +1 -1
models/experimental.py +3 -3
models/tf.py +1 -1
utils/torch_utils.py +5 -2

export.py CHANGED Viewed

@@ -486,7 +486,7 @@ def run(
     if half:
         assert device.type != 'cpu' or coreml or xml, '--half only compatible with GPU export, i.e. use --device 0'
         assert not dynamic, '--half not compatible with --dynamic, i.e. use either --half or --dynamic but not both'
-    model = attempt_load(weights, map_location=device, inplace=True, fuse=True)  # load FP32 model
     nc, names = model.nc, model.names  # number of classes, class names
     # Checks

     if half:
         assert device.type != 'cpu' or coreml or xml, '--half only compatible with GPU export, i.e. use --device 0'
         assert not dynamic, '--half not compatible with --dynamic, i.e. use either --half or --dynamic but not both'
+    model = attempt_load(weights, device=device, inplace=True, fuse=True)  # load FP32 model
     nc, names = model.nc, model.names  # number of classes, class names
     # Checks

models/common.py CHANGED Viewed

@@ -331,7 +331,7 @@ class DetectMultiBackend(nn.Module):
                 names = yaml.safe_load(f)['names']
         if pt:  # PyTorch
-            model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
             stride = max(int(model.stride.max()), 32)  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
             model.half() if fp16 else model.float()

                 names = yaml.safe_load(f)['names']
         if pt:  # PyTorch
+            model = attempt_load(weights if isinstance(weights, list) else w, device=device)
             stride = max(int(model.stride.max()), 32)  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
             model.half() if fp16 else model.float()

models/experimental.py CHANGED Viewed

@@ -71,14 +71,14 @@ class Ensemble(nn.ModuleList):
         return y, None  # inference, train output
-def attempt_load(weights, map_location=None, inplace=True, fuse=True):
     from models.yolo import Detect, Model
     # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
     model = Ensemble()
     for w in weights if isinstance(weights, list) else [weights]:
-        ckpt = torch.load(attempt_download(w), map_location=map_location)  # load
-        ckpt = (ckpt.get('ema') or ckpt['model']).float()  # FP32 model
         model.append(ckpt.fuse().eval() if fuse else ckpt.eval())  # fused or un-fused model in eval mode
     # Compatibility updates

         return y, None  # inference, train output
+def attempt_load(weights, device=None, inplace=True, fuse=True):
     from models.yolo import Detect, Model
     # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
     model = Ensemble()
     for w in weights if isinstance(weights, list) else [weights]:
+        ckpt = torch.load(attempt_download(w))
+        ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float()  # FP32 model
         model.append(ckpt.fuse().eval() if fuse else ckpt.eval())  # fused or un-fused model in eval mode
     # Compatibility updates

models/tf.py CHANGED Viewed

@@ -536,7 +536,7 @@ def run(
 ):
     # PyTorch model
     im = torch.zeros((batch_size, 3, *imgsz))  # BCHW image
-    model = attempt_load(weights, map_location=torch.device('cpu'), inplace=True, fuse=False)
     _ = model(im)  # inference
     model.info()

 ):
     # PyTorch model
     im = torch.zeros((batch_size, 3, *imgsz))  # BCHW image
+    model = attempt_load(weights, device=torch.device('cpu'), inplace=True, fuse=False)
     _ = model(im)  # inference
     model.info()

utils/torch_utils.py CHANGED Viewed

@@ -54,7 +54,8 @@ def select_device(device='', batch_size=0, newline=True):
     s = f'YOLOv5 🚀 {git_describe() or file_date()} Python-{platform.python_version()} torch-{torch.__version__} '
     device = str(device).strip().lower().replace('cuda:', '').replace('none', '')  # to string, 'cuda:0' to '0'
     cpu = device == 'cpu'
-    if cpu:
         os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force torch.cuda.is_available() = False
     elif device:  # non-cpu device requested
         os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable - must be before assert is_available()
@@ -71,13 +72,15 @@ def select_device(device='', batch_size=0, newline=True):
         for i, d in enumerate(devices):
             p = torch.cuda.get_device_properties(i)
             s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n"  # bytes to MB
     else:
         s += 'CPU\n'
     if not newline:
         s = s.rstrip()
     LOGGER.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s)  # emoji-safe
-    return torch.device('cuda:0' if cuda else 'cpu')
 def time_sync():

     s = f'YOLOv5 🚀 {git_describe() or file_date()} Python-{platform.python_version()} torch-{torch.__version__} '
     device = str(device).strip().lower().replace('cuda:', '').replace('none', '')  # to string, 'cuda:0' to '0'
     cpu = device == 'cpu'
+    mps = device == 'mps'  # Apple Metal Performance Shaders (MPS)
+    if cpu or mps:
         os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force torch.cuda.is_available() = False
     elif device:  # non-cpu device requested
         os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable - must be before assert is_available()
         for i, d in enumerate(devices):
             p = torch.cuda.get_device_properties(i)
             s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n"  # bytes to MB
+    elif mps:
+        s += 'MPS\n'
     else:
         s += 'CPU\n'
     if not newline:
         s = s.rstrip()
     LOGGER.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s)  # emoji-safe
+    return torch.device('cuda:0' if cuda else 'mps' if mps else 'cpu')
 def time_sync():