#!/usr/bin/env python
# pylint: disable=cell-var-from-loop
"""
Test Torch Dynamo functionality and backends
"""
import json
import warnings
import numpy as np
import torch
from torchvision.models import resnet18

print('torch:', torch.__version__)
try:
    # must be imported explicitly or the namespace is not found
    import torch._dynamo as dynamo # pylint: disable=ungrouped-imports
except Exception as err:
    print('torch without dynamo support', err)

N_ITERS = 20
torch._dynamo.config.verbose = True # pylint: disable=protected-access
warnings.filterwarnings('ignore', category=UserWarning) # disabled for now as many backends report tons of them
# torch.set_float32_matmul_precision('high') # enable to test in fp32


def timed(fn):
    # returns the result of `fn()` and its runtime in ms, measured with CUDA events
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    result = fn()
    end.record()
    torch.cuda.synchronize()
    return result, start.elapsed_time(end)


def generate_data(b):
    # returns a random image batch and matching random labels on the GPU
    return (
        torch.randn(b, 3, 128, 128).to(torch.float32).cuda(),
        torch.randint(1000, (b,)).cuda(),
    )


def init_model():
    return resnet18().to(torch.float32).cuda()


def evaluate(mod, val):
    return mod(val)


if __name__ == '__main__':
    # on the first pass dynamo is going to be slower as it compiles
    model = init_model()
    inp = generate_data(16)[0]
    # repeat the test and record the median time per backend
    results = {}
    times = []
    print('eager initial eval:', timed(lambda: evaluate(model, inp))[1])
    for _i in range(N_ITERS):
        inp = generate_data(16)[0]
        _res, time = timed(lambda: evaluate(model, inp)) # noqa: B023
        times.append(time)
    results['default'] = np.median(times)
    print('dynamo available backends:', dynamo.list_backends())
    for backend in dynamo.list_backends():
        try:
            # required before changing backends
            torch._dynamo.reset() # pylint: disable=protected-access
            eval_dyn = dynamo.optimize(backend)(evaluate)
            print('dynamo initial eval:', backend, timed(lambda: eval_dyn(model, inp))[1]) # noqa: B023
            times = []
            for _i in range(N_ITERS):
                inp = generate_data(16)[0]
                _res, time = timed(lambda: eval_dyn(model, inp)) # noqa: B023
                times.append(time)
            results[backend] = np.median(times)
        except Exception as err:
            lines = str(err).split('\n')
            print('dynamo backend failed:', backend, lines[0]) # print just the first error line as backtraces can be quite long
            results[backend] = 'error'
    # print stats
    print(json.dumps(results, indent=4))

"""
Reference:

Training & Inference backends:
    dynamo.optimize("inductor") - Uses the TorchInductor backend with AotAutograd and cudagraphs by leveraging code-generated Triton kernels
    dynamo.optimize("aot_nvfuser") - nvFuser with AotAutograd
    dynamo.optimize("aot_cudagraphs") - cudagraphs with AotAutograd

Inference-only backends:
    dynamo.optimize("ofi") - Uses TorchScript optimize_for_inference
    dynamo.optimize("fx2trt") - Uses NVIDIA TensorRT for inference optimizations
    dynamo.optimize("onnxrt") - Uses ONNX Runtime for inference on CPU/GPU
"""
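
# Example (a sketch only, left commented out so it does not run on import):
# timing one explicitly-chosen backend with the helpers defined above. Assumes
# a CUDA-capable GPU and a torch build with dynamo support; 'inductor' is one
# of the names reported by dynamo.list_backends().
#
#     torch._dynamo.reset()                                     # clear compiled caches before switching backends
#     eval_inductor = dynamo.optimize('inductor')(evaluate)
#     _res, t_first = timed(lambda: eval_inductor(model, inp))  # first call includes compilation time
#     _res, t_steady = timed(lambda: eval_inductor(model, inp)) # later calls reuse the compiled graph
#     print('inductor first/steady ms:', t_first, t_steady)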