#!/usr/bin/env python
# pylint: disable=cell-var-from-loop
"""
Test Torch Dynamo functionality and backends
"""
import json
import warnings
import numpy as np
import torch
from torchvision.models import resnet18
print('torch:', torch.__version__)
try:
    # must be imported explicitly or namespace is not found
    import torch._dynamo as dynamo # pylint: disable=ungrouped-imports
except Exception as err:
    print('torch without dynamo support', err)
N_ITERS = 20
torch._dynamo.config.verbose = True # pylint: disable=protected-access
warnings.filterwarnings('ignore', category=UserWarning) # disable these for now as many backends report tons of them
# torch.set_float32_matmul_precision('high') # enable to test in fp32
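# For reference, the matmul precision toggle above accepts 'highest', 'high' and
# 'medium' (available in torch >= 1.12); a sketch of the two settings worth comparing:
#   torch.set_float32_matmul_precision('highest') # strict fp32 matmuls
#   torch.set_float32_matmul_precision('high')    # allow TF32 matmuls on Ampere+ GPUs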
def timed(fn): # returns the result of running `fn()` and its runtime in ms, measured with CUDA events
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    result = fn()
    end.record()
    torch.cuda.synchronize()
    return result, start.elapsed_time(end)
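# Usage sketch for timed(): CUDA events measure the GPU work itself, which is
# why synchronize() is called before reading elapsed_time(). Assuming a CUDA
# device and a model/input like those built in __main__ below:
#   _out, ms = timed(lambda: model(inp))
#   print(f'forward pass took {ms:.2f} ms')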
def generate_data(b):
    # batch of random 128x128 RGB images plus random class labels (0..999), both on GPU
    return (
        torch.randn(b, 3, 128, 128).to(torch.float32).cuda(),
        torch.randint(1000, (b,)).cuda(),
    )
def init_model():
    return resnet18().to(torch.float32).cuda()
def evaluate(mod, val):
    return mod(val)
if __name__ == '__main__':
    # first pass: dynamo is going to be slower since it compiles the model
    model = init_model()
    inp = generate_data(16)[0]
    # repeat test
    results = {}
    times = []
    print('eager initial eval:', timed(lambda: evaluate(model, inp))[1])
    for _i in range(N_ITERS):
        inp = generate_data(16)[0]
        _res, time = timed(lambda: evaluate(model, inp)) # noqa: B023
        times.append(time)
    results['default'] = np.median(times)
    print('dynamo available backends:', dynamo.list_backends())
    for backend in dynamo.list_backends():
        try:
            # required before changing backends
            torch._dynamo.reset() # pylint: disable=protected-access
            eval_dyn = dynamo.optimize(backend)(evaluate)
            print('dynamo initial eval:', backend, timed(lambda: eval_dyn(model, inp))[1]) # noqa: B023
            times = []
            for _i in range(N_ITERS):
                inp = generate_data(16)[0]
                _res, time = timed(lambda: eval_dyn(model, inp)) # noqa: B023
                times.append(time)
            results[backend] = np.median(times)
        except Exception as err:
            lines = str(err).split('\n')
            print('dynamo backend failed:', backend, lines[0]) # print just the first error line as backtraces can be quite long
            results[backend] = 'error'
    # print stats
    print(json.dumps(results, indent=4))
"""
Reference: <https://github.com/pytorch/pytorch/blob/4f4b62e4a255708e928445b6502139d5962974fa/docs/source/dynamo/get-started.rst>
Training & Inference backends:
dynamo.optimize("inductor") - Uses TorchInductor backend with AotAutograd and cudagraphs by leveraging codegened Triton kernels
dynamo.optimize("aot_nvfuser") - nvFuser with AotAutograd
dynamo.optimize("aot_cudagraphs") - cudagraphs with AotAutograd
Inference-only backends:
dynamo.optimize("ofi") - Uses Torchscript optimize_for_inference
dynamo.optimize("fx2trt") - Uses Nvidia TensorRT for inference optimizations
dynamo.optimize("onnxrt") - Uses ONNXRT for inference on CPU/GPU
"""