|
|
|
from torch.autograd import Variable |
|
from torch.autograd.function import _nested_map |
|
from torch.jit.annotations import BroadcastingList2, BroadcastingList3 |
|
|
|
from torch.onnx import OperatorExportTypes |
|
import torch |
|
import torch.cuda |
|
import torch.jit |
|
import torch.jit._logging |
|
import torch.jit.frontend |
|
import torch.jit.quantized |
|
import zipfile |
|
import functools |
|
|
|
|
|
from torch.testing import FileCheck |
|
from torch.testing._internal.common_utils import IS_WINDOWS, \
    freeze_rng_state, enable_profiling_mode, enable_profiling_mode_for_profiling_tests, \
    ProfilingMode, TEST_BAILOUTS, is_iterable_of_tensors
from torch.testing._internal.common_jit import JitCommonTestCase
|
|
|
|
|
from contextlib import contextmanager |
|
from functools import reduce |
|
from io import StringIO |
|
from collections import defaultdict |
|
|
|
import importlib.util |
|
import inspect |
|
import io |
|
import math |
|
import os |
|
import pickle |
|
import sys |
|
import tempfile |
|
import textwrap |
|
from importlib.abc import Loader |
|
from typing import Any, Dict, List, Tuple, Union |
|
|
|
RUN_CUDA = torch.cuda.is_available() |
|
RUN_CUDA_MULTI_GPU = RUN_CUDA and torch.cuda.device_count() > 1 |
|
RUN_CUDA_HALF = RUN_CUDA |
|
|
|
# HIP supports half, so the compute-capability check below is CUDA-only.
if torch.cuda.is_available() and not torch.version.hip:
|
CUDA_VERSION = torch._C._cuda_getCompiledVersion() |
|
for d in range(torch.cuda.device_count()): |
|
major = torch.cuda.get_device_capability(d)[0] |
|
        if major < 6:
            RUN_CUDA_HALF = False
|
|
|
def execWrapper(code, glob, loc):
    # Thin wrapper so `exec` can be called with explicit globals/locals.
    exec(code, glob, loc)
|
|
|
def do_input_map(fn, input):
    # Apply `fn` to every Tensor in an arbitrarily nested input structure.
    return _nested_map(lambda t: isinstance(t, torch.Tensor), fn)(input)
|
|
|
def clear_class_registry(): |
|
torch._C._jit_clear_class_registry() |
|
torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() |
|
torch.jit._state._clear_class_state() |
|
|
|
def get_execution_plan(graph_executor_state): |
|
execution_plans = list(graph_executor_state.execution_plans.values()) |
|
num_plans = len(execution_plans) |
|
if num_plans != 1: |
|
raise RuntimeError('This test assumes this GraphExecutor should ' |
|
'only have one execution plan, got: {}'.format(num_plans)) |
|
return execution_plans[0] |
|
|
|
class _AssertRaisesRegexWithHighlightContext(object): |
|
""" |
|
A context manager that is useful for checking that error messages highlight |
|
the correct part of the source code. |
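
    Example (illustrative sketch; ``fn``, ``bar``, and the expected message
    are stand-ins, not a real test)::

        with self.assertRaisesRegexWithHighlight(RuntimeError, "undefined value", "bar"):
            @torch.jit.script
            def fn(x):
                return bar(x)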
|
""" |
|
|
|
def __init__(self, test_case, exception, regex, highlight): |
|
self.test_case = test_case |
|
self.exception_type = exception |
|
self.regex = regex |
|
self.highlight = highlight |
|
|
|
def __enter__(self): |
|
return self |
|
|
|
    def __exit__(self, exc_type, value, traceback):
        with self.test_case.assertRaisesRegex(self.exception_type, self.regex):
            if exc_type:
                raise value
|
|
|
if self.highlight: |
|
FileCheck().check_source_highlighted(self.highlight).run(str(value)) |
|
|
|
return True |
|
|
|
FUSION_GROUP = "prim::TensorExprGroup" |
|
|
|
class JitTestCase(JitCommonTestCase): |
|
_do_cuda_memory_leak_check = True |
|
_restored_warnings = False |
|
|
|
class capture_stdout(list): |
|
""" |
|
Replace sys.stdout with a temporary StringIO |
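
        Example (sketch)::

            with self.capture_stdout() as captured:
                print("hello")
            # after the block exits, captured[0] holds everything printed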
|
""" |
|
def __enter__(self): |
|
self.sys_stdout = sys.stdout |
|
self.stringio = StringIO() |
|
sys.stdout = self.stringio |
|
return self |
|
|
|
def __exit__(self, *args): |
|
self.append(str(self.stringio.getvalue())) |
|
del self.stringio |
|
sys.stdout = self.sys_stdout |
|
|
|
class capture_stderr(list): |
|
""" |
|
Replace sys.stderr with a temporary StringIO |
|
""" |
|
def __enter__(self): |
|
self.sys_stderr = sys.stderr |
|
self.stringio = StringIO() |
|
sys.stderr = self.stringio |
|
return self |
|
|
|
def __exit__(self, *args): |
|
self.append(str(self.stringio.getvalue())) |
|
del self.stringio |
|
sys.stderr = self.sys_stderr |
|
|
|
def setHooks(self): |
|
torch._C._jit_set_emit_hooks(self.emitModuleHook, self.emitFunctionHook) |
|
|
|
def clearHooks(self): |
|
torch._C._jit_set_emit_hooks(None, None) |
|
|
|
def setUp(self): |
|
super().setUp() |
|
|
|
|
|
|
|
        # unittest overrides all warning filters, so re-install the TracerWarning
        # filter once to keep library warnings silenced.
        if not JitTestCase._restored_warnings:
|
torch.jit.TracerWarning.ignore_lib_warnings() |
|
JitTestCase._restored_warnings = True |
|
self.setHooks() |
|
|
|
def tearDown(self): |
|
super().tearDown() |
|
|
|
|
|
self.clearHooks() |
|
clear_class_registry() |
|
|
|
def assertAllFused(self, graph, except_for=()): |
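        """
        Assert that ``graph`` consists of a single TensorExpr fusion group,
        with only glue nodes (constants, tuples, guards) and the kinds listed
        in ``except_for`` allowed outside of it.
        """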
|
|
|
|
|
|
|
        # Note: this helper collects nodes on the 'fast path' only, i.e. the
        # true blocks of specialized checks.
        def get_nodes_and_parents_recursively(block, kind, acc):
|
for node in block.nodes(): |
|
if node.kind() == kind: |
|
acc[block].append(node) |
|
elif node.kind() == 'prim::DifferentiableGraph': |
|
get_nodes_and_parents_recursively(node.g('Subgraph'), kind, acc) |
|
                elif node.kind() == 'prim::If' and (next(node.inputs()).node().kind() in
                                                    ('aten::all', 'prim::TypeCheck', 'prim::RequiresGradCheck')):
                    get_nodes_and_parents_recursively(next(node.blocks()), kind, acc)
|
else: |
|
for inner_block in node.blocks(): |
|
get_nodes_and_parents_recursively(inner_block, kind, acc) |
|
|
|
allowed_nodes = {'prim::Constant', FUSION_GROUP, 'prim::BailoutTemplate', |
|
'prim::TupleConstruct', 'prim::If', 'prim::TypeCheck', 'prim::RequiresGradCheck'} | set(except_for) |
|
|
|
fusion_groups : Dict[torch._C.Block, List[torch._C.Node]] = defaultdict(list) |
|
get_nodes_and_parents_recursively(graph, FUSION_GROUP, fusion_groups) |
|
self.assertTrue(len(fusion_groups) == 1, 'got {}'.format(graph)) |
|
(graph, fusion_nodes) = list(fusion_groups.items())[0] |
|
|
|
self.assertTrue(len(fusion_nodes) == 1, 'got {}'.format(graph)) |
|
self.assertTrue(all(node.kind() in allowed_nodes for node in graph.nodes()), |
|
'got {}'.format(graph)) |
|
|
|
def _isHookExceptionOk(self, e): |
|
se = str(e) |
|
allowed = ("Could not export Python function", |
|
"closures are not exportable") |
|
for a in allowed: |
|
if a in se: |
|
return True |
|
return False |
|
|
|
def _compared_saved_loaded(self, m): |
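        """
        Save ``m``, reload it, save the reloaded copy, and check that the two
        serialized archives contain identical code files.
        """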
|
def extract_files(buffer): |
|
|
|
archive = zipfile.ZipFile(buffer) |
|
|
|
            # Check that archive entry names are unique.
            self.assertEqual(len(set(archive.namelist())), len(archive.namelist()))
|
files = list(filter(lambda x: x.startswith('archive/code/'), archive.namelist())) |
|
|
|
code_files_str = filter(lambda x: x.endswith('.py'), files) |
|
code_files_stream = (archive.open(f) for f in code_files_str) |
|
code_files = ("".join([line.decode() for line in file]) for file in code_files_stream) |
|
|
|
|
|
debug_files_str = filter(lambda f: f.endswith('.debug_pkl'), files) |
|
debug_files_stream = (archive.open(f) for f in debug_files_str) |
|
debug_files = (pickle.load(f) for f in debug_files_stream) |
|
return code_files, debug_files |
|
|
|
|
|
with torch._jit_internal._disable_emit_hooks(): |
|
try: |
|
|
|
                # Short-circuit if this is an empty function or module.
                if len(m.code) == 0:
                    return
                if isinstance(m, torch._C.ScriptModule):
                    if len(m._method_names()) == 0:
                        return
|
|
|
|
|
                # Save the module to an in-memory buffer.
                buffer = io.BytesIO()
                torch.jit.save(m, buffer)

                # Keep a copy of the serialized bytes; extract_files consumes
                # the original buffer.
                buffer_copy = buffer.getvalue()

                code_files, debug_files = extract_files(buffer)
|
|
|
except RuntimeError as e: |
|
if not self._isHookExceptionOk(e): |
|
raise |
|
else: |
|
return |
|
|
|
|
|
buffer2 = io.BytesIO(buffer_copy) |
|
imported = torch.jit.load(buffer2) |
|
|
|
|
|
saved_module_buffer_2 = io.BytesIO() |
|
torch.jit.save(imported, saved_module_buffer_2) |
|
|
|
saved_module_buffer_2.seek(0) |
|
code_files_2, debug_files_2 = extract_files(saved_module_buffer_2) |
|
|
|
for a, b in zip(code_files, code_files_2): |
|
self.assertMultiLineEqual(a, b) |
|
|
|
if isinstance(m, torch._C.ScriptModule): |
|
self.assertTrue(torch._C._ivalue_tags_match(m, imported._c)) |
|
|
|
|
|
def emitFunctionHook(self, func): |
|
|
|
        # These names cannot be exported, so skip the save/load check.
        if func.name == "<lambda>" or "aten::" in func.name:
|
return |
|
self._compared_saved_loaded(func) |
|
|
|
def emitModuleHook(self, module): |
|
self._compared_saved_loaded(module) |
|
|
|
|
|
def getExportImportCopyWithPacking(self, m, also_test_file=True, map_location=None): |
|
buffer = io.BytesIO() |
|
m.apply(lambda s: s._pack() if s._c._has_method('_pack') else None) |
|
torch.jit.save(m, buffer) |
|
m.apply(lambda s: s._unpack() if s._c._has_method('_unpack') else None) |
|
buffer.seek(0) |
|
imported = torch.jit.load(buffer, map_location=map_location) |
|
imported.apply(lambda s: s._unpack() if s._c._has_method('_unpack') else None) |
|
|
|
if not also_test_file: |
|
return imported |
|
|
|
|
|
|
|
|
|
        # NamedTemporaryFile keeps the file open, and an open file cannot be
        # opened a second time on Windows, so close it and delete it manually.
        f = tempfile.NamedTemporaryFile(delete=False)
|
try: |
|
f.close() |
|
imported.save(f.name) |
|
result = torch.jit.load(f.name, map_location=map_location) |
|
finally: |
|
os.unlink(f.name) |
|
|
|
result.apply(lambda s: s._unpack() if s._c._has_method('_unpack') else None) |
|
return result |
|
|
|
def assertGraphContains(self, graph, kind, consider_subgraphs=False): |
|
|
|
if consider_subgraphs: |
|
strgraph = str(graph) |
|
count = strgraph.count(kind) - strgraph.count('with {}'.format(kind)) |
|
self.assertTrue(count > 0) |
|
return |
|
|
|
def nodes(block): |
|
out = [] |
|
for node in block.nodes(): |
|
if node.kind() == kind: |
|
out.append(node) |
|
for block in node.blocks(): |
|
out += nodes(block) |
|
return out |
|
|
|
out_nodes = nodes(graph) |
|
self.assertTrue(len(out_nodes) > 0) |
|
|
|
def assertGraphContainsExactly(self, graph, kind, num_kind_nodes, consider_subgraphs=False): |
|
def perform_assert(graph, kind, actual, expected, consider_subgraphs): |
|
if actual == expected: |
|
return |
|
subgraph = 'including' if consider_subgraphs else 'excluding' |
|
raise AssertionError( |
|
'{}\nError: graph contains {} {} nodes ({} subgraphs) but expected {}'.format( |
|
graph, actual, kind, subgraph, expected)) |
|
|
|
if consider_subgraphs: |
|
strgraph = str(graph) |
|
count = strgraph.count(kind) - strgraph.count('with {}'.format(kind)) |
|
perform_assert(graph, kind, count, num_kind_nodes, |
|
consider_subgraphs) |
|
return |
|
|
|
def nodes(block): |
|
out = [] |
|
for node in block.nodes(): |
|
if node.kind() == kind: |
|
out.append(node) |
|
for block in node.blocks(): |
|
out += nodes(block) |
|
return out |
|
|
|
out_nodes = nodes(graph) |
|
perform_assert(graph, kind, len(out_nodes), num_kind_nodes, |
|
consider_subgraphs) |
|
|
|
def assertExpectedONNXGraph(self, g, *args, **kwargs): |
|
g = torch.onnx._optimize_trace(g, operator_export_type=OperatorExportTypes.ONNX) |
|
self.assertExpectedGraph(g, *args, **kwargs) |
|
|
|
def assertExpectedGraph(self, trace, *args, **kwargs): |
|
if isinstance(trace, torch._C.Graph): |
|
graph = trace |
|
else: |
|
graph = trace.graph() |
|
|
|
torch._C._jit_pass_lint(graph) |
|
torch._C._jit_pass_dce(graph) |
|
torch._C._jit_pass_lint(graph) |
|
graph = torch._C._jit_pass_canonicalize(graph) |
|
torch._C._jit_pass_lint(graph) |
|
self.assertExpected(str(graph), *args, **kwargs) |
|
|
|
def run_pass(self, name, trace): |
|
if isinstance(trace, torch._C.Graph): |
|
graph = trace |
|
set_graph = False |
|
else: |
|
set_graph = True |
|
graph = trace.graph() |
|
|
|
torch._C._jit_pass_lint(graph) |
|
result = getattr(torch._C, '_jit_pass_' + name)(graph) |
|
if result is not None and not isinstance(result, bool): |
|
graph = result |
|
torch._C._jit_pass_lint(graph) |
|
|
|
if set_graph: |
|
trace.set_graph(graph) |
|
return graph |
|
|
|
def get_frame_vars(self, frames_up): |
|
frame = inspect.currentframe() |
|
if not frame: |
|
raise RuntimeError("failed to inspect frame") |
|
i = 0 |
|
while i < frames_up + 1: |
|
frame = frame.f_back |
|
if not frame: |
|
raise RuntimeError("failed to get frame") |
|
i += 1 |
|
defined_vars: Dict[str, Any] = {} |
|
defined_vars.update(frame.f_locals) |
|
defined_vars.update(frame.f_globals) |
|
return defined_vars |
|
|
|
def assertRaisesRegexWithHighlight(self, exception, regex, highlight): |
|
return _AssertRaisesRegexWithHighlightContext(self, exception, regex, highlight) |
|
|
|
def checkScriptRaisesRegex(self, script, inputs, exception, regex, |
|
name=None, outputs=None, capture_output=False, |
|
frames_up=1, profiling=ProfilingMode.PROFILING): |
|
""" |
|
Checks that a given function will throw the correct exception, |
|
when executed with normal python, the string frontend, and the |
|
AST frontend. Logic taken from `checkScript` (see comments there |
|
for details) |
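
        Example (sketch; ``bad_fn`` is a hypothetical stand-in)::

            def bad_fn(x):
                raise RuntimeError("expected failure")

            self.checkScriptRaisesRegex(bad_fn, (torch.zeros(2),), Exception,
                                        "expected failure")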
|
""" |
|
with enable_profiling_mode_for_profiling_tests(): |
|
|
|
with self.assertRaisesRegex(exception, regex): |
|
if isinstance(script, str): |
|
frame = self.get_frame_vars(frames_up) |
|
the_locals: Dict[str, Any] = {} |
|
execWrapper(script, glob=frame, loc=the_locals) |
|
frame.update(the_locals) |
|
|
|
python_fn = frame[name] |
|
else: |
|
python_fn = script |
|
|
|
python_fn(*inputs) |
|
|
|
|
|
with self.assertRaisesRegex(exception, regex): |
|
if isinstance(script, str): |
|
cu = torch.jit.CompilationUnit(script, _frames_up=frames_up) |
|
string_frontend = getattr(cu, name) |
|
else: |
|
source = textwrap.dedent(inspect.getsource(script)) |
|
cu = torch.jit.CompilationUnit(source, _frames_up=frames_up) |
|
string_frontend = getattr(cu, script.__name__) |
|
|
|
string_frontend(*inputs) |
|
|
|
|
|
if not isinstance(script, str): |
|
with self.assertRaisesRegex(exception, regex): |
|
ge = torch.jit.script(python_fn) |
|
ge(*inputs) |
|
|
|
def checkBailouts(self, model, inputs, expected): |
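        """
        Request each bail-out point of the model's (single) execution plan in
        turn and check that the de-optimized run still produces ``expected``.
        """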
|
state = model.get_debug_state() |
|
plan = get_execution_plan(state) |
|
num_bailouts = plan.code.num_bailouts() |
|
        for i in range(num_bailouts):
|
plan.code.request_bailout(i) |
|
bailout_outputs = model(*inputs) |
|
self.assertEqual(bailout_outputs, expected) |
|
|
|
def checkScript(self, |
|
script, |
|
inputs, |
|
name='func', |
|
optimize=True, |
|
inputs_requires_grad=False, |
|
capture_output=False, |
|
frames_up=1, |
|
profiling=ProfilingMode.PROFILING, |
|
atol=None, |
|
rtol=None): |
|
""" |
|
Checks that a given script generates the same output as the Python |
|
version using the given inputs. |
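
        Example (sketch)::

            def fn(x, y):
                return x + y

            self.checkScript(fn, (torch.rand(3), torch.rand(3)))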
|
""" |
|
with torch.jit.optimized_execution(optimize): |
|
with enable_profiling_mode_for_profiling_tests(): |
|
extra_profile_runs = any(isinstance(x, torch.Tensor) and x.requires_grad for x in inputs) |
|
if isinstance(script, str): |
|
|
|
|
|
                    # Compile the string with the string frontend.
                    cu = torch.jit.CompilationUnit(script, _frames_up=frames_up)
|
|
|
|
|
|
|
|
|
                    # Also exec the string in the caller's frame so the plain
                    # Python version can be run for comparison.
                    frame = self.get_frame_vars(frames_up)
|
the_locals: Dict[str, Any] = {} |
|
execWrapper(script, glob=frame, loc=the_locals) |
|
frame.update(the_locals) |
|
|
|
python_fn = frame[name] |
|
scripted_fn = getattr(cu, name) |
|
else: |
|
|
|
|
|
                    # Check the string frontend first.
                    source = textwrap.dedent(inspect.getsource(script))
|
self.checkScript( |
|
source, |
|
inputs, |
|
script.__name__, |
|
optimize=optimize, |
|
inputs_requires_grad=inputs_requires_grad, |
|
capture_output=capture_output, |
|
profiling=profiling, |
|
frames_up=2) |
|
|
|
|
|
                    # Then continue checking the Python frontend.
                    scripted_fn = torch.jit.script(script, _frames_up=1)
|
python_fn = script |
|
|
|
if inputs_requires_grad: |
|
recording_inputs = do_input_map(lambda t: t.detach().requires_grad_(), inputs) |
|
else: |
|
recording_inputs = inputs |
|
|
|
if capture_output: |
|
with self.capture_stdout() as script_stdout: |
|
script_outputs = scripted_fn(*recording_inputs) |
|
with self.capture_stdout() as opt_script_stdout: |
|
opt_script_outputs = scripted_fn(*recording_inputs) |
|
with self.capture_stdout() as _python_stdout: |
|
python_outputs = python_fn(*inputs) |
|
if not IS_WINDOWS: |
|
self.assertExpected(script_stdout[0], subname='stdout') |
|
self.assertEqual(python_outputs, opt_script_outputs, atol=atol, rtol=rtol) |
|
else: |
|
|
|
                    # profiling runs
                    script_outputs = scripted_fn(*recording_inputs)
                    if inputs_requires_grad or extra_profile_runs:
                        opt_script_outputs = scripted_fn(*recording_inputs)
                    # optimized run
                    opt_script_outputs = scripted_fn(*recording_inputs)
|
if TEST_BAILOUTS: |
|
self.checkBailouts(scripted_fn, inputs, opt_script_outputs) |
|
python_outputs = python_fn(*inputs) |
|
self.assertEqual(python_outputs, script_outputs, atol=atol, rtol=rtol) |
|
self.assertEqual(script_outputs, opt_script_outputs, atol=atol, rtol=rtol) |
|
return scripted_fn |
|
|
|
def checkTrace(self, func, reference_tensors, input_tensors=None, |
|
drop=None, allow_unused=False, verbose=False, |
|
inputs_require_grads=True, check_tolerance=1e-5, export_import=True, |
|
_force_outplace=False): |
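        """
        Trace ``func`` over ``input_tensors`` and check that the trace matches
        eager execution of ``func`` on ``reference_tensors``: outputs,
        first-order gradients, and grad-of-grad results are all compared.
        """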
|
|
|
|
|
        def allSum(vs):
            # `drop` removes trailing outputs so unused outputs can be tested.
            if drop is not None:
                vs = vs[:-drop]
            # Weight each output differently so gradients are distinguishable.
            return sum(math.log(i + 2) * v.sum() for i, v in enumerate(vs) if v is not None)
|
if input_tensors is None: |
|
input_tensors = reference_tensors |
|
|
|
def flatten_inputs(inputs): |
|
def input_reduce(input, fn, acc): |
|
if isinstance(input, torch.Tensor): |
|
fn(input, acc) |
|
elif isinstance(input, dict): |
|
reduce(lambda acc, key: input_reduce(input[key], fn, acc), input, acc) |
|
else: |
|
reduce(lambda acc, val: input_reduce(val, fn, acc), input, acc) |
|
return acc |
|
            return tuple(input_reduce(inputs, lambda t, acc: acc.append(t), []))
|
|
|
nograd_inputs = reference_tensors |
|
if inputs_require_grads: |
|
recording_inputs = do_input_map(lambda t: t.clone().requires_grad_(), reference_tensors) |
|
flattened_recording_inputs = flatten_inputs(recording_inputs) |
|
else: |
|
recording_inputs = reference_tensors |
|
|
|
|
|
|
|
|
|
        # Trace the function; correctness is verified manually below, so the
        # built-in check is disabled via check_trace=False.
        ge = torch.jit.trace(func, input_tensors, check_tolerance=check_tolerance,
                             _force_outplace=_force_outplace, check_trace=False)
|
|
|
if export_import: |
|
ge = self.getExportImportCopy(ge) |
|
|
|
if verbose: |
|
print(ge.graph) |
|
|
|
|
|
        # Test the no-gradients case.
        outputs = func(*nograd_inputs)
|
outputs_ge = ge(*nograd_inputs) |
|
self.assertEqual(outputs, outputs_ge) |
|
|
|
|
|
        # Test the single-grad case.
        outputs = func(*recording_inputs)
|
if inputs_require_grads: |
|
grads = torch.autograd.grad(allSum(outputs), flattened_recording_inputs, |
|
allow_unused=allow_unused) |
|
|
|
outputs_ge = ge(*recording_inputs) |
|
if inputs_require_grads: |
|
grads_ge = torch.autograd.grad(allSum(outputs_ge), flattened_recording_inputs, |
|
allow_unused=allow_unused) |
|
self.assertEqual(outputs, outputs_ge) |
|
if inputs_require_grads: |
|
self.assertEqual(grads, grads_ge) |
|
|
|
|
|
|
|
|
        # Test the grad-of-grad case.
        outputs = func(*recording_inputs)
|
l1 = allSum(outputs) |
|
if inputs_require_grads: |
|
grads = torch.autograd.grad(l1, flattened_recording_inputs, create_graph=True, |
|
allow_unused=allow_unused) |
|
if inputs_require_grads: |
|
l2 = (allSum(grads) * l1) |
|
grads2 = torch.autograd.grad(l2, flattened_recording_inputs, allow_unused=allow_unused) |
|
|
|
if inputs_require_grads: |
|
recording_inputs = do_input_map(lambda t: Variable(t, requires_grad=True), reference_tensors) |
|
flattened_recording_inputs = flatten_inputs(recording_inputs) |
|
|
|
outputs_ge = ge(*recording_inputs) |
|
l1_ge = allSum(outputs_ge) |
|
if inputs_require_grads: |
|
grads_ge = torch.autograd.grad( |
|
l1_ge, flattened_recording_inputs, create_graph=True, allow_unused=allow_unused) |
|
|
|
if inputs_require_grads: |
|
l2_ge = (allSum(grads_ge) * l1_ge) |
|
grads2_ge = torch.autograd.grad(l2_ge, flattened_recording_inputs, allow_unused=allow_unused) |
|
|
|
self.assertEqual(outputs, outputs_ge) |
|
if inputs_require_grads: |
|
self.assertEqual(grads, grads_ge) |
|
for g2, g2_ge in zip(grads2, grads2_ge): |
|
if g2 is None and g2_ge is None: |
|
continue |
|
self.assertEqual(g2, g2_ge, atol=8e-4, rtol=8e-4) |
|
|
|
return ge |
|
|
|
def checkModule(self, nn_module, args): |
|
""" |
|
        Check that an nn.Module's results in script mode match eager mode and
        that the scripted module can be exported and re-imported.
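
        Example (sketch)::

            self.checkModule(torch.nn.Linear(2, 2), (torch.rand(3, 2),))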
|
""" |
|
sm = torch.jit.script(nn_module) |
|
|
|
with freeze_rng_state(): |
|
eager_out = nn_module(*args) |
|
|
|
with freeze_rng_state(): |
|
script_out = sm(*args) |
|
|
|
self.assertEqual(eager_out, script_out) |
|
self.assertExportImportModule(sm, args) |
|
|
|
return sm |
|
|
|
class NoTracerWarnContextManager(object): |
|
def __enter__(self): |
|
self.prev = torch._C._jit_get_tracer_state_warn() |
|
torch._C._jit_set_tracer_state_warn(False) |
|
|
|
def __exit__(self, *args): |
|
torch._C._jit_set_tracer_state_warn(self.prev) |
|
|
|
@contextmanager |
|
def inline_everything_mode(should_inline): |
|
old = torch._C._jit_get_inline_everything_mode() |
|
torch._C._jit_set_inline_everything_mode(should_inline) |
|
try: |
|
yield |
|
finally: |
|
torch._C._jit_set_inline_everything_mode(old) |
|
|
|
@contextmanager |
|
def set_fusion_group_inlining(inlining): |
|
old = torch._C._debug_get_fusion_group_inlining() |
|
torch._C._debug_set_fusion_group_inlining(inlining) |
|
try: |
|
yield |
|
finally: |
|
torch._C._debug_set_fusion_group_inlining(old) |
|
|
|
|
|
@contextmanager |
|
def disable_autodiff_subgraph_inlining(enabled=True): |
|
torch._C._debug_set_autodiff_subgraph_inlining(not enabled) |
|
try: |
|
yield |
|
finally: |
|
torch._C._debug_set_autodiff_subgraph_inlining(True) |
|
|
|
def _inline_everything(fn): |
|
@functools.wraps(fn) |
|
def wrapper(*args, **kwargs): |
|
with inline_everything_mode(True): |
|
fn(*args, **kwargs) |
|
return wrapper |
|
|
|
|
|
|
|
def _tmp_donotuse_dont_inline_everything(fn): |
|
@functools.wraps(fn) |
|
def wrapper(*args, **kwargs): |
|
with inline_everything_mode(False): |
|
fn(*args, **kwargs) |
|
return wrapper |
|
|
|
|
|
def _trace(*args, **kwargs): |
|
def wrapper(func): |
|
return torch.jit.trace(func, args, **kwargs) |
|
return wrapper |
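

# Usage sketch for `_trace` (the positional arguments become example inputs;
# `traced_fn` is hypothetical):
#
#   @_trace(torch.rand(3))
#   def traced_fn(x):
#       return x * 2
#
# `traced_fn` then refers to the traced function rather than the original.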
|
|
|
|
|
def enable_cpu_fuser(fn):
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
|
torch._C._jit_override_can_fuse_on_cpu_legacy(True) |
|
torch._C._jit_override_can_fuse_on_cpu(True) |
|
torch._C._jit_set_te_must_use_llvm_cpu(False) |
|
try: |
|
fn(*args, **kwargs) |
|
finally: |
|
torch._C._jit_override_can_fuse_on_cpu_legacy(False) |
|
torch._C._jit_override_can_fuse_on_cpu(False) |
|
torch._C._jit_set_te_must_use_llvm_cpu(True) |
|
return wrapper |
|
|
|
|
|
def enable_cpu_fuser_if(cond): |
|
if cond: |
|
return enable_cpu_fuser |
|
else: |
|
        def noop_fuser(fn):
            @functools.wraps(fn)
            def wrapper(*args, **kwargs):
                return fn(*args, **kwargs)
            return wrapper
|
return noop_fuser |
|
|
|
def get_forward(c): |
|
return c._get_method('forward') |
|
|
|
def get_forward_graph(c): |
|
return c._get_method('forward').graph |
|
|
|
def get_module_method(m, module, method): |
|
return m._c.getattr(module)._get_method(method) |
|
|
|
def attrs_with_prefix(module, prefix): |
|
return [x for x, _ in module._modules._c.items() |
|
if x.startswith(prefix)] |
|
|
|
def warmup_backward(f, *args):
    # Run backward several times so the profiling executor can record and
    # optimize the backward graph.
    profiling_count = 3
|
results = [] |
|
for i in range(profiling_count): |
|
if len(args) > 0: |
|
r = torch.autograd.grad(f, *args) |
|
results.append(r) |
|
else: |
|
f.backward(retain_graph=True) |
|
|
|
return results |
|
|
|
|
|
def make_global(*args):
    """Install each argument into its defining module's namespace so that the
    TorchScript compiler can resolve it by name."""
    for arg in args:
        setattr(sys.modules[arg.__module__], arg.__name__, arg)
|
|
|
|
|
|
|
def _get_py3_code(code, fn_name): |
|
with tempfile.TemporaryDirectory() as tmp_dir: |
|
script_path = os.path.join(tmp_dir, 'script.py') |
|
with open(script_path, 'w') as f: |
|
f.write(code) |
|
spec = importlib.util.spec_from_file_location(fn_name, script_path) |
|
module = importlib.util.module_from_spec(spec) |
|
loader = spec.loader |
|
assert isinstance(loader, Loader) |
|
loader.exec_module(module) |
|
fn = getattr(module, fn_name) |
|
return fn |
|
|
|
class TensorExprTestOptions:
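    """
    Switch the JIT into the configuration used by TensorExpr tests (profiling
    executor on, TE fuser on, fusion-group inlining and nvfuser off), saving
    the previous settings.

    Typical use (sketch)::

        def setUp(self):
            super().setUp()
            self.tensorexpr_options = TensorExprTestOptions()

        def tearDown(self):
            self.tensorexpr_options.restore()
            super().tearDown()
    """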
|
def __init__(self): |
|
self.old_profiling_executor = torch._C._jit_set_profiling_executor(True) |
|
self.old_profiling_mode = torch._C._get_graph_executor_optimize(True) |
|
|
|
self.old_cpu_fuser_state = torch._C._jit_can_fuse_on_cpu() |
|
self.old_gpu_fuser_state = torch._C._jit_can_fuse_on_gpu() |
|
torch._C._jit_override_can_fuse_on_cpu(True) |
|
torch._C._jit_override_can_fuse_on_gpu(True) |
|
self.texpr_fuser_state = torch._C._jit_texpr_fuser_enabled() |
|
torch._C._jit_set_texpr_fuser_enabled(True) |
|
self.old_fusion_inlining = torch._C._debug_get_fusion_group_inlining() |
|
torch._C._debug_set_fusion_group_inlining(False) |
|
self.old_te_must_use_llvm_cpu = torch._C._jit_get_te_must_use_llvm_cpu() |
|
torch._C._jit_set_te_must_use_llvm_cpu(False) |
|
self.old_nvfuser = torch._C._jit_set_nvfuser_enabled(False) |
|
|
|
def restore(self): |
|
torch._C._jit_set_profiling_executor(self.old_profiling_executor) |
|
torch._C._get_graph_executor_optimize(self.old_profiling_mode) |
|
|
|
torch._C._jit_set_texpr_fuser_enabled(self.texpr_fuser_state) |
|
torch._C._jit_override_can_fuse_on_gpu(self.old_gpu_fuser_state) |
|
torch._C._jit_override_can_fuse_on_cpu(self.old_cpu_fuser_state) |
|
torch._C._debug_set_fusion_group_inlining(self.old_fusion_inlining) |
|
torch._C._jit_set_te_must_use_llvm_cpu(self.old_te_must_use_llvm_cpu) |
|
torch._C._jit_set_nvfuser_enabled(self.old_nvfuser) |
|
|
|
def clone_inputs(args): |
|
inputs: List[Union[torch.Tensor, List[torch.Tensor]]] = [] |
|
|
|
for arg in args: |
|
if isinstance(arg, torch.Tensor): |
|
inputs.append(arg.detach().clone()) |
|
elif is_iterable_of_tensors(arg): |
|
inputs.append([t.detach().clone() for t in arg]) |
|
else: |
|
inputs.append(arg) |
|
|
|
return inputs |
|
|
|
def get_traced_sample_variant_pairs(device, dtype, op): |
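    """
    Pair each sample input of ``op`` with the op's function and method
    variants for tracing tests, skipping lambdas and fake functions and
    upcasting bool args that the JIT cannot accept to int.
    """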
|
|
|
outputs: List[Tuple[Any, Any]] = [] |
|
|
|
samples = op.sample_inputs(device, dtype) |
|
|
|
|
|
func = op.get_op() |
|
method = op.get_method() |
|
variants = { |
|
|
|
'function': func, 'method': method, |
|
} |
|
|
|
|
|
    has_fake_function = op.name in ["resize_", "resize_as_"]
|
|
|
if has_fake_function: |
|
variants = {'method': getattr(torch.Tensor, op.name)} |
|
|
|
|
|
|
|
|
|
|
|
    # In eager mode these ops accept bool arguments, but in the JIT a bool is
    # not a Scalar, so bool args at these positions are upcast to int below.
    ops_with_unsupported_bool_args = [
|
{ |
|
"name": "div_floor_rounding", |
|
"arg_idx": [0], |
|
}, |
|
{ |
|
"name": "div_no_rounding_mode", |
|
"arg_idx": [0], |
|
}, |
|
{ |
|
"name": "div_trunc_rounding", |
|
"arg_idx": [0], |
|
}, |
|
{ |
|
"name": "index_fill", |
|
"arg_idx": [2], |
|
}, |
|
{ |
|
"name": "full_like", |
|
"arg_idx": [0], |
|
}, |
|
{ |
|
"name": "mul", |
|
"arg_idx": [0], |
|
}, |
|
{ |
|
"name": "new_full", |
|
"arg_idx": [1], |
|
}, |
|
] |
|
|
|
|
|
    # Fake functions do not support tracing, so there are no variants to pair.
    if has_fake_function:
        return outputs
|
|
|
for sample in samples: |
|
for func_type, variant in variants.items(): |
|
if variant is None: |
|
continue |
|
|
|
if is_lambda(variant): |
|
continue |
|
|
|
matching_ops = filter(lambda x: op.formatted_name == x["name"], ops_with_unsupported_bool_args) |
|
for op_data in matching_ops: |
|
for idx in op_data["arg_idx"]: |
|
args = list(sample.args) |
|
if len(sample.args) > idx and isinstance(sample.args[idx], bool): |
|
args[idx] = int(args[idx]) |
|
sample.args = tuple(args) |
|
|
|
outputs.append((variant, sample)) |
|
|
|
return outputs |
|
|
|
|
|
def is_lambda(lamb): |
|
LAMBDA = lambda: 0 |
|
return isinstance(lamb, type(LAMBDA)) and lamb.__name__ == LAMBDA.__name__ |
|
|