koichi12 commited on Feb 12, 2025

Commit

ece6bf3

verified ·

1 Parent(s): 593c823

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/codecache.cpython-311.pyc +3 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_convert_indices_from_csr_to_coo_cuda_dispatch.h +25 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cslt_compress_native.h +21 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cudnn_rnn_flatten_weight.h +91 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cudnn_rnn_flatten_weight_native.h +22 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_empty_per_channel_affine_quantized_cpu_dispatch.h +26 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_cosh_cuda_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_sqrt_cpu_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_sdp_choice_native.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_indices_copy_native.h +22 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_mps_convolution_transpose.h +91 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_softmax_with_shape.h +30 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_storage_offsets.h +34 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nnz_native.h +22 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_softmax.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_sum_native.h +25 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward_native.h +22 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_unique2_cuda_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_meta.h +27 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_native.h +28 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/addcmul_native.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/alias_compositeexplicitautograd_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/arctanh_ops.h +50 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/atan2_native.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_update_stats_cpu_dispatch.h +23 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/bitwise_left_shift.h +67 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/clone_ops.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/complex_cuda_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cosh_cuda_dispatch.h +26 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cudnn_grid_sampler_backward_compositeexplicitautograd_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/dstack.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/expm1_meta.h +27 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine_cachemask_backward_ops.h +28 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine_cachemask_compositeexplicitautograd_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_irfft2.h +91 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fractional_max_pool3d_cuda_dispatch.h +25 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/from_file.h +43 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/instance_norm_native.h +21 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/is_leaf.h +26 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/isnan_compositeexplicitautograd_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/leaky_relu_ops.h +50 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_solve_ex_native.h +22 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_solve_triangular_ops.h +39 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/log1p_compositeexplicitautogradnonfunctional_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/log_softmax_compositeexplicitautograd_dispatch.h +24 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/logdet_native.h +21 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/logit_backward_cuda_dispatch.h +25 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/matrix_exp_backward_ops.h +28 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool3d_with_indices_backward_ops.h +39 -0

.gitattributes CHANGED Viewed

@@ -87,3 +87,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/nn/modules/
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/__pycache__/__init__.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/nn/modules/__pycache__/module.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/scheduler.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text

 tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/__pycache__/__init__.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/nn/modules/__pycache__/module.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/scheduler.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/codecache.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/codecache.cpython-311.pyc ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2c1a24c10bc84094a10d8939b8a88ad6adf045be858e0ba010cb9f309fc406a7
+size 136352

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_convert_indices_from_csr_to_coo_cuda_dispatch.h ADDED Viewed

	@@ -0,0 +1,25 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cuda {
+TORCH_API at::Tensor _convert_indices_from_csr_to_coo(const at::Tensor & crow_indices, const at::Tensor & col_indices, bool out_int32=false, bool transpose=false);
+TORCH_API at::Tensor & _convert_indices_from_csr_to_coo_out(at::Tensor & out, const at::Tensor & crow_indices, const at::Tensor & col_indices, bool out_int32=false, bool transpose=false);
+TORCH_API at::Tensor & _convert_indices_from_csr_to_coo_outf(const at::Tensor & crow_indices, const at::Tensor & col_indices, bool out_int32, bool transpose, at::Tensor & out);
+} // namespace cuda
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cslt_compress_native.h ADDED Viewed

	@@ -0,0 +1,21 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor _cslt_compress(const at::Tensor & input);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cudnn_rnn_flatten_weight.h ADDED Viewed

	@@ -0,0 +1,91 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_cudnn_rnn_flatten_weight_ops.h>
+namespace at {
+// aten::_cudnn_rnn_flatten_weight(Tensor[] weight_arr, int weight_stride0, SymInt input_size, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, bool bidirectional) -> Tensor
+inline at::Tensor _cudnn_rnn_flatten_weight(at::TensorList weight_arr, int64_t weight_stride0, int64_t input_size, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, bool batch_first, bool bidirectional) {
+    return at::_ops::_cudnn_rnn_flatten_weight::call(weight_arr, weight_stride0, input_size, mode, hidden_size, proj_size, num_layers, batch_first, bidirectional);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor _cudnn_rnn_flatten_weight(at::TensorList weight_arr, int64_t weight_stride0, int64_t input_size, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, bool batch_first, bool bidirectional) {
+    return at::_ops::_cudnn_rnn_flatten_weight::call(weight_arr, weight_stride0, input_size, mode, hidden_size, proj_size, num_layers, batch_first, bidirectional);
+  }
+}
+// aten::_cudnn_rnn_flatten_weight(Tensor[] weight_arr, int weight_stride0, SymInt input_size, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, bool bidirectional) -> Tensor
+inline at::Tensor _cudnn_rnn_flatten_weight_symint(at::TensorList weight_arr, int64_t weight_stride0, c10::SymInt input_size, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, bool bidirectional) {
+    return at::_ops::_cudnn_rnn_flatten_weight::call(weight_arr, weight_stride0, input_size, mode, hidden_size, proj_size, num_layers, batch_first, bidirectional);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor _cudnn_rnn_flatten_weight(at::TensorList weight_arr, int64_t weight_stride0, c10::SymInt input_size, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, bool bidirectional) {
+    return at::_ops::_cudnn_rnn_flatten_weight::call(weight_arr, weight_stride0, input_size, mode, hidden_size, proj_size, num_layers, batch_first, bidirectional);
+  }
+}
+// aten::_cudnn_rnn_flatten_weight.out(Tensor[] weight_arr, int weight_stride0, SymInt input_size, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, bool bidirectional, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _cudnn_rnn_flatten_weight_out(at::Tensor & out, at::TensorList weight_arr, int64_t weight_stride0, int64_t input_size, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, bool batch_first, bool bidirectional) {
+    return at::_ops::_cudnn_rnn_flatten_weight_out::call(weight_arr, weight_stride0, input_size, mode, hidden_size, proj_size, num_layers, batch_first, bidirectional, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & _cudnn_rnn_flatten_weight_out(at::Tensor & out, at::TensorList weight_arr, int64_t weight_stride0, int64_t input_size, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, bool batch_first, bool bidirectional) {
+    return at::_ops::_cudnn_rnn_flatten_weight_out::call(weight_arr, weight_stride0, input_size, mode, hidden_size, proj_size, num_layers, batch_first, bidirectional, out);
+  }
+}
+// aten::_cudnn_rnn_flatten_weight.out(Tensor[] weight_arr, int weight_stride0, SymInt input_size, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, bool bidirectional, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _cudnn_rnn_flatten_weight_outf(at::TensorList weight_arr, int64_t weight_stride0, int64_t input_size, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, bool batch_first, bool bidirectional, at::Tensor & out) {
+    return at::_ops::_cudnn_rnn_flatten_weight_out::call(weight_arr, weight_stride0, input_size, mode, hidden_size, proj_size, num_layers, batch_first, bidirectional, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & _cudnn_rnn_flatten_weight_outf(at::TensorList weight_arr, int64_t weight_stride0, int64_t input_size, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, bool batch_first, bool bidirectional, at::Tensor & out) {
+    return at::_ops::_cudnn_rnn_flatten_weight_out::call(weight_arr, weight_stride0, input_size, mode, hidden_size, proj_size, num_layers, batch_first, bidirectional, out);
+  }
+}
+// aten::_cudnn_rnn_flatten_weight.out(Tensor[] weight_arr, int weight_stride0, SymInt input_size, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, bool bidirectional, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _cudnn_rnn_flatten_weight_symint_out(at::Tensor & out, at::TensorList weight_arr, int64_t weight_stride0, c10::SymInt input_size, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, bool bidirectional) {
+    return at::_ops::_cudnn_rnn_flatten_weight_out::call(weight_arr, weight_stride0, input_size, mode, hidden_size, proj_size, num_layers, batch_first, bidirectional, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & _cudnn_rnn_flatten_weight_out(at::Tensor & out, at::TensorList weight_arr, int64_t weight_stride0, c10::SymInt input_size, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, bool bidirectional) {
+    return at::_ops::_cudnn_rnn_flatten_weight_out::call(weight_arr, weight_stride0, input_size, mode, hidden_size, proj_size, num_layers, batch_first, bidirectional, out);
+  }
+}
+// aten::_cudnn_rnn_flatten_weight.out(Tensor[] weight_arr, int weight_stride0, SymInt input_size, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, bool bidirectional, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _cudnn_rnn_flatten_weight_symint_outf(at::TensorList weight_arr, int64_t weight_stride0, c10::SymInt input_size, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, bool bidirectional, at::Tensor & out) {
+    return at::_ops::_cudnn_rnn_flatten_weight_out::call(weight_arr, weight_stride0, input_size, mode, hidden_size, proj_size, num_layers, batch_first, bidirectional, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & _cudnn_rnn_flatten_weight_outf(at::TensorList weight_arr, int64_t weight_stride0, c10::SymInt input_size, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, bool bidirectional, at::Tensor & out) {
+    return at::_ops::_cudnn_rnn_flatten_weight_out::call(weight_arr, weight_stride0, input_size, mode, hidden_size, proj_size, num_layers, batch_first, bidirectional, out);
+  }
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_cudnn_rnn_flatten_weight_native.h ADDED Viewed

	@@ -0,0 +1,22 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _cudnn_rnn_flatten_weight_out_symint(at::TensorList weight_arr, int64_t weight_stride0, c10::SymInt input_size, int64_t mode, c10::SymInt hidden_size, c10::SymInt proj_size, int64_t num_layers, bool batch_first, bool bidirectional, at::Tensor & out);
+TORCH_API at::Tensor _cudnn_rnn_flatten_weight(at::TensorList weight_arr, int64_t weight_stride0, int64_t input_size, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, bool batch_first, bool bidirectional);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_empty_per_channel_affine_quantized_cpu_dispatch.h ADDED Viewed

	@@ -0,0 +1,26 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cpu {
+TORCH_API at::Tensor _empty_per_channel_affine_quantized(at::IntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, at::TensorOptions options={}, c10::optional<at::MemoryFormat> memory_format=MemoryFormat::Contiguous);
+TORCH_API at::Tensor _empty_per_channel_affine_quantized(at::IntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format);
+TORCH_API at::Tensor _empty_per_channel_affine_quantized_symint(c10::SymIntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, at::TensorOptions options={}, c10::optional<at::MemoryFormat> memory_format=MemoryFormat::Contiguous);
+TORCH_API at::Tensor _empty_per_channel_affine_quantized_symint(c10::SymIntArrayRef size, const at::Tensor & scales, const at::Tensor & zero_points, int64_t axis, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory, c10::optional<at::MemoryFormat> memory_format);
+} // namespace cpu
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_cosh_cuda_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cuda {
+TORCH_API ::std::vector<at::Tensor> _foreach_cosh(at::TensorList self);
+TORCH_API void _foreach_cosh_(at::TensorList self);
+} // namespace cuda
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_foreach_sqrt_cpu_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cpu {
+TORCH_API ::std::vector<at::Tensor> _foreach_sqrt(at::TensorList self);
+TORCH_API void _foreach_sqrt_(at::TensorList self);
+} // namespace cpu
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_fused_sdp_choice_native.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API int64_t _fused_sdp_choice_cpp(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const c10::optional<at::Tensor> & attn_mask={}, double dropout_p=0.0, bool is_causal=false, c10::optional<double> scale=c10::nullopt);
+TORCH_API int64_t _fused_sdp_choice_cuda(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const c10::optional<at::Tensor> & attn_mask={}, double dropout_p=0.0, bool is_causal=false, c10::optional<double> scale=c10::nullopt);
+TORCH_API int64_t _fused_sdp_choice_meta(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const c10::optional<at::Tensor> & attn_mask={}, double dropout_p=0.0, bool is_causal=false, c10::optional<double> scale=c10::nullopt);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_indices_copy_native.h ADDED Viewed

	@@ -0,0 +1,22 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _indices_copy_out(const at::Tensor & self, at::Tensor & out);
+TORCH_API at::Tensor _indices_copy(const at::Tensor & self);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_mps_convolution_transpose.h ADDED Viewed

	@@ -0,0 +1,91 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_mps_convolution_transpose_ops.h>
+namespace at {
+// aten::_mps_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
+inline at::Tensor _mps_convolution_transpose(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups) {
+    return at::_ops::_mps_convolution_transpose::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor _mps_convolution_transpose(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups) {
+    return at::_ops::_mps_convolution_transpose::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups);
+  }
+}
+// aten::_mps_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
+inline at::Tensor _mps_convolution_transpose_symint(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups) {
+    return at::_ops::_mps_convolution_transpose::call(self, weight, padding, output_padding, stride, dilation, groups);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor _mps_convolution_transpose(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups) {
+    return at::_ops::_mps_convolution_transpose::call(self, weight, padding, output_padding, stride, dilation, groups);
+  }
+}
+// aten::_mps_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _mps_convolution_transpose_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups) {
+    return at::_ops::_mps_convolution_transpose_out::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & _mps_convolution_transpose_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups) {
+    return at::_ops::_mps_convolution_transpose_out::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, out);
+  }
+}
+// aten::_mps_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _mps_convolution_transpose_outf(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, at::Tensor & out) {
+    return at::_ops::_mps_convolution_transpose_out::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & _mps_convolution_transpose_outf(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef padding, at::IntArrayRef output_padding, at::IntArrayRef stride, at::IntArrayRef dilation, int64_t groups, at::Tensor & out) {
+    return at::_ops::_mps_convolution_transpose_out::call(self, weight, c10::fromIntArrayRefSlow(padding), c10::fromIntArrayRefSlow(output_padding), c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(dilation), groups, out);
+  }
+}
+// aten::_mps_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _mps_convolution_transpose_symint_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups) {
+    return at::_ops::_mps_convolution_transpose_out::call(self, weight, padding, output_padding, stride, dilation, groups, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & _mps_convolution_transpose_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups) {
+    return at::_ops::_mps_convolution_transpose_out::call(self, weight, padding, output_padding, stride, dilation, groups, out);
+  }
+}
+// aten::_mps_convolution_transpose.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _mps_convolution_transpose_symint_outf(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, at::Tensor & out) {
+    return at::_ops::_mps_convolution_transpose_out::call(self, weight, padding, output_padding, stride, dilation, groups, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & _mps_convolution_transpose_outf(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef output_padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, at::Tensor & out) {
+    return at::_ops::_mps_convolution_transpose_out::call(self, weight, padding, output_padding, stride, dilation, groups, out);
+  }
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_softmax_with_shape.h ADDED Viewed

	@@ -0,0 +1,30 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_nested_tensor_softmax_with_shape_ops.h>
+namespace at {
+// aten::_nested_tensor_softmax_with_shape(Tensor self, Tensor query) -> Tensor
+inline at::Tensor _nested_tensor_softmax_with_shape(const at::Tensor & self, const at::Tensor & query) {
+    return at::_ops::_nested_tensor_softmax_with_shape::call(self, query);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nested_tensor_storage_offsets.h ADDED Viewed

	@@ -0,0 +1,34 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_nested_tensor_storage_offsets_ops.h>
+namespace at {
+// aten::_nested_tensor_storage_offsets.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _nested_tensor_storage_offsets_out(at::Tensor & out, const at::Tensor & self) {
+    return at::_ops::_nested_tensor_storage_offsets_out::call(self, out);
+}
+// aten::_nested_tensor_storage_offsets.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _nested_tensor_storage_offsets_outf(const at::Tensor & self, at::Tensor & out) {
+    return at::_ops::_nested_tensor_storage_offsets_out::call(self, out);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_nnz_native.h ADDED Viewed

	@@ -0,0 +1,22 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API int64_t _nnz_sparse(const at::Tensor & self);
+TORCH_API int64_t _nnz_sparse_csr(const at::Tensor & self);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_softmax.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/_softmax_ops.h>
+namespace at {
+// aten::_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
+inline at::Tensor _softmax(const at::Tensor & self, int64_t dim, bool half_to_float) {
+    return at::_ops::_softmax::call(self, dim, half_to_float);
+}
+// aten::_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _softmax_out(at::Tensor & out, const at::Tensor & self, int64_t dim, bool half_to_float) {
+    return at::_ops::_softmax_out::call(self, dim, half_to_float, out);
+}
+// aten::_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _softmax_outf(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out) {
+    return at::_ops::_softmax_out::call(self, dim, half_to_float, out);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_sparse_sum_native.h ADDED Viewed

	@@ -0,0 +1,25 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor _sparse_sum(const at::Tensor & self);
+TORCH_API at::Tensor _sparse_sum(const at::Tensor & self, at::ScalarType dtype);
+TORCH_API at::Tensor _sparse_sum(const at::Tensor & self, at::IntArrayRef dim);
+TORCH_API at::Tensor & _sparse_sum_dim_out(const at::Tensor & self, at::IntArrayRef dim, at::Tensor & out);
+TORCH_API at::Tensor _sparse_sum(const at::Tensor & self, at::IntArrayRef dim, at::ScalarType dtype);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward_native.h ADDED Viewed

	@@ -0,0 +1,22 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &,at::Tensor &,at::Tensor &> _thnn_fused_gru_cell_backward_out(const at::Tensor & grad_hy, const at::Tensor & workspace, bool has_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, at::Tensor & out3, at::Tensor & out4);
+TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor,at::Tensor> _thnn_fused_gru_cell_backward_cuda(const at::Tensor & grad_hy, const at::Tensor & workspace, bool has_bias);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_unique2_cuda_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cuda {
+TORCH_API ::std::tuple<at::Tensor,at::Tensor,at::Tensor> _unique2(const at::Tensor & self, bool sorted=true, bool return_inverse=false, bool return_counts=false);
+} // namespace cuda
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_meta.h ADDED Viewed

	@@ -0,0 +1,27 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <ATen/TensorMeta.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace meta {
+struct TORCH_API structured__upsample_bilinear2d_aa : public at::impl::MetaBase {
+    void meta(const at::Tensor & self, at::ArrayRef<int64_t> output_size, bool align_corners, c10::optional<double> scales_h, c10::optional<double> scales_w);
+};
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_native.h ADDED Viewed

	@@ -0,0 +1,28 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/_upsample_nearest_exact3d_meta.h>
+namespace at {
+namespace native {
+TORCH_API at::Tensor _upsample_nearest_exact3d(const at::Tensor & input, at::OptionalIntArrayRef output_size, c10::optional<at::ArrayRef<double>> scale_factors);
+struct TORCH_API structured__upsample_nearest_exact3d_out_cpu : public at::meta::structured__upsample_nearest_exact3d {
+void impl(const at::Tensor & self, at::ArrayRef<int64_t> output_size, c10::optional<double> scales_d, c10::optional<double> scales_h, c10::optional<double> scales_w, const at::Tensor & out);
+};
+struct TORCH_API structured__upsample_nearest_exact3d_out_cuda : public at::meta::structured__upsample_nearest_exact3d {
+void impl(const at::Tensor & self, at::ArrayRef<int64_t> output_size, c10::optional<double> scales_d, c10::optional<double> scales_h, c10::optional<double> scales_w, const at::Tensor & out);
+};
+TORCH_API at::Tensor _upsample_nearest_exact3d_quantized_cpu(const at::Tensor & self, at::IntArrayRef output_size, c10::optional<double> scales_d=c10::nullopt, c10::optional<double> scales_h=c10::nullopt, c10::optional<double> scales_w=c10::nullopt);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/addcmul_native.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/addcmul_meta.h>
+namespace at {
+namespace native {
+struct TORCH_API structured_addcmul_out : public at::meta::structured_addcmul {
+void impl(const at::Tensor & self, const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value, const at::Tensor & out);
+};
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/alias_compositeexplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautograd {
+TORCH_API at::Tensor alias(const at::Tensor & self);
+} // namespace compositeexplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/arctanh_ops.h ADDED Viewed

	@@ -0,0 +1,50 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API arctanh {
+  using schema = at::Tensor (const at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::arctanh")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "arctanh(Tensor self) -> Tensor")
+  static at::Tensor call(const at::Tensor & self);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self);
+};
+struct TORCH_API arctanh_ {
+  using schema = at::Tensor & (at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::arctanh_")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "arctanh_(Tensor(a!) self) -> Tensor(a!)")
+  static at::Tensor & call(at::Tensor & self);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self);
+};
+struct TORCH_API arctanh_out {
+  using schema = at::Tensor & (const at::Tensor &, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::arctanh")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "arctanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & self, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/atan2_native.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/atan2_meta.h>
+namespace at {
+namespace native {
+struct TORCH_API structured_atan2_out : public at::meta::structured_atan2 {
+void impl(const at::Tensor & self, const at::Tensor & other, const at::Tensor & out);
+};
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/batch_norm_update_stats_cpu_dispatch.h ADDED Viewed

	@@ -0,0 +1,23 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cpu {
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> batch_norm_update_stats(const at::Tensor & input, const c10::optional<at::Tensor> & running_mean, const c10::optional<at::Tensor> & running_var, double momentum);
+} // namespace cpu
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/bitwise_left_shift.h ADDED Viewed

	@@ -0,0 +1,67 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/bitwise_left_shift_ops.h>
+namespace at {
+// aten::bitwise_left_shift.Tensor(Tensor self, Tensor other) -> Tensor
+inline at::Tensor bitwise_left_shift(const at::Tensor & self, const at::Tensor & other) {
+    return at::_ops::bitwise_left_shift_Tensor::call(self, other);
+}
+// aten::bitwise_left_shift.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & bitwise_left_shift_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) {
+    return at::_ops::bitwise_left_shift_Tensor_out::call(self, other, out);
+}
+// aten::bitwise_left_shift.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & bitwise_left_shift_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) {
+    return at::_ops::bitwise_left_shift_Tensor_out::call(self, other, out);
+}
+// aten::bitwise_left_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor
+inline at::Tensor bitwise_left_shift(const at::Tensor & self, const at::Scalar & other) {
+    return at::_ops::bitwise_left_shift_Tensor_Scalar::call(self, other);
+}
+// aten::bitwise_left_shift.Tensor_Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & bitwise_left_shift_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other) {
+    return at::_ops::bitwise_left_shift_Tensor_Scalar_out::call(self, other, out);
+}
+// aten::bitwise_left_shift.Tensor_Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & bitwise_left_shift_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out) {
+    return at::_ops::bitwise_left_shift_Tensor_Scalar_out::call(self, other, out);
+}
+// aten::bitwise_left_shift.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
+inline at::Tensor bitwise_left_shift(const at::Scalar & self, const at::Tensor & other) {
+    return at::_ops::bitwise_left_shift_Scalar_Tensor::call(self, other);
+}
+// aten::bitwise_left_shift.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & bitwise_left_shift_out(at::Tensor & out, const at::Scalar & self, const at::Tensor & other) {
+    return at::_ops::bitwise_left_shift_Scalar_Tensor_out::call(self, other, out);
+}
+// aten::bitwise_left_shift.Scalar_Tensor_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & bitwise_left_shift_outf(const at::Scalar & self, const at::Tensor & other, at::Tensor & out) {
+    return at::_ops::bitwise_left_shift_Scalar_Tensor_out::call(self, other, out);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/clone_ops.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API clone {
+  using schema = at::Tensor (const at::Tensor &, c10::optional<at::MemoryFormat>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::clone")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "clone(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor")
+  static at::Tensor call(const at::Tensor & self, c10::optional<at::MemoryFormat> memory_format);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::optional<at::MemoryFormat> memory_format);
+};
+struct TORCH_API clone_out {
+  using schema = at::Tensor & (const at::Tensor &, c10::optional<at::MemoryFormat>, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::clone")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "clone.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & self, c10::optional<at::MemoryFormat> memory_format, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::optional<at::MemoryFormat> memory_format, at::Tensor & out);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/complex_cuda_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cuda {
+TORCH_API at::Tensor & complex_out(at::Tensor & out, const at::Tensor & real, const at::Tensor & imag);
+TORCH_API at::Tensor & complex_outf(const at::Tensor & real, const at::Tensor & imag, at::Tensor & out);
+} // namespace cuda
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cosh_cuda_dispatch.h ADDED Viewed

	@@ -0,0 +1,26 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cuda {
+TORCH_API at::Tensor cosh(const at::Tensor & self);
+TORCH_API at::Tensor & cosh_out(at::Tensor & out, const at::Tensor & self);
+TORCH_API at::Tensor & cosh_outf(const at::Tensor & self, at::Tensor & out);
+TORCH_API at::Tensor & cosh_(at::Tensor & self);
+} // namespace cuda
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/cudnn_grid_sampler_backward_compositeexplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautograd {
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> cudnn_grid_sampler_backward_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & self, const at::Tensor & grid, const at::Tensor & grad_output);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> cudnn_grid_sampler_backward_outf(const at::Tensor & self, const at::Tensor & grid, const at::Tensor & grad_output, at::Tensor & out0, at::Tensor & out1);
+} // namespace compositeexplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/dstack.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/dstack_ops.h>
+namespace at {
+// aten::dstack(Tensor[] tensors) -> Tensor
+inline at::Tensor dstack(at::TensorList tensors) {
+    return at::_ops::dstack::call(tensors);
+}
+// aten::dstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & dstack_out(at::Tensor & out, at::TensorList tensors) {
+    return at::_ops::dstack_out::call(tensors, out);
+}
+// aten::dstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & dstack_outf(at::TensorList tensors, at::Tensor & out) {
+    return at::_ops::dstack_out::call(tensors, out);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/expm1_meta.h ADDED Viewed

	@@ -0,0 +1,27 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <ATen/TensorMeta.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace meta {
+struct TORCH_API structured_expm1 : public TensorIteratorBase {
+    void meta(const at::Tensor & self);
+};
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine_cachemask_backward_ops.h ADDED Viewed

	@@ -0,0 +1,28 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API fake_quantize_per_channel_affine_cachemask_backward {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::fake_quantize_per_channel_affine_cachemask_backward")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "fake_quantize_per_channel_affine_cachemask_backward(Tensor grad, Tensor mask) -> Tensor")
+  static at::Tensor call(const at::Tensor & grad, const at::Tensor & mask);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad, const at::Tensor & mask);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine_cachemask_compositeexplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautograd {
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> fake_quantize_per_channel_affine_cachemask_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> fake_quantize_per_channel_affine_cachemask_outf(const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, at::Tensor & out0, at::Tensor & out1);
+} // namespace compositeexplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fft_irfft2.h ADDED Viewed

	@@ -0,0 +1,91 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/fft_irfft2_ops.h>
+namespace at {
+// aten::fft_irfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
+inline at::Tensor fft_irfft2(const at::Tensor & self, at::OptionalIntArrayRef s=c10::nullopt, at::IntArrayRef dim={-2,-1}, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_irfft2::call(self, s.has_value() ? c10::make_optional(c10::fromIntArrayRefSlow(*s)) : c10::nullopt, dim, norm);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor fft_irfft2(const at::Tensor & self, at::OptionalIntArrayRef s=c10::nullopt, at::IntArrayRef dim={-2,-1}, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_irfft2::call(self, s.has_value() ? c10::make_optional(c10::fromIntArrayRefSlow(*s)) : c10::nullopt, dim, norm);
+  }
+}
+// aten::fft_irfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
+inline at::Tensor fft_irfft2_symint(const at::Tensor & self, at::OptionalSymIntArrayRef s=c10::nullopt, at::IntArrayRef dim={-2,-1}, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_irfft2::call(self, s, dim, norm);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor fft_irfft2(const at::Tensor & self, at::OptionalSymIntArrayRef s=c10::nullopt, at::IntArrayRef dim={-2,-1}, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_irfft2::call(self, s, dim, norm);
+  }
+}
+// aten::fft_irfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fft_irfft2_out(at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef s=c10::nullopt, at::IntArrayRef dim={-2,-1}, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_irfft2_out::call(self, s.has_value() ? c10::make_optional(c10::fromIntArrayRefSlow(*s)) : c10::nullopt, dim, norm, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & fft_irfft2_out(at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef s=c10::nullopt, at::IntArrayRef dim={-2,-1}, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_irfft2_out::call(self, s.has_value() ? c10::make_optional(c10::fromIntArrayRefSlow(*s)) : c10::nullopt, dim, norm, out);
+  }
+}
+// aten::fft_irfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fft_irfft2_outf(const at::Tensor & self, at::OptionalIntArrayRef s, at::IntArrayRef dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
+    return at::_ops::fft_irfft2_out::call(self, s.has_value() ? c10::make_optional(c10::fromIntArrayRefSlow(*s)) : c10::nullopt, dim, norm, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & fft_irfft2_outf(const at::Tensor & self, at::OptionalIntArrayRef s, at::IntArrayRef dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
+    return at::_ops::fft_irfft2_out::call(self, s.has_value() ? c10::make_optional(c10::fromIntArrayRefSlow(*s)) : c10::nullopt, dim, norm, out);
+  }
+}
+// aten::fft_irfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fft_irfft2_symint_out(at::Tensor & out, const at::Tensor & self, at::OptionalSymIntArrayRef s=c10::nullopt, at::IntArrayRef dim={-2,-1}, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_irfft2_out::call(self, s, dim, norm, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & fft_irfft2_out(at::Tensor & out, const at::Tensor & self, at::OptionalSymIntArrayRef s=c10::nullopt, at::IntArrayRef dim={-2,-1}, c10::optional<c10::string_view> norm=c10::nullopt) {
+    return at::_ops::fft_irfft2_out::call(self, s, dim, norm, out);
+  }
+}
+// aten::fft_irfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & fft_irfft2_symint_outf(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
+    return at::_ops::fft_irfft2_out::call(self, s, dim, norm, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & fft_irfft2_outf(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, c10::optional<c10::string_view> norm, at::Tensor & out) {
+    return at::_ops::fft_irfft2_out::call(self, s, dim, norm, out);
+  }
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/fractional_max_pool3d_cuda_dispatch.h ADDED Viewed

	@@ -0,0 +1,25 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cuda {
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> fractional_max_pool3d(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & random_samples);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> fractional_max_pool3d_out(at::Tensor & output, at::Tensor & indices, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & random_samples);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> fractional_max_pool3d_outf(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & random_samples, at::Tensor & output, at::Tensor & indices);
+} // namespace cuda
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/from_file.h ADDED Viewed

	@@ -0,0 +1,43 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/from_file_ops.h>
+namespace at {
+// aten::from_file(str filename, bool? shared=None, int? size=0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+inline at::Tensor from_file(c10::string_view filename, c10::optional<bool> shared=c10::nullopt, c10::optional<int64_t> size=0, at::TensorOptions options={}) {
+    return at::_ops::from_file::call(filename, shared, size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt());
+}
+// aten::from_file(str filename, bool? shared=None, int? size=0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+inline at::Tensor from_file(c10::string_view filename, c10::optional<bool> shared, c10::optional<int64_t> size, c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout, c10::optional<at::Device> device, c10::optional<bool> pin_memory) {
+    return at::_ops::from_file::call(filename, shared, size, dtype, layout, device, pin_memory);
+}
+// aten::from_file.out(str filename, bool? shared=None, int? size=0, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & from_file_out(at::Tensor & out, c10::string_view filename, c10::optional<bool> shared=c10::nullopt, c10::optional<int64_t> size=0) {
+    return at::_ops::from_file_out::call(filename, shared, size, out);
+}
+// aten::from_file.out(str filename, bool? shared=None, int? size=0, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & from_file_outf(c10::string_view filename, c10::optional<bool> shared, c10::optional<int64_t> size, at::Tensor & out) {
+    return at::_ops::from_file_out::call(filename, shared, size, out);
+}
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/instance_norm_native.h ADDED Viewed

	@@ -0,0 +1,21 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor instance_norm(const at::Tensor & input, const c10::optional<at::Tensor> & weight, const c10::optional<at::Tensor> & bias, const c10::optional<at::Tensor> & running_mean, const c10::optional<at::Tensor> & running_var, bool use_input_stats, double momentum, double eps, bool cudnn_enabled);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/is_leaf.h ADDED Viewed

	@@ -0,0 +1,26 @@

+#pragma once
+// @generated by torchgen/gen.py from Function.h
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <ATen/ops/is_leaf_ops.h>
+namespace at {
+}

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/isnan_compositeexplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautograd {
+TORCH_API at::Tensor & isnan_out(at::Tensor & out, const at::Tensor & self);
+TORCH_API at::Tensor & isnan_outf(const at::Tensor & self, at::Tensor & out);
+} // namespace compositeexplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/leaky_relu_ops.h ADDED Viewed

	@@ -0,0 +1,50 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API leaky_relu_out {
+  using schema = at::Tensor & (const at::Tensor &, const at::Scalar &, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::leaky_relu")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "leaky_relu.out(Tensor self, Scalar negative_slope=0.01, *, Tensor(a!) out) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & self, const at::Scalar & negative_slope, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Scalar & negative_slope, at::Tensor & out);
+};
+struct TORCH_API leaky_relu {
+  using schema = at::Tensor (const at::Tensor &, const at::Scalar &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::leaky_relu")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor")
+  static at::Tensor call(const at::Tensor & self, const at::Scalar & negative_slope);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Scalar & negative_slope);
+};
+struct TORCH_API leaky_relu_ {
+  using schema = at::Tensor & (at::Tensor &, const at::Scalar &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::leaky_relu_")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "leaky_relu_(Tensor(a!) self, Scalar negative_slope=0.01) -> Tensor(a!)")
+  static at::Tensor & call(at::Tensor & self, const at::Scalar & negative_slope);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Scalar & negative_slope);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_solve_ex_native.h ADDED Viewed

	@@ -0,0 +1,22 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> linalg_solve_ex(const at::Tensor & A, const at::Tensor & B, bool left=true, bool check_errors=false);
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &> linalg_solve_ex_out(const at::Tensor & A, const at::Tensor & B, bool left, bool check_errors, at::Tensor & result, at::Tensor & info);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/linalg_solve_triangular_ops.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API linalg_solve_triangular_out {
+  using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, bool, bool, bool, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::linalg_solve_triangular")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "linalg_solve_triangular.out(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False, Tensor(a!) out) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & self, const at::Tensor & B, bool upper, bool left, bool unitriangular, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & B, bool upper, bool left, bool unitriangular, at::Tensor & out);
+};
+struct TORCH_API linalg_solve_triangular {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, bool, bool, bool);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::linalg_solve_triangular")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "linalg_solve_triangular(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False) -> Tensor")
+  static at::Tensor call(const at::Tensor & self, const at::Tensor & B, bool upper, bool left, bool unitriangular);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & B, bool upper, bool left, bool unitriangular);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/log1p_compositeexplicitautogradnonfunctional_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautogradnonfunctional {
+TORCH_API at::Tensor log1p(const at::Tensor & self);
+TORCH_API at::Tensor & log1p_(at::Tensor & self);
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/log_softmax_compositeexplicitautograd_dispatch.h ADDED Viewed

	@@ -0,0 +1,24 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace compositeexplicitautograd {
+TORCH_API at::Tensor & log_softmax_out(at::Tensor & out, const at::Tensor & self, int64_t dim, c10::optional<at::ScalarType> dtype=c10::nullopt);
+TORCH_API at::Tensor & log_softmax_outf(const at::Tensor & self, int64_t dim, c10::optional<at::ScalarType> dtype, at::Tensor & out);
+} // namespace compositeexplicitautograd
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/logdet_native.h ADDED Viewed

	@@ -0,0 +1,21 @@

+#pragma once
+// @generated by torchgen/gen.py from NativeFunction.h
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+namespace at {
+namespace native {
+TORCH_API at::Tensor logdet(const at::Tensor & self);
+} // namespace native
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/logit_backward_cuda_dispatch.h ADDED Viewed

	@@ -0,0 +1,25 @@

+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace cuda {
+TORCH_API at::Tensor logit_backward(const at::Tensor & grad_output, const at::Tensor & self, c10::optional<double> eps=c10::nullopt);
+TORCH_API at::Tensor & logit_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, c10::optional<double> eps=c10::nullopt);
+TORCH_API at::Tensor & logit_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, c10::optional<double> eps, at::Tensor & grad_input);
+} // namespace cuda
+} // namespace at

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/matrix_exp_backward_ops.h ADDED Viewed

	@@ -0,0 +1,28 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API matrix_exp_backward {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::matrix_exp_backward")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "matrix_exp_backward(Tensor self, Tensor grad) -> Tensor")
+  static at::Tensor call(const at::Tensor & self, const at::Tensor & grad);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & grad);
+};
+}} // namespace at::_ops

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/include/ATen/ops/max_pool3d_with_indices_backward_ops.h ADDED Viewed

	@@ -0,0 +1,39 @@

+#pragma once
+// @generated by torchgen/gen.py from Operator.h
+#include <tuple>
+#include <vector>
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+namespace at {
+namespace _ops {
+struct TORCH_API max_pool3d_with_indices_backward_grad_input {
+  using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, bool, const at::Tensor &, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::max_pool3d_with_indices_backward")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "grad_input")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "max_pool3d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)")
+  static at::Tensor & call(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, const at::Tensor & indices, at::Tensor & grad_input);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, const at::Tensor & indices, at::Tensor & grad_input);
+};
+struct TORCH_API max_pool3d_with_indices_backward {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, bool, const at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::max_pool3d_with_indices_backward")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "")
+  STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "max_pool3d_with_indices_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices) -> Tensor")
+  static at::Tensor call(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, const at::Tensor & indices);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, const at::Tensor & indices);
+};
+}} // namespace at::_ops