koichi12 committed on Feb 12, 2025

Commit

b891f5b

verified ·

1 Parent(s): e74124d

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +11 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Scanning.cpython-311-x86_64-linux-gnu.so +3 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Code.cpython-311.pyc +3 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Nodes.cpython-311.pyc +3 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/ParseTreeTransforms.cpython-311.pyc +3 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/PyrexTypes.cpython-311.pyc +3 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/__pycache__/typing_extensions.cpython-311.pyc +3 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/__init__.cpython-311.pyc +0 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/_unix.cpython-311.pyc +0 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/INSTALLER +1 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/RECORD +104 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/WHEEL +5 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/top_level.txt +1 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_C.cpython-311-x86_64-linux-gnu.so +3 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/jinja2/__pycache__/compiler.cpython-311.pyc +3 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/__pycache__/ctx_mp.cpython-311.pyc +0 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__init__.py +77 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libelefun.cpython-311.pyc +0 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libhyper.cpython-311.pyc +0 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libmpc.cpython-311.pyc +0 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libelefun.py +1428 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libhyper.py +1150 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libintmath.py +584 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libmpc.py +835 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_functions2.cpython-311.pyc +3 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/__init__.py +0 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/Openacc/cupti_openacc.h +98 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_activity.h +0 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_events.h +1371 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_pcsampling_util.h +419 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_result.h +328 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_runtime_cbid.h +447 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_target.h +43 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_version.h +130 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_meta.h +2941 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_runtime_api_meta.h +2139 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_vdpau_interop_meta.h +38 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/include/cudnn_cnn_infer.h +571 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/__pycache__/__init__.cpython-311.pyc +0 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/cufftw.h +454 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/lib/__pycache__/__init__.cpython-311.pyc +0 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/idna/__pycache__/idnadata.cpython-311.pyc +3 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/LICENSE +29 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/RECORD +65 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/REQUESTED +0 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/entry_points.txt +5 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/detail/cpp_conduit.h +77 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/gil.h +219 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/iostream.h +265 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_C.cpython-311-x86_64-linux-gnu.so +0 -0

.gitattributes CHANGED Viewed

@@ -51,3 +51,14 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Plex/Trans
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/ExprNodes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/idna/__pycache__/uts46data.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/rich/__pycache__/_emoji_codes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text

 tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/ExprNodes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/idna/__pycache__/uts46data.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/rich/__pycache__/_emoji_codes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Scanning.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/idna/__pycache__/idnadata.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/PyrexTypes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/__pycache__/gen.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Code.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/__pycache__/typing_extensions.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/ParseTreeTransforms.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_functions2.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/jinja2/__pycache__/compiler.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_C.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Nodes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text

tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Scanning.cpython-311-x86_64-linux-gnu.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:35bbd7708e61d6b2d4704c7139018d3eae67bca303d9fa03228b50845f6fffe6
+size 340320

tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Code.cpython-311.pyc ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e03d22fd7cc8b4e378f65e07858c4720dcc03e0fa3553c776863e4969826cfd4
+size 145746

tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Nodes.cpython-311.pyc ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c423f97f1ac36f06a8a2c6ff723696608c3e094001049a85ad421706ae558dea
+size 522167

tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/ParseTreeTransforms.cpython-311.pyc ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ab3f1d1811e8f1f97f96bc002bc8705a4adb7a26f43def577bf24b25263f4b32
+size 213081

tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/PyrexTypes.cpython-311.pyc ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:465b72a0af764658a62bbb1d50e50b9a762ba16ddb1a6be0dd5b3b1f15c8a205
+size 254554

tuning-competition-baseline/.venv/lib/python3.11/site-packages/__pycache__/typing_extensions.cpython-311.pyc ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f505b823a26bd0da98ceb5e93ba4f79513f56cebf4f8cb1c8ed579dcdabaac32
+size 129942

tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (1.44 kB). View file

tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/_unix.cpython-311.pyc ADDED Viewed

Binary file (3.58 kB). View file

tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/INSTALLER ADDED Viewed

	@@ -0,0 +1 @@


1	+ pip

tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/RECORD ADDED Viewed

	@@ -0,0 +1,104 @@

+fsspec-2024.2.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+fsspec-2024.2.0.dist-info/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
+fsspec-2024.2.0.dist-info/METADATA,sha256=uwzW1Braxnd_QGVI8W6J0KHi5KTiTJEm8YzSUdG-_Dc,6786
+fsspec-2024.2.0.dist-info/RECORD,,
+fsspec-2024.2.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+fsspec-2024.2.0.dist-info/top_level.txt,sha256=blt2pDrQDwN3Gklcw13CSPLQRd6aaOgJ8AxqrW395MI,7
+fsspec/__init__.py,sha256=2kT62GfFK-AjgS-LgwSsCo_VA2IePvsyv8Ash5oiaFA,1982
+fsspec/__pycache__/__init__.cpython-311.pyc,,
+fsspec/__pycache__/_version.cpython-311.pyc,,
+fsspec/__pycache__/archive.cpython-311.pyc,,
+fsspec/__pycache__/asyn.cpython-311.pyc,,
+fsspec/__pycache__/caching.cpython-311.pyc,,
+fsspec/__pycache__/callbacks.cpython-311.pyc,,
+fsspec/__pycache__/compression.cpython-311.pyc,,
+fsspec/__pycache__/config.cpython-311.pyc,,
+fsspec/__pycache__/conftest.cpython-311.pyc,,
+fsspec/__pycache__/core.cpython-311.pyc,,
+fsspec/__pycache__/dircache.cpython-311.pyc,,
+fsspec/__pycache__/exceptions.cpython-311.pyc,,
+fsspec/__pycache__/fuse.cpython-311.pyc,,
+fsspec/__pycache__/generic.cpython-311.pyc,,
+fsspec/__pycache__/gui.cpython-311.pyc,,
+fsspec/__pycache__/mapping.cpython-311.pyc,,
+fsspec/__pycache__/parquet.cpython-311.pyc,,
+fsspec/__pycache__/registry.cpython-311.pyc,,
+fsspec/__pycache__/spec.cpython-311.pyc,,
+fsspec/__pycache__/transaction.cpython-311.pyc,,
+fsspec/__pycache__/utils.cpython-311.pyc,,
+fsspec/_version.py,sha256=onTKKWe4fXkBjQxbTwM82SUT0H3x4U17IYrciFAryaU,500
+fsspec/archive.py,sha256=S__DzfZj-urAN3tp2W6jJ6YDiXG1fAl7FjvWUN73qIE,2386
+fsspec/asyn.py,sha256=kJ45sFFya2lZsmu2v8CVc8ZPRs8AccEzAy6Jot2ylkU,36157
+fsspec/caching.py,sha256=N45pzJdD4w5FOX_sxGvHWirggPNB66JTGP1HH6fpSck,28781
+fsspec/callbacks.py,sha256=BDIwLzK6rr_0V5ch557fSzsivCElpdqhXr5dZ9Te-EE,9210
+fsspec/compression.py,sha256=Yyd8FXw2rwWRtVoRVah_yguv-J7BUcBo4yDu6Qt52a0,4859
+fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
+fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
+fsspec/core.py,sha256=0yCj1Z5MhbSDIQiqFs49VORl9QaGwV6hp9bXdkIoPIo,22363
+fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
+fsspec/exceptions.py,sha256=xcS7LiRrQ748kvOB9mrUR14kpjNztrHgEkZWi9M-VaI,330
+fsspec/fuse.py,sha256=66amOa6wdIbS0DMhhfAPUoOB37HPorfXD1izV0prmTY,10145
+fsspec/generic.py,sha256=NuNaP66OaphwMbuLHRFBLda78TD81isa9O4ozJqbUv0,13455
+fsspec/gui.py,sha256=XKoXZpUhRE7jOhRCJH4-jRbKhVu56aS8h9tecvPD3nc,13932
+fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+fsspec/implementations/__pycache__/__init__.cpython-311.pyc,,
+fsspec/implementations/__pycache__/arrow.cpython-311.pyc,,
+fsspec/implementations/__pycache__/cache_mapper.cpython-311.pyc,,
+fsspec/implementations/__pycache__/cache_metadata.cpython-311.pyc,,
+fsspec/implementations/__pycache__/cached.cpython-311.pyc,,
+fsspec/implementations/__pycache__/dask.cpython-311.pyc,,
+fsspec/implementations/__pycache__/data.cpython-311.pyc,,
+fsspec/implementations/__pycache__/dbfs.cpython-311.pyc,,
+fsspec/implementations/__pycache__/dirfs.cpython-311.pyc,,
+fsspec/implementations/__pycache__/ftp.cpython-311.pyc,,
+fsspec/implementations/__pycache__/git.cpython-311.pyc,,
+fsspec/implementations/__pycache__/github.cpython-311.pyc,,
+fsspec/implementations/__pycache__/http.cpython-311.pyc,,
+fsspec/implementations/__pycache__/jupyter.cpython-311.pyc,,
+fsspec/implementations/__pycache__/libarchive.cpython-311.pyc,,
+fsspec/implementations/__pycache__/local.cpython-311.pyc,,
+fsspec/implementations/__pycache__/memory.cpython-311.pyc,,
+fsspec/implementations/__pycache__/reference.cpython-311.pyc,,
+fsspec/implementations/__pycache__/sftp.cpython-311.pyc,,
+fsspec/implementations/__pycache__/smb.cpython-311.pyc,,
+fsspec/implementations/__pycache__/tar.cpython-311.pyc,,
+fsspec/implementations/__pycache__/webhdfs.cpython-311.pyc,,
+fsspec/implementations/__pycache__/zip.cpython-311.pyc,,
+fsspec/implementations/arrow.py,sha256=_7TLuV6ZzNlpmUU_v6ud56u2wadzsKmY5qugPBxgMEs,8649
+fsspec/implementations/cache_mapper.py,sha256=iHgBA6gjzDJ7_mBboHFzpLTf55HP3UEwUOZ43xyUK4M,2429
+fsspec/implementations/cache_metadata.py,sha256=ZvyA7Y3KK-5Ct4E5pELzD6mH_5T03XqaKVT96qYDADU,8576
+fsspec/implementations/cached.py,sha256=LbbPbeUup07O0y7gXD_atFgajWM9p1vlDKu_BOyLfbo,30943
+fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
+fsspec/implementations/data.py,sha256=Oti0dKzyeadnVIedo3s8CADoh9bNM-96_6viTEYr4lo,1245
+fsspec/implementations/dbfs.py,sha256=cix9OYUveuSOx5UO5uRUwNUkYqjzyY0fkKnca1kTgZ0,15014
+fsspec/implementations/dirfs.py,sha256=inDIRSDPhI1_ud1MMBFrpZQ11VIAMJ_dZQtbE4V08Ng,11384
+fsspec/implementations/ftp.py,sha256=rp6cTog8xqjDPlKdSLKcsyP7K593_ByMabxGbNSEpTo,11655
+fsspec/implementations/git.py,sha256=vKGI-Vd5q4H2RrvhebkPc9NwlfkZ980OUGhebeCw-M0,4034
+fsspec/implementations/github.py,sha256=0kIiKkeAaROuHgdWBHVQFrzJ2ZfoDgymCehL_kJXHYA,7565
+fsspec/implementations/http.py,sha256=PkhfgUV3-T7fG2Jf-NLX9doH52snV5Wmw91uVA9k74M,29454
+fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
+fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
+fsspec/implementations/local.py,sha256=nxiRKg9FAQHTQss9-ET8ZzDXPGhSOktgkxrg0ffMs2I,13454
+fsspec/implementations/memory.py,sha256=2iU--pOV2KCTrS-d5K8VKSygh9MPk2D7NZ_C8lMMEIw,9701
+fsspec/implementations/reference.py,sha256=0iGu8mscaQ3a5iTlRNByytQ3_-1Bj8__ARqVwyy4q2M,43871
+fsspec/implementations/sftp.py,sha256=fMY9XZcmpjszQ2tCqO_TPaJesaeD_Dv7ptYzgUPGoO0,5631
+fsspec/implementations/smb.py,sha256=k3RtzW97lJtYuw_QpP1rJRFnUBmSsw9twFjUCex0a5U,10591
+fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
+fsspec/implementations/webhdfs.py,sha256=wqVfno7z0TY1HepaIvKTUUcl_bi5NkV6qWsST8t_s7Y,16745
+fsspec/implementations/zip.py,sha256=JDX-3HOI15qUl6VTBsNPuDp5RVN6s2n3Bywd4mMu0T0,4347
+fsspec/mapping.py,sha256=WFEXRWxujQwfzzkRP5tpdIE0265okAtlP97qFZGvV1k,8165
+fsspec/parquet.py,sha256=qVxDhwc960SGOt5etcYAJxCr-7HQKP01687KpDR02Gw,19463
+fsspec/registry.py,sha256=-dl7sh2tsfhMA2uxz5KQDsPFehQTgMJIbVjNq6QLoKU,11145
+fsspec/spec.py,sha256=3t96RgizRN_slIuHXnuR0bXjVUfBS1TfuDrEua4oQvE,66277
+fsspec/tests/abstract/__init__.py,sha256=i1wcFixV6QhOwdoB24c8oXjzobISNqiKVz9kl2DvAY8,10028
+fsspec/tests/abstract/__pycache__/__init__.cpython-311.pyc,,
+fsspec/tests/abstract/__pycache__/common.cpython-311.pyc,,
+fsspec/tests/abstract/__pycache__/copy.cpython-311.pyc,,
+fsspec/tests/abstract/__pycache__/get.cpython-311.pyc,,
+fsspec/tests/abstract/__pycache__/put.cpython-311.pyc,,
+fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
+fsspec/tests/abstract/copy.py,sha256=gU5-d97U3RSde35Vp4RxPY4rWwL744HiSrJ8IBOp9-8,19967
+fsspec/tests/abstract/get.py,sha256=vNR4HztvTR7Cj56AMo7_tx7TeYz1Jgr_2Wb8Lv-UiBY,20755
+fsspec/tests/abstract/put.py,sha256=7aih17OKB_IZZh1Mkq1eBDIjobhtMQmI8x-Pw-S_aZk,21201
+fsspec/transaction.py,sha256=jeexB-H6Aw_gN6Z7hoKKe6v8zizITq39-gyTgpipIKE,2251
+fsspec/utils.py,sha256=_VX_0VwDtoAFSjMYrxvJvnPNX9FMoHO5BlFHXJ0bHFI,23053

tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/WHEEL ADDED Viewed

	@@ -0,0 +1,5 @@

+Wheel-Version: 1.0
+Generator: bdist_wheel (0.42.0)
+Root-Is-Purelib: true
+Tag: py3-none-any

tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ fsspec

tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_C.cpython-311-x86_64-linux-gnu.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d0c8228a395e1b7975c5d22cd5fe655e5a7b7024723a69164e0c9045aee847d
+size 324168

tuning-competition-baseline/.venv/lib/python3.11/site-packages/jinja2/__pycache__/compiler.cpython-311.pyc ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc6ec603b289fea3017e8bb0c8eb537328f368d775f0aee16f2837595da3258b
+size 110499

tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/__pycache__/ctx_mp.cpython-311.pyc ADDED Viewed

Binary file (71.2 kB). View file

tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__init__.py ADDED Viewed

	@@ -0,0 +1,77 @@

+from .libmpf import (prec_to_dps, dps_to_prec, repr_dps,
+  round_down, round_up, round_floor, round_ceiling, round_nearest,
+  to_pickable, from_pickable, ComplexResult,
+  fzero, fnzero, fone, fnone, ftwo, ften, fhalf, fnan, finf, fninf,
+  math_float_inf, round_int, normalize, normalize1,
+  from_man_exp, from_int, to_man_exp, to_int, mpf_ceil, mpf_floor,
+  mpf_nint, mpf_frac,
+  from_float, from_npfloat, from_Decimal, to_float, from_rational, to_rational, to_fixed,
+  mpf_rand, mpf_eq, mpf_hash, mpf_cmp, mpf_lt, mpf_le, mpf_gt, mpf_ge,
+  mpf_pos, mpf_neg, mpf_abs, mpf_sign, mpf_add, mpf_sub, mpf_sum,
+  mpf_mul, mpf_mul_int, mpf_shift, mpf_frexp,
+  mpf_div, mpf_rdiv_int, mpf_mod, mpf_pow_int,
+  mpf_perturb,
+  to_digits_exp, to_str, str_to_man_exp, from_str, from_bstr, to_bstr,
+  mpf_sqrt, mpf_hypot)
+from .libmpc import (mpc_one, mpc_zero, mpc_two, mpc_half,
+  mpc_is_inf, mpc_is_infnan, mpc_to_str, mpc_to_complex, mpc_hash,
+  mpc_conjugate, mpc_is_nonzero, mpc_add, mpc_add_mpf,
+  mpc_sub, mpc_sub_mpf, mpc_pos, mpc_neg, mpc_shift, mpc_abs,
+  mpc_arg, mpc_floor, mpc_ceil,  mpc_nint, mpc_frac, mpc_mul, mpc_square,
+  mpc_mul_mpf, mpc_mul_imag_mpf, mpc_mul_int,
+  mpc_div, mpc_div_mpf, mpc_reciprocal, mpc_mpf_div,
+  complex_int_pow, mpc_pow, mpc_pow_mpf, mpc_pow_int,
+  mpc_sqrt, mpc_nthroot, mpc_cbrt, mpc_exp, mpc_log, mpc_cos, mpc_sin,
+  mpc_tan, mpc_cos_pi, mpc_sin_pi, mpc_cosh, mpc_sinh, mpc_tanh,
+  mpc_atan, mpc_acos, mpc_asin, mpc_asinh, mpc_acosh, mpc_atanh,
+  mpc_fibonacci, mpf_expj, mpf_expjpi, mpc_expj, mpc_expjpi,
+  mpc_cos_sin, mpc_cos_sin_pi)
+from .libelefun import (ln2_fixed, mpf_ln2, ln10_fixed, mpf_ln10,
+  pi_fixed, mpf_pi, e_fixed, mpf_e, phi_fixed, mpf_phi,
+  degree_fixed, mpf_degree,
+  mpf_pow, mpf_nthroot, mpf_cbrt, log_int_fixed, agm_fixed,
+  mpf_log, mpf_log_hypot, mpf_exp, mpf_cos_sin, mpf_cos, mpf_sin, mpf_tan,
+  mpf_cos_sin_pi, mpf_cos_pi, mpf_sin_pi, mpf_cosh_sinh,
+  mpf_cosh, mpf_sinh, mpf_tanh, mpf_atan, mpf_atan2, mpf_asin,
+  mpf_acos, mpf_asinh, mpf_acosh, mpf_atanh, mpf_fibonacci)
+from .libhyper import (NoConvergence, make_hyp_summator,
+  mpf_erf, mpf_erfc, mpf_ei, mpc_ei, mpf_e1, mpc_e1, mpf_expint,
+  mpf_ci_si, mpf_ci, mpf_si, mpc_ci, mpc_si, mpf_besseljn,
+  mpc_besseljn, mpf_agm, mpf_agm1, mpc_agm, mpc_agm1,
+  mpf_ellipk, mpc_ellipk, mpf_ellipe, mpc_ellipe)
+from .gammazeta import (catalan_fixed, mpf_catalan,
+  khinchin_fixed, mpf_khinchin, glaisher_fixed, mpf_glaisher,
+  apery_fixed, mpf_apery, euler_fixed, mpf_euler, mertens_fixed,
+  mpf_mertens, twinprime_fixed, mpf_twinprime,
+  mpf_bernoulli, bernfrac, mpf_gamma_int,
+  mpf_factorial, mpc_factorial, mpf_gamma, mpc_gamma,
+  mpf_loggamma, mpc_loggamma, mpf_rgamma, mpc_rgamma,
+  mpf_harmonic, mpc_harmonic, mpf_psi0, mpc_psi0,
+  mpf_psi, mpc_psi, mpf_zeta_int, mpf_zeta, mpc_zeta,
+  mpf_altzeta, mpc_altzeta, mpf_zetasum, mpc_zetasum)
+from .libmpi import (mpi_str,
+  mpi_from_str, mpi_to_str,
+  mpi_eq, mpi_ne,
+  mpi_lt, mpi_le, mpi_gt, mpi_ge,
+  mpi_add, mpi_sub, mpi_delta, mpi_mid,
+  mpi_pos, mpi_neg, mpi_abs, mpi_mul, mpi_div, mpi_exp,
+  mpi_log, mpi_sqrt, mpi_pow_int, mpi_pow, mpi_cos_sin,
+  mpi_cos, mpi_sin, mpi_tan, mpi_cot,
+  mpi_atan, mpi_atan2,
+  mpci_pos, mpci_neg, mpci_add, mpci_sub, mpci_mul, mpci_div, mpci_pow,
+  mpci_abs, mpci_pow, mpci_exp, mpci_log, mpci_cos, mpci_sin,
+  mpi_gamma, mpci_gamma, mpi_loggamma, mpci_loggamma,
+  mpi_rgamma, mpci_rgamma, mpi_factorial, mpci_factorial)
+from .libintmath import (trailing, bitcount, numeral, bin_to_radix,
+  isqrt, isqrt_small, isqrt_fast, sqrt_fixed, sqrtrem, ifib, ifac,
+  list_primes, isprime, moebius, gcd, eulernum, stirling1, stirling2)
+from .backend import (gmpy, sage, BACKEND, STRICT, MPZ, MPZ_TYPE,
+  MPZ_ZERO, MPZ_ONE, MPZ_TWO, MPZ_THREE, MPZ_FIVE, int_types,
+  HASH_MODULUS, HASH_BITS)

tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libelefun.cpython-311.pyc ADDED Viewed

Binary file (54.8 kB). View file

tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libhyper.cpython-311.pyc ADDED Viewed

Binary file (52.9 kB). View file

tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libmpc.cpython-311.pyc ADDED Viewed

Binary file (43.8 kB). View file

tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libelefun.py ADDED Viewed

	@@ -0,0 +1,1428 @@

+"""
+This module implements computation of elementary transcendental
+functions (powers, logarithms, trigonometric and hyperbolic
+functions, inverse trigonometric and hyperbolic) for real
+floating-point numbers.
+For complex and interval implementations of the same functions,
+see libmpc and libmpi.
+"""
+import math
+from bisect import bisect
+from .backend import xrange
+from .backend import MPZ, MPZ_ZERO, MPZ_ONE, MPZ_TWO, MPZ_FIVE, BACKEND
+from .libmpf import (
+    round_floor, round_ceiling, round_down, round_up,
+    round_nearest, round_fast,
+    ComplexResult,
+    bitcount, bctable, lshift, rshift, giant_steps, sqrt_fixed,
+    from_int, to_int, from_man_exp, to_fixed, to_float, from_float,
+    from_rational, normalize,
+    fzero, fone, fnone, fhalf, finf, fninf, fnan,
+    mpf_cmp, mpf_sign, mpf_abs,
+    mpf_pos, mpf_neg, mpf_add, mpf_sub, mpf_mul, mpf_div, mpf_shift,
+    mpf_rdiv_int, mpf_pow_int, mpf_sqrt,
+    reciprocal_rnd, negative_rnd, mpf_perturb,
+    isqrt_fast
+)
+from .libintmath import ifib
+#-------------------------------------------------------------------------------
+# Tuning parameters
+#-------------------------------------------------------------------------------
+# Everything in this section is a module-level constant or an initially
+# empty cache dict. Two cutoffs are chosen according to BACKEND (imported
+# from .backend): one value for the 'python' backend, another otherwise.
+# Cutoff for computing exp from cosh+sinh. This reduces the
+# number of terms by half, but also requires a square root which
+# is expensive with the pure-Python square root code.
+if BACKEND == 'python':
+    EXP_COSH_CUTOFF = 600
+else:
+    EXP_COSH_CUTOFF = 400
+# Cutoff for using more than 2 series
+EXP_SERIES_U_CUTOFF = 1500
+# Also basically determined by sqrt
+if BACKEND == 'python':
+    COS_SIN_CACHE_PREC = 400
+else:
+    COS_SIN_CACHE_PREC = 200
+COS_SIN_CACHE_STEP = 8
+# Starts empty; presumably filled on demand by the cos/sin code further
+# down the module (not visible in this section)
+cos_sin_cache = {}
+# Number of integer logarithms to cache (for zeta sums)
+MAX_LOG_INT_CACHE = 2000
+log_int_cache = {}
+LOG_TAYLOR_PREC = 2500  # Use Taylor series with caching up to this prec
+LOG_TAYLOR_SHIFT = 9    # Cache log values in steps of size 2^-N
+log_taylor_cache = {}
+# prec/size ratio of x for fastest convergence in AGM formula
+LOG_AGM_MAG_PREC_RATIO = 20
+ATAN_TAYLOR_PREC = 3000  # Same as for log
+ATAN_TAYLOR_SHIFT = 7   # steps of size 2^-N
+atan_taylor_cache = {}
+# ~= next power of two + 20
+# Built once at import time: the list starts as [22, 22]; then, for each k
+# from 1 to bitcount(LOG_TAYLOR_PREC), 2**(k-1) copies of
+# min(2**k, LOG_TAYLOR_PREC) + 20 are appended.
+cache_prec_steps = [22,22]
+for k in xrange(1, bitcount(LOG_TAYLOR_PREC)+1):
+    cache_prec_steps += [min(2**k,LOG_TAYLOR_PREC)+20] * 2**(k-1)
+#----------------------------------------------------------------------------#
+#                                                                            #
+#                   Elementary mathematical constants                        #
+#                                                                            #
+#----------------------------------------------------------------------------#
+def constant_memo(f):
+    """
+    Decorator for caching computed values of mathematical
+    constants. This decorator should be applied to a
+    function taking a single argument prec as input and
+    returning a fixed-point value with the given precision.
+    Only the most recent result is kept: it is stored on the wrapped
+    function as f.memo_val, with its precision in f.memo_prec. A request
+    with prec <= f.memo_prec is served by right-shifting the stored value;
+    any other request recomputes at int(prec*1.05+10) bits and replaces
+    the stored value. Keyword arguments are passed to f only on a
+    recomputation and do not take part in the cache lookup.
+    """
+    # -1 means nothing is cached yet, so any prec >= 0 misses the first time
+    f.memo_prec = -1
+    f.memo_val = None
+    def g(prec, **kwargs):
+        memo_prec = f.memo_prec
+        if prec <= memo_prec:
+            # Cache hit: drop the surplus low bits of the stored value
+            return f.memo_val >> (memo_prec-prec)
+        # Cache miss: compute with some headroom so that slightly larger
+        # requests later can still be served from the cache
+        newprec = int(prec*1.05+10)
+        # The value is stored before the precision, so if f raises the
+        # previous cache entry is left untouched
+        f.memo_val = f(newprec, **kwargs)
+        f.memo_prec = newprec
+        return f.memo_val >> (newprec-prec)
+    # Make the wrapper carry the wrapped function's name and docstring
+    g.__name__ = f.__name__
+    g.__doc__ = f.__doc__
+    return g
+def def_mpf_constant(fixed):
+    """
+    Create a function that computes the mpf value for a mathematical
+    constant, given a function that computes the fixed-point value.
+    Assumptions: the constant is positive and has magnitude ~= 1;
+    the fixed-point function rounds to floor.
+    """
+    def f(prec, rnd=round_fast):
+        wp = prec + 20
+        v = fixed(wp)
+        if rnd in (round_up, round_ceiling):
+            v += 1
+        return normalize(0, v, -wp, bitcount(v), prec, rnd)
+    f.__doc__ = fixed.__doc__
+    return f
+def bsp_acot(q, a, b, hyperbolic):
+    if b - a == 1:
+        a1 = MPZ(2*a + 3)
+        if hyperbolic or a&1:
+            return MPZ_ONE, a1 * q**2, a1
+        else:
+            return -MPZ_ONE, a1 * q**2, a1
+    m = (a+b)//2
+    p1, q1, r1 = bsp_acot(q, a, m, hyperbolic)
+    p2, q2, r2 = bsp_acot(q, m, b, hyperbolic)
+    return q2*p1 + r1*p2, q1*q2, r1*r2
+# the acoth(x) series converges like the geometric series for x^2
+# N = ceil(p*log(2)/(2*log(x)))
+def acot_fixed(a, prec, hyperbolic):
+    """
+    Compute acot(a) or acoth(a) for an integer a with binary splitting; see
+    http://numbers.computation.free.fr/Constants/Algorithms/splitting.html
+    """
+    N = int(0.35 * prec/math.log(a) + 20)
+    p, q, r = bsp_acot(a, 0,N, hyperbolic)
+    return ((p+q)<<prec)//(q*a)
+def machin(coefs, prec, hyperbolic=False):
+    """
+    Evaluate a Machin-like formula, i.e., a linear combination of
+    acot(n) or acoth(n) for specific integer values of n, using fixed-
+    point arithmetic. The input should be a list [(c, n), ...], giving
+    c*acot[h](n) + ...
+    """
+    extraprec = 10
+    s = MPZ_ZERO
+    for a, b in coefs:
+        s += MPZ(a) * acot_fixed(MPZ(b), prec+extraprec, hyperbolic)
+    return (s >> extraprec)
+# Logarithms of integers are needed for various computations involving
+# logarithms, powers, radix conversion, etc
+@constant_memo
+def ln2_fixed(prec):
+    """
+    Computes ln(2). This is done with a hyperbolic Machin-type formula,
+    with binary splitting at high precision.
+    """
+    return machin([(18, 26), (-2, 4801), (8, 8749)], prec, True)
+@constant_memo
+def ln10_fixed(prec):
+    """
+    Computes ln(10). This is done with a hyperbolic Machin-type formula.
+    """
+    return machin([(46, 31), (34, 49), (20, 161)], prec, True)
+r"""
+For computation of pi, we use the Chudnovsky series:
+             oo
+             ___        k
+      1     \       (-1)  (6 k)! (A + B k)
+    ----- =  )     -----------------------
+    12 pi   /___               3  3k+3/2
+                    (3 k)! (k!)  C
+            k = 0
+where A, B, and C are certain integer constants. This series adds roughly
+14 digits per term. Note that C^(3/2) can be extracted so that the
+series contains only rational terms. This makes binary splitting very
+efficient.
+The recurrence formulas for the binary splitting were taken from
+ftp://ftp.gmplib.org/pub/src/gmp-chudnovsky.c
+Previously, Machin's formula was used at low precision and the AGM iteration
+was used at high precision. However, the Chudnovsky series is essentially as
+fast as the Machin formula at low precision and in practice about 3x faster
+than the AGM at high precision (despite theoretically having a worse
+asymptotic complexity), so there is no reason not to use it in all cases.
+"""
+# Constants in Chudnovsky's series
+# CHUD_A, CHUD_B and CHUD_C are the integer constants A, B and C of the
+# series described above. CHUD_D is the extra divisor applied in pi_fixed
+# (presumably the 12 of the 1/(12 pi) on the left-hand side).
+CHUD_A = MPZ(13591409)
+CHUD_B = MPZ(545140134)
+CHUD_C = MPZ(640320)
+CHUD_D = MPZ(12)
+def bs_chudnovsky(a, b, level, verbose):
+    """
+    Computes the sum from a to b of the series in the Chudnovsky
+    formula. Returns g, p, q where p/q is the sum as an exact
+    fraction and g is a temporary value used to save work
+    for recursive calls.
+    """
+    if b-a == 1:
+        g = MPZ((6*b-5)*(2*b-1)*(6*b-1))
+        p = b**3 * CHUD_C**3 // 24
+        q = (-1)**b * g * (CHUD_A+CHUD_B*b)
+    else:
+        if verbose and level < 4:
+            print("  binary splitting", a, b)
+        mid = (a+b)//2
+        g1, p1, q1 = bs_chudnovsky(a, mid, level+1, verbose)
+        g2, p2, q2 = bs_chudnovsky(mid, b, level+1, verbose)
+        p = p1*p2
+        g = g1*g2
+        q = q1*p2 + q2*g1
+    return g, p, q
+@constant_memo
+def pi_fixed(prec, verbose=False, verbose_base=None):
+    """
+    Compute floor(pi * 2**prec) as a big integer.
+    This is done using Chudnovsky's series (see comments in
+    libelefun.py for details).
+    """
+    # The Chudnovsky series gives 14.18 digits per term
+    N = int(prec/3.3219280948/14.181647462 + 2)
+    if verbose:
+        print("binary splitting with N =", N)
+    g, p, q = bs_chudnovsky(0, N, 0, verbose)
+    sqrtC = isqrt_fast(CHUD_C<<(2*prec))
+    v = p*CHUD_C*sqrtC//((q+CHUD_A*p)*CHUD_D)
+    return v
+def degree_fixed(prec):
+    return pi_fixed(prec)//180
+def bspe(a, b):
+    """
+    Sum series for exp(1)-1 between a, b, returning the result
+    as an exact fraction (p, q).
+    """
+    if b-a == 1:
+        return MPZ_ONE, MPZ(b)
+    m = (a+b)//2
+    p1, q1 = bspe(a, m)
+    p2, q2 = bspe(m, b)
+    return p1*q2+p2, q1*q2
+@constant_memo
+def e_fixed(prec):
+    """
+    Computes exp(1). This is done using the ordinary Taylor series for
+    exp, with binary splitting. For a description of the algorithm,
+    see:
+        http://numbers.computation.free.fr/Constants/
+            Algorithms/splitting.html
+    """
+    # Slight overestimate of N needed for 1/N! < 2**(-prec)
+    # This could be tightened for large N.
+    N = int(1.1*prec/math.log(prec) + 20)
+    p, q = bspe(0,N)
+    return ((p+q)<<prec)//q
+@constant_memo
+def phi_fixed(prec):
+    """
+    Computes the golden ratio, (1+sqrt(5))/2
+    """
+    prec += 10
+    a = isqrt_fast(MPZ_FIVE<<(2*prec)) + (MPZ_ONE << prec)
+    return a >> 11
+# mpf-level constants: each fixed-point generator is passed through
+# def_mpf_constant and the result bound to a module-level name.
+mpf_phi    = def_mpf_constant(phi_fixed)
+mpf_pi     = def_mpf_constant(pi_fixed)
+mpf_e      = def_mpf_constant(e_fixed)
+mpf_degree = def_mpf_constant(degree_fixed)
+mpf_ln2    = def_mpf_constant(ln2_fixed)
+mpf_ln10   = def_mpf_constant(ln10_fixed)
+@constant_memo
+def ln_sqrt2pi_fixed(prec):
+    wp = prec + 10
+    # ln(sqrt(2*pi)) = ln(2*pi)/2
+    return to_fixed(mpf_log(mpf_shift(mpf_pi(wp), 1), wp), prec-1)
+@constant_memo
+def sqrtpi_fixed(prec):
+    return sqrt_fixed(pi_fixed(prec), prec)
+# mpf-level versions of sqrt(pi) and ln(sqrt(2*pi)), built from the
+# fixed-point generators with def_mpf_constant.
+mpf_sqrtpi   = def_mpf_constant(sqrtpi_fixed)
+mpf_ln_sqrt2pi   = def_mpf_constant(ln_sqrt2pi_fixed)
+#----------------------------------------------------------------------------#
+#                                                                            #
+#                                    Powers                                  #
+#                                                                            #
+#----------------------------------------------------------------------------#
+def mpf_pow(s, t, prec, rnd=round_fast):
+    """
+    Compute s**t. Raises ComplexResult if s is negative and t is
+    fractional.
+    """
+    ssign, sman, sexp, sbc = s
+    tsign, tman, texp, tbc = t
+    if ssign and texp < 0:
+        raise ComplexResult("negative number raised to a fractional power")
+    if texp >= 0:
+        return mpf_pow_int(s, (-1)**tsign * (tman<<texp), prec, rnd)
+    # s**(n/2) = sqrt(s)**n
+    if texp == -1:
+        if tman == 1:
+            if tsign:
+                return mpf_div(fone, mpf_sqrt(s, prec+10,
+                    reciprocal_rnd[rnd]), prec, rnd)
+            return mpf_sqrt(s, prec, rnd)
+        else:
+            if tsign:
+                return mpf_pow_int(mpf_sqrt(s, prec+10,
+                    reciprocal_rnd[rnd]), -tman, prec, rnd)
+            return mpf_pow_int(mpf_sqrt(s, prec+10, rnd), tman, prec, rnd)
+    # General formula: s**t = exp(t*log(s))
+    # TODO: handle rnd direction of the logarithm carefully
+    c = mpf_log(s, prec+10, rnd)
+    return mpf_exp(mpf_mul(t, c), prec, rnd)
+def int_pow_fixed(y, n, prec):
+    """n-th power of a fixed point number with precision prec
+       Returns the power in the form man, exp,
+       man * 2**exp ~= y**n
+    """
+    if n == 2:
+        return (y*y), 0
+    bc = bitcount(y)
+    exp = 0
+    workprec = 2 * (prec + 4*bitcount(n) + 4)
+    _, pm, pe, pbc = fone
+    while 1:
+        if n & 1:
+            pm = pm*y
+            pe = pe+exp
+            pbc += bc - 2
+            pbc = pbc + bctable[int(pm >> pbc)]
+            if pbc > workprec:
+                pm = pm >> (pbc-workprec)
+                pe += pbc - workprec
+                pbc = workprec
+            n -= 1
+            if not n:
+                break
+        y = y*y
+        exp = exp+exp
+        bc = bc + bc - 2
+        bc = bc + bctable[int(y >> bc)]
+        if bc > workprec:
+            y = y >> (bc-workprec)
+            exp += bc - workprec
+            bc = workprec
+        n = n // 2
+    return pm, pe
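+# nthroot_fixed(y, n, prec, exp1) and mpf_nthroot(s, n, prec, rnd) below
+# compute the real n-th root of a positive mpf tuple s; the notes that
+# follow refer to this routine as "froot".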
+# To compute the root we start from a 50-bit estimate for r
+# generated with ordinary floating-point arithmetic, and then refine
+# the value to full accuracy using the iteration
+#            1  /                     y       \
+#   r     = --- | (n-1)  * r   +  ----------  |
+#    n+1     n  \           n     r_n**(n-1)  /
+# which is simply Newton's method applied to the equation r**n = y.
+# With giant_steps(start, prec+extra) = [p0,...,pm, prec+extra]
+# and y = man * 2**-shift  one has
+# (man * 2**exp)**(1/n) =
+# y**(1/n) * 2**(start-prec/n) * 2**(p0-start) * ... * 2**(prec+extra-pm) *
+# 2**((exp+shift-(n-1)*prec)/n -extra))
+# The last factor is accounted for in the last line of froot.
+def nthroot_fixed(y, n, prec, exp1):
+    """
+    Newton refinement of the n-th root of the fixed-point integer y.
+
+    y    -- integer mantissa whose root is wanted
+    n    -- degree of the root
+    prec -- working precision in bits
+    exp1 -- accepted from the caller but not read in this body
+
+    Returns the integer r left by the last refinement step; the scaling
+    of r relative to y is described in the comment block preceding this
+    function.
+    """
+    # Precision (in bits) of the starting estimate
+    start = 50
+    try:
+        # Starting estimate via ordinary floating-point arithmetic
+        y1 = rshift(y, prec - n*start)
+        r = MPZ(int(y1**(1.0/n)))
+    except OverflowError:
+        # Presumably y1 was too large to convert to a float; fall back
+        # to mpf arithmetic at `start` bits.
+        y1 = from_int(y1, start)
+        fn = from_int(n)
+        fn = mpf_rdiv_int(1, fn, start)
+        r = mpf_pow(y1, fn, start)
+        r = to_int(r)
+    # Guard bits for the target precision and for the division below
+    extra = 10
+    extra1 = n
+    prevp = start
+    for p in giant_steps(start, prec+extra):
+        # r**(n-1) as mantissa/exponent at the previous precision
+        pm, pe = int_pow_fixed(r, n-1, prevp)
+        r2 = rshift(pm, (n-1)*prevp - p - pe - extra1)
+        # B approximates y / r**(n-1) at the new precision p
+        B = lshift(y, 2*p-prec+extra1)//r2
+        # Newton step: r <- (B + (n-1)*r) / n
+        r = (B + (n-1) * lshift(r, p-prevp))//n
+        prevp = p
+    return r
+def mpf_nthroot(s, n, prec, rnd=round_fast):
+    """nth-root of a positive number
+    Use the Newton method when faster, otherwise use x**(1/n)
+
+    s    -- mpf tuple (sign, man, exp, bc)
+    n    -- integer degree of the root; a negative n yields the
+            reciprocal of the |n|-th root
+    prec -- precision of the result in bits
+    rnd  -- rounding mode
+
+    Raises ComplexResult if the sign field of s is set.
+    """
+    sign, man, exp, bc = s
+    if sign:
+        raise ComplexResult("nth root of a negative number")
+    # Zero mantissa: s is nan, zero or infinity
+    if not man:
+        if s == fnan:
+            return fnan
+        if s == fzero:
+            if n > 0:
+                return fzero
+            if n == 0:
+                return fone
+            return finf
+        # Infinity
+        if not n:
+            return fnan
+        if n < 0:
+            return fzero
+        return finf
+    flag_inverse = False
+    # Trivial degrees 0, 1, -1, and set-up for negative degrees
+    if n < 2:
+        if n == 0:
+            return fone
+        if n == 1:
+            return mpf_pos(s, prec, rnd)
+        if n == -1:
+            return mpf_div(fone, s, prec, rnd)
+        # n < 0
+        # Compute the positive root with extra bits, then invert at the end.
+        # NOTE(review): rnd is replaced by its reciprocal mode here, and that
+        # replaced mode is also what the final mpf_div receives -- confirm
+        # this is the intended rounding for the inverted result.
+        rnd = reciprocal_rnd[rnd]
+        flag_inverse = True
+        extra_inverse = 5
+        prec += extra_inverse
+        n = -n
+    # For large n, or precision below an empirical threshold, go through
+    # the general power s**(1/n) instead of Newton iteration.
+    if n > 20 and (n >= 20000 or prec < int(233 + 28.3 * n**0.62)):
+        prec2 = prec + 10
+        fn = from_int(n)
+        nth = mpf_rdiv_int(1, fn, prec2)
+        r = mpf_pow(s, nth, prec2, rnd)
+        s = normalize(r[0], r[1], r[2], r[3], prec, rnd)
+        if flag_inverse:
+            return mpf_div(fone, s, prec-extra_inverse, rnd)
+        else:
+            return s
+    # Convert to a fixed-point number with prec2 bits.
+    prec2 = prec + 2*n - (prec%n)
+    # a few tests indicate that
+    # for 10 < n < 10**4 a bit more precision is needed
+    if n > 10:
+        prec2 += prec2//10
+        prec2 = prec2 - prec2%n
+    # Mantissa may have more bits than we need. Trim it down.
+    shift = bc - prec2
+    # Adjust exponents to make prec2 and exp+shift multiples of n.
+    sign1 = 0
+    es = exp+shift
+    if es < 0:
+        sign1 = 1
+        es = -es
+    if sign1:
+        shift += es%n
+    else:
+        shift -= es%n
+    man = rshift(man, shift)
+    extra = 10
+    # Exponent of the root returned by nthroot_fixed
+    exp1 = ((exp+shift-(n-1)*prec2)//n) - extra
+    # Bump the truncated mantissa by one unit for the rounding modes
+    # selected below before taking the root.
+    rnd_shift = 0
+    if flag_inverse:
+        if rnd == 'u' or rnd == 'c':
+            rnd_shift = 1
+    else:
+        if rnd == 'd' or rnd == 'f':
+            rnd_shift = 1
+    man = nthroot_fixed(man+rnd_shift, n, prec2, exp1)
+    s = from_man_exp(man, exp1, prec, rnd)
+    if flag_inverse:
+        # Remove the extra bits added for the inversion
+        return mpf_div(fone, s, prec-extra_inverse, rnd)
+    else:
+        return s
+def mpf_cbrt(s, prec, rnd=round_fast):
+    """cubic root of a positive number"""
+    return mpf_nthroot(s, 3, prec, rnd)
+#----------------------------------------------------------------------------#
+#                                                                            #
+#                                Logarithms                                  #
+#                                                                            #
+#----------------------------------------------------------------------------#
+def log_int_fixed(n, prec, ln2=None):
+    """
+    Fast computation of log(n), caching the value for small n,
+    intended for zeta sums.
+
+    n    -- integer whose logarithm is wanted
+    prec -- number of fractional bits of the returned fixed-point value
+    ln2  -- optional fixed-point log(2), consulted only on the Taylor
+            branch, where it is combined with a value carrying prec+10
+            bits (NOTE(review): presumably it must be scaled to prec+10
+            bits as well -- confirm with callers)
+
+    Returns log(n) as a fixed-point integer with prec fractional bits.
+    """
+    # Reuse a cached value if it was computed with at least prec bits
+    if n in log_int_cache:
+        value, vprec = log_int_cache[n]
+        if vprec >= prec:
+            return value >> (vprec - prec)
+    # Working precision with guard bits
+    wp = prec + 10
+    # NOTE(review): this compares against LOG_TAYLOR_SHIFT, whereas mpf_log
+    # selects its Taylor path with LOG_TAYLOR_PREC -- confirm which
+    # constant is intended here.
+    if wp <= LOG_TAYLOR_SHIFT:
+        if ln2 is None:
+            ln2 = ln2_fixed(wp)
+        # Write n = x * 2**r with x scaled to wp bits, so that
+        # log(n) = log(x) + r*log(2)
+        r = bitcount(n)
+        x = n << (wp-r)
+        v = log_taylor_cached(x, wp) + r*ln2
+    else:
+        v = to_fixed(mpf_log(from_int(n), wp+5), wp)
+    # Only small n are stored in the cache
+    if n < MAX_LOG_INT_CACHE:
+        log_int_cache[n] = (v, wp)
+    return v >> (wp-prec)
+def agm_fixed(a, b, prec):
+    """
+    Fixed-point computation of agm(a,b), assuming
+    a, b both close to unit magnitude.
+    """
+    i = 0
+    while 1:
+        anew = (a+b)>>1
+        if i > 4 and abs(a-anew) < 8:
+            return a
+        b = isqrt_fast(a*b)
+        a = anew
+        i += 1
+    return a
+def log_agm(x, prec):
+    """
+    Fixed-point computation of -log(x) = log(1/x), suitable
+    for large precision. It is required that 0 < x < 1. The
+    algorithm used is the Sasaki-Kanada formula
+        -log(x) = pi/agm(theta2(x)^2,theta3(x)^2). [1]
+    For faster convergence in the theta functions, x should
+    be chosen closer to 0.
+    Guard bits must be added by the caller.
+    HYPOTHESIS: if x = 2^(-n), n bits need to be added to
+    account for the truncation to a fixed-point number,
+    and this is the only significant cancellation error.
+    The number of bits lost to roundoff is small and can be
+    considered constant.
+    [1] Richard P. Brent, "Fast Algorithms for High-Precision
+        Computation of Elementary Functions (extended abstract)",
+        http://wwwmaths.anu.edu.au/~brent/pd/RNC7-Brent.pdf
+
+    x    -- fixed-point integer with prec fractional bits
+    prec -- precision in bits
+
+    Returns -log(x) as a fixed-point integer with prec fractional bits.
+    """
+    x2 = (x*x) >> prec
+    # Compute jtheta2(x)**2
+    # The loop accumulates x**2 + x**6 + x**12 + ..., i.e. x**(k*(k+1)):
+    # b runs through the odd-step multipliers and a through the terms.
+    s = a = b = x2
+    while a:
+        b = (b*x2) >> prec
+        a = (a*b) >> prec
+        s += a
+    # Add the k = 0 term, square, multiply by 4 (the shift by prec-2)
+    # and by sqrt(x).
+    s += (MPZ_ONE<<prec)
+    s = (s*s)>>(prec-2)
+    s = (s*isqrt_fast(x<<prec))>>prec
+    # Compute jtheta3(x)**2
+    # The loop accumulates x + x**4 + x**9 + ..., i.e. x**(k*k).
+    t = a = b = x
+    while a:
+        b = (b*x2) >> prec
+        a = (a*b) >> prec
+        t += a
+    # theta3 = 1 + 2*sum, then square
+    t = (MPZ_ONE<<prec) + (t<<1)
+    t = (t*t)>>prec
+    # Final formula
+    p = agm_fixed(s, t, prec)
+    return (pi_fixed(prec) << prec) // p
+def log_taylor(x, prec, r=0):
+    """
+    Fixed-point calculation of log(x). It is assumed that x is close
+    enough to 1 for the Taylor series to converge quickly. Convergence
+    can be improved by specifying r > 0 to compute
+    log(x^(1/2^r))*2^r, at the cost of performing r square roots.
+    The caller must provide sufficient guard bits.
+
+    x    -- fixed-point integer with prec fractional bits
+    prec -- precision in bits
+    r    -- number of square roots applied to x before the series
+
+    Returns log(x) as a fixed-point integer with prec fractional bits.
+    """
+    # Bring x closer to 1 by taking r successive square roots
+    for i in xrange(r):
+        x = isqrt_fast(x<<prec)
+    one = MPZ_ONE << prec
+    # Series variable v = (x-1)/(x+1); the sum below is
+    # v + v**3/3 + v**5/5 + ..., doubled at the end.
+    v = ((x-one)<<prec)//(x+one)
+    # Work with |v| and restore the sign on return
+    sign = v < 0
+    if sign:
+        v = -v
+    v2 = (v*v) >> prec
+    v4 = (v2*v2) >> prec
+    # s0 collects the terms v**(4j+1)/(4j+1); s1 collects v**(4j+1)/(4j+3)
+    # and is multiplied by v**2 once after the loop.
+    s0 = v
+    s1 = v//3
+    v = (v*v4) >> prec
+    k = 5
+    while v:
+        s0 += v // k
+        k += 2
+        s1 += v // k
+        v = (v*v4) >> prec
+        k += 2
+    s1 = (s1*v2) >> prec
+    # Factor 2 of the series and factor 2**r undoing the square roots
+    s = (s0+s1) << (1+r)
+    if sign:
+        return -s
+    return s
+def log_taylor_cached(x, prec):
+    """
+    Fixed-point computation of log(x), assuming x in (0.5, 2)
+    and prec <= LOG_TAYLOR_PREC.
+
+    x    -- fixed-point integer with prec fractional bits
+    prec -- precision in bits
+
+    Returns log(x) as a fixed-point integer with prec fractional bits.
+    """
+    # n: the leading bits of x, used as the cache key
+    n = x >> (prec-LOG_TAYLOR_SHIFT)
+    cached_prec = cache_prec_steps[prec]
+    dprec = cached_prec - prec
+    # Look up, or compute and store, the point a and log(a) at the
+    # cache precision
+    if (n, cached_prec) in log_taylor_cache:
+        a, log_a = log_taylor_cache[n, cached_prec]
+    else:
+        a = n << (cached_prec - LOG_TAYLOR_SHIFT)
+        log_a = log_taylor(a, cached_prec, 8)
+        log_taylor_cache[n, cached_prec] = (a, log_a)
+    # Reduce the cached values to the requested precision
+    a >>= dprec
+    log_a >>= dprec
+    # log(x) = log(a) + log(x/a); with u = (x-a)/a the series variable
+    # is v = u/(2+u) = (x-a)/(x+a)
+    u = ((x - a) << prec) // a
+    v = (u << prec) // ((MPZ_TWO << prec) + u)
+    v2 = (v*v) >> prec
+    v4 = (v2*v2) >> prec
+    # Same two-accumulator summation of v + v**3/3 + v**5/5 + ...
+    # as in log_taylor
+    s0 = v
+    s1 = v//3
+    v = (v*v4) >> prec
+    k = 5
+    while v:
+        s0 += v//k
+        k += 2
+        s1 += v//k
+        v = (v*v4) >> prec
+        k += 2
+    s1 = (s1*v2) >> prec
+    s = (s0+s1) << 1
+    return log_a + s
+def mpf_log(x, prec, rnd=round_fast):
+    """
+    Compute the natural logarithm of the mpf value x. If x is negative,
+    ComplexResult is raised.
+    """
+    sign, man, exp, bc = x
+    #------------------------------------------------------------------
+    # Handle special values
+    if not man:
+        if x == fzero: return fninf
+        if x == finf: return finf
+        if x == fnan: return fnan
+    if sign:
+        raise ComplexResult("logarithm of a negative number")
+    wp = prec + 20
+    #------------------------------------------------------------------
+    # Handle log(2^n) = log(n)*2.
+    # Here we catch the only possible exact value, log(1) = 0
+    if man == 1:
+        if not exp:
+            return fzero
+        return from_man_exp(exp*ln2_fixed(wp), -wp, prec, rnd)
+    mag = exp+bc
+    abs_mag = abs(mag)
+    #------------------------------------------------------------------
+    # Handle x = 1+eps, where log(x) ~ x. We need to check for
+    # cancellation when moving to fixed-point math and compensate
+    # by increasing the precision. Note that abs_mag in (0, 1) <=>
+    # 0.5 < x < 2 and x != 1
+    if abs_mag <= 1:
+        # Calculate t = x-1 to measure distance from 1 in bits
+        tsign = 1-abs_mag
+        if tsign:
+            tman = (MPZ_ONE<<bc) - man
+        else:
+            tman = man - (MPZ_ONE<<(bc-1))
+        tbc = bitcount(tman)
+        cancellation = bc - tbc
+        if cancellation > wp:
+            t = normalize(tsign, tman, abs_mag-bc, tbc, tbc, 'n')
+            return mpf_perturb(t, tsign, prec, rnd)
+        else:
+            wp += cancellation
+        # TODO: if close enough to 1, we could use Taylor series
+        # even in the AGM precision range, since the Taylor series
+        # converges rapidly
+    #------------------------------------------------------------------
+    # Another special case:
+    # n*log(2) is a good enough approximation
+    if abs_mag > 10000:
+        if bitcount(abs_mag) > wp:
+            return from_man_exp(exp*ln2_fixed(wp), -wp, prec, rnd)
+    #------------------------------------------------------------------
+    # General case.
+    # Perform argument reduction using log(x) = log(x*2^n) - n*log(2):
+    # If we are in the Taylor precision range, choose magnitude 0 or 1.
+    # If we are in the AGM precision range, choose magnitude -m for
+    # some large m; benchmarking on one machine showed m = prec/20 to be
+    # optimal between 1000 and 100,000 digits.
+    if wp <= LOG_TAYLOR_PREC:
+        m = log_taylor_cached(lshift(man, wp-bc), wp)
+        if mag:
+            m += mag*ln2_fixed(wp)
+    else:
+        optimal_mag = -wp//LOG_AGM_MAG_PREC_RATIO
+        n = optimal_mag - mag
+        x = mpf_shift(x, n)
+        wp += (-optimal_mag)
+        m = -log_agm(to_fixed(x, wp), wp)
+        m -= n*ln2_fixed(wp)
+    return from_man_exp(m, -wp, prec, rnd)
+def mpf_log_hypot(a, b, prec, rnd):
+    """
+    Computes log(sqrt(a^2+b^2)) accurately.
+
+    a, b -- mpf tuples
+    prec -- precision of the result in bits
+    rnd  -- rounding mode
+
+    Returns fninf when both inputs are zero, fnan when a nan is
+    involved, and finf when an infinity is involved without a nan.
+    """
+    # If either a or b is inf/nan/0, assume it to be a
+    if not b[1]:
+        a, b = b, a
+    # a is inf/nan/0
+    if not a[1]:
+        # both are inf/nan/0
+        if not b[1]:
+            if a == b == fzero:
+                return fninf
+            if fnan in (a, b):
+                return fnan
+            # at least one term is (+/- inf)^2
+            return finf
+        # only a is inf/nan/0
+        if a == fzero:
+            # log(sqrt(0+b^2)) = log(|b|)
+            return mpf_log(mpf_abs(b), prec, rnd)
+        if a == fnan:
+            return fnan
+        return finf
+    # Exact
+    a2 = mpf_mul(a,a)
+    b2 = mpf_mul(b,b)
+    extra = 20
+    # Not exact
+    h2 = mpf_add(a2, b2, prec+extra)
+    # Low-precision estimate of h2 - 1, used to detect h2 close to 1,
+    # where the logarithm would lose accuracy
+    cancelled = mpf_add(h2, fnone, 10)
+    mag_cancelled = cancelled[2]+cancelled[3]
+    # Just redo the sum exactly if necessary (could be smarter
+    # and avoid memory allocation when a or b is precisely 1
+    # and the other is tiny...)
+    if cancelled == fzero or mag_cancelled < -extra//2:
+        h2 = mpf_add(a2, b2, prec+extra-min(a2[2],b2[2]))
+    # log(sqrt(h2)) = log(h2)/2
+    return mpf_shift(mpf_log(h2, prec, rnd), -1)
+#----------------------------------------------------------------------
+# Inverse tangent
+#
+def atan_newton(x, prec):
+    """
+    Fixed-point atan(x) by Newton iteration on tan(r) = x.
+
+    x    -- fixed-point integer with prec fractional bits
+    prec -- precision in bits
+
+    Returns atan(x) as a fixed-point integer with prec fractional bits.
+    """
+    # Starting value from the floating-point math.atan; for large prec
+    # only the leading 53 bits of x are passed to it
+    if prec >= 100:
+        r = math.atan(int((x>>(prec-53)))/2.0**53)
+    else:
+        r = math.atan(int(x)/2.0**prec)
+    prevp = 50
+    # Convert the float estimate to fixed point with prevp bits
+    r = MPZ(int(r * 2.0**53) >> (53-prevp))
+    extra_p = 50
+    for wp in giant_steps(prevp, prec):
+        wp += extra_p
+        r = r << (wp-prevp)
+        cos, sin = cos_sin_fixed(r, wp)
+        tan = (sin << wp) // cos
+        # Newton correction: (tan(r) - x) / (1 + tan(r)**2)
+        a = ((tan-rshift(x, prec-wp)) << wp) // ((MPZ_ONE<<wp) + ((tan**2)>>wp))
+        r = r - a
+        prevp = wp
+    # Drop the extra working bits
+    return rshift(r, prevp-prec)
+def atan_taylor_get_cached(n, prec):
+    # Taylor series with caching wins up to huge precisions
+    # To avoid unnecessary precomputation at low precision, we
+    # do it in steps
+    # Round to next power of 2
+    prec2 = (1<<(bitcount(prec-1))) + 20
+    dprec = prec2 - prec
+    if (n, prec2) in atan_taylor_cache:
+        a, atan_a = atan_taylor_cache[n, prec2]
+    else:
+        a = n << (prec2 - ATAN_TAYLOR_SHIFT)
+        atan_a = atan_newton(a, prec2)
+        atan_taylor_cache[n, prec2] = (a, atan_a)
+    return (a >> dprec), (atan_a >> dprec)
+def atan_taylor(x, prec):
+    """
+    Fixed-point atan(x) from a cached nearby point plus a Taylor series.
+
+    x    -- fixed-point integer with prec fractional bits
+    prec -- precision in bits
+
+    Returns atan(x) as a fixed-point integer with prec fractional bits.
+    """
+    # Leading bits of x select the cached point a with known atan(a)
+    n = (x >> (prec-ATAN_TAYLOR_SHIFT))
+    a, atan_a = atan_taylor_get_cached(n, prec)
+    d = x - a
+    # Series variable v = d / (1 + a**2 + a*d) = (x-a)/(1+a*x),
+    # so that atan(x) = atan(a) + atan(v)
+    s0 = v = (d << prec) // ((a**2 >> prec) + (a*d >> prec) + (MPZ_ONE << prec))
+    v2 = (v**2 >> prec)
+    v4 = (v2 * v2) >> prec
+    # s0 collects v**(4j+1)/(4j+1); s1 collects v**(4j+1)/(4j+3) and is
+    # multiplied by v**2 after the loop
+    s1 = v//3
+    v = (v * v4) >> prec
+    k = 5
+    while v:
+        s0 += v // k
+        k += 2
+        s1 += v // k
+        v = (v * v4) >> prec
+        k += 2
+    s1 = (s1 * v2) >> prec
+    # The atan series alternates in sign, hence the subtraction
+    s = s0 - s1
+    return atan_a + s
+def atan_inf(sign, prec, rnd):
+    if not sign:
+        return mpf_shift(mpf_pi(prec, rnd), -1)
+    return mpf_neg(mpf_shift(mpf_pi(prec, negative_rnd[rnd]), -1))
+def mpf_atan(x, prec, rnd=round_fast):
+    """
+    Compute atan(x) for the mpf value x.
+
+    x    -- mpf tuple (sign, man, exp, bc)
+    prec -- precision of the result in bits
+    rnd  -- rounding mode
+
+    Zero maps to zero, the infinities to +/- pi/2, and any other
+    value with a zero mantissa to fnan.
+    """
+    sign, man, exp, bc = x
+    if not man:
+        if x == fzero: return fzero
+        if x == finf: return atan_inf(0, prec, rnd)
+        if x == fninf: return atan_inf(1, prec, rnd)
+        return fnan
+    mag = exp + bc
+    # Essentially infinity
+    if mag > prec+20:
+        return atan_inf(sign, prec, rnd)
+    # Essentially ~ x
+    if -mag > prec+20:
+        return mpf_perturb(x, 1-sign, prec, rnd)
+    # Working precision grows with the distance of |x| from unit magnitude
+    wp = prec + 30 + abs(mag)
+    # For large x, use atan(x) = pi/2 - atan(1/x)
+    if mag >= 2:
+        x = mpf_rdiv_int(1, x, wp)
+        reciprocal = True
+    else:
+        reciprocal = False
+    # Work with |x| in fixed point; the sign is restored at the end
+    t = to_fixed(x, wp)
+    if sign:
+        t = -t
+    if wp < ATAN_TAYLOR_PREC:
+        a = atan_taylor(t, wp)
+    else:
+        a = atan_newton(t, wp)
+    if reciprocal:
+        a = ((pi_fixed(wp)>>1)+1) - a
+    if sign:
+        a = -a
+    return from_man_exp(a, -wp, prec, rnd)
+# TODO: cleanup the special cases
+def mpf_atan2(y, x, prec, rnd=round_fast):
+    """
+    Two-argument arctangent of the mpf values y and x.
+
+    y, x -- mpf tuples (sign, man, exp, bc)
+    prec -- precision of the result in bits
+    rnd  -- rounding mode
+
+    Returns fnan when an argument is nan or when both are infinite.
+    """
+    xsign, xman, xexp, xbc = x
+    ysign, yman, yexp, ybc = y
+    # y has a zero mantissa: zero, infinity or nan
+    if not yman:
+        if y == fzero and x != fnan:
+            # y = 0: result is 0 for x >= 0 and pi for x < 0
+            if mpf_sign(x) >= 0:
+                return fzero
+            return mpf_pi(prec, rnd)
+        if y in (finf, fninf):
+            if x in (finf, fninf):
+                return fnan
+            # pi/2
+            if y == finf:
+                return mpf_shift(mpf_pi(prec, rnd), -1)
+            # -pi/2
+            return mpf_neg(mpf_shift(mpf_pi(prec, negative_rnd[rnd]), -1))
+        return fnan
+    # Negative y: use atan2(y, x) = -atan2(-y, x) with mirrored rounding
+    if ysign:
+        return mpf_neg(mpf_atan2(mpf_neg(y), x, prec, negative_rnd[rnd]))
+    # x has a zero mantissa: zero, infinity or nan (y is finite, positive)
+    if not xman:
+        if x == fnan:
+            return fnan
+        if x == finf:
+            return fzero
+        if x == fninf:
+            return mpf_pi(prec, rnd)
+        # NOTE(review): y has a nonzero mantissa at this point, so this
+        # test appears to be unreachable -- confirm before removing.
+        if y == fzero:
+            return fzero
+        return mpf_shift(mpf_pi(prec, rnd), -1)
+    # General case: atan(y/x), shifted by pi when x is negative
+    tquo = mpf_atan(mpf_div(y, x, prec+4), prec+4)
+    if xsign:
+        return mpf_add(mpf_pi(prec+4), tquo, prec, rnd)
+    else:
+        return mpf_pos(tquo, prec, rnd)
+def mpf_asin(x, prec, rnd=round_fast):
+    sign, man, exp, bc = x
+    if bc+exp > 0 and x not in (fone, fnone):
+        raise ComplexResult("asin(x) is real only for -1 <= x <= 1")
+    # asin(x) = 2*atan(x/(1+sqrt(1-x**2)))
+    wp = prec + 15
+    a = mpf_mul(x, x)
+    b = mpf_add(fone, mpf_sqrt(mpf_sub(fone, a, wp), wp), wp)
+    c = mpf_div(x, b, wp)
+    return mpf_shift(mpf_atan(c, prec, rnd), 1)
+def mpf_acos(x, prec, rnd=round_fast):
+    # acos(x) = 2*atan(sqrt(1-x**2)/(1+x))
+    sign, man, exp, bc = x
+    if bc + exp > 0:
+        if x not in (fone, fnone):
+            raise ComplexResult("acos(x) is real only for -1 <= x <= 1")
+        if x == fnone:
+            return mpf_pi(prec, rnd)
+    wp = prec + 15
+    a = mpf_mul(x, x)
+    b = mpf_sqrt(mpf_sub(fone, a, wp), wp)
+    c = mpf_div(b, mpf_add(fone, x, wp), wp)
+    return mpf_shift(mpf_atan(c, prec, rnd), 1)
+def mpf_asinh(x, prec, rnd=round_fast):
+    wp = prec + 20
+    sign, man, exp, bc = x
+    mag = exp+bc
+    if mag < -8:
+        if mag < -wp:
+            return mpf_perturb(x, 1-sign, prec, rnd)
+        wp += (-mag)
+    # asinh(x) = log(x+sqrt(x**2+1))
+    # use reflection symmetry to avoid cancellation
+    q = mpf_sqrt(mpf_add(mpf_mul(x, x), fone, wp), wp)
+    q = mpf_add(mpf_abs(x), q, wp)
+    if sign:
+        return mpf_neg(mpf_log(q, prec, negative_rnd[rnd]))
+    else:
+        return mpf_log(q, prec, rnd)
+def mpf_acosh(x, prec, rnd=round_fast):
+    # acosh(x) = log(x+sqrt(x**2-1))
+    wp = prec + 15
+    if mpf_cmp(x, fone) == -1:
+        raise ComplexResult("acosh(x) is real only for x >= 1")
+    q = mpf_sqrt(mpf_add(mpf_mul(x,x), fnone, wp), wp)
+    return mpf_log(mpf_add(x, q, wp), prec, rnd)
+def mpf_atanh(x, prec, rnd=round_fast):
+    # atanh(x) = log((1+x)/(1-x))/2
+    sign, man, exp, bc = x
+    if (not man) and exp:
+        if x in (fzero, fnan):
+            return x
+        raise ComplexResult("atanh(x) is real only for -1 <= x <= 1")
+    mag = bc + exp
+    if mag > 0:
+        if mag == 1 and man == 1:
+            return [finf, fninf][sign]
+        raise ComplexResult("atanh(x) is real only for -1 <= x <= 1")
+    wp = prec + 15
+    if mag < -8:
+        if mag < -wp:
+            return mpf_perturb(x, sign, prec, rnd)
+        wp += (-mag)
+    a = mpf_add(x, fone, wp)
+    b = mpf_sub(fone, x, wp)
+    return mpf_shift(mpf_log(mpf_div(a, b, wp), prec, rnd), -1)
+def mpf_fibonacci(x, prec, rnd=round_fast):
+    sign, man, exp, bc = x
+    if not man:
+        if x == fninf:
+            return fnan
+        return x
+    # F(2^n) ~= 2^(2^n)
+    size = abs(exp+bc)
+    if exp >= 0:
+        # Exact
+        if size < 10 or size <= bitcount(prec):
+            return from_int(ifib(to_int(x)), prec, rnd)
+    # Use the modified Binet formula
+    wp = prec + size + 20
+    a = mpf_phi(wp)
+    b = mpf_add(mpf_shift(a, 1), fnone, wp)
+    u = mpf_pow(a, x, wp)
+    v = mpf_cos_pi(x, wp)
+    v = mpf_div(v, u, wp)
+    u = mpf_sub(u, v, wp)
+    u = mpf_div(u, b, prec, rnd)
+    return u
+#-------------------------------------------------------------------------------
+# Exponential-type functions
+#-------------------------------------------------------------------------------
+def exponential_series(x, prec, type=0):
+    """
+    Taylor series for cosh/sinh or cos/sin.
+    type = 0 -- returns exp(x)  (slightly faster than cosh+sinh)
+    type = 1 -- returns (cosh(x), sinh(x))
+    type = 2 -- returns (cos(x), sin(x))
+
+    x    -- fixed-point integer with prec fractional bits
+    prec -- precision in bits
+
+    The results are fixed-point integers with prec fractional bits.
+    """
+    # Work with |x|; the sign is applied to the odd part at the end
+    if x < 0:
+        x = -x
+        sign = 1
+    else:
+        sign = 0
+    # r: number of argument halvings, undone later by repeated
+    # squaring / double-angle steps
+    r = int(0.5*prec**0.5)
+    xmag = bitcount(x) - prec
+    r = max(0, xmag + r)
+    # Guard bits, and rescaling of x to wp bits combined with the
+    # division by 2**r
+    extra = 10 + 2*max(r,-xmag)
+    wp = prec + extra
+    x <<= (extra - r)
+    one = MPZ_ONE << wp
+    # Alternating signs are needed for cos only
+    alt = (type == 2)
+    if prec < EXP_SERIES_U_CUTOFF:
+        # Two interleaved accumulators over the even-power series
+        # sum x**(2j)/(2j)!; s1 is multiplied by x**2 after the loop
+        x2 = a = (x*x) >> wp
+        x4 = (x2*x2) >> wp
+        s0 = s1 = MPZ_ZERO
+        k = 2
+        while a:
+            a //= (k-1)*k; s0 += a; k += 2
+            a //= (k-1)*k; s1 += a; k += 2
+            a = (a*x4) >> wp
+        s1 = (x2*s1) >> wp
+        if alt:
+            c = s1 - s0 + one
+        else:
+            c = s1 + s0 + one
+    else:
+        # Same idea with u accumulators, each multiplied by its own
+        # power of x**2 after the loop
+        u = int(0.3*prec**0.35)
+        x2 = a = (x*x) >> wp
+        xpowers = [one, x2]
+        for i in xrange(1, u):
+            xpowers.append((xpowers[-1]*x2)>>wp)
+        sums = [MPZ_ZERO] * u
+        k = 2
+        while a:
+            for i in xrange(u):
+                a //= (k-1)*k
+                if alt and k & 2: sums[i] -= a
+                else:             sums[i] += a
+                k += 2
+            a = (a*xpowers[-1]) >> wp
+        for i in xrange(1, u):
+            sums[i] = (sums[i]*xpowers[i]) >> wp
+        c = sum(sums) + one
+    if type == 0:
+        # sinh from cosh via sqrt(cosh**2 - 1); exp(+/-x) = cosh +/- sinh,
+        # then square r times to undo the argument reduction
+        s = isqrt_fast(c*c - (one<<wp))
+        if sign:
+            v = c - s
+        else:
+            v = c + s
+        for i in xrange(r):
+            v = (v*v) >> wp
+        return v >> extra
+    else:
+        # Repeatedly apply the double-angle formula
+        # cosh(2*x) = 2*cosh(x)^2 - 1
+        # cos(2*x) = 2*cos(x)^2 - 1
+        pshift = wp-1
+        for i in xrange(r):
+            c = ((c*c) >> pshift) - one
+        # With the abs, this is the same for sinh and sin
+        s = isqrt_fast(abs((one<<wp) - c*c))
+        if sign:
+            s = -s
+        return (c>>extra), (s>>extra)
+def exp_basecase(x, prec):
+    """
+    Compute exp(x) as a fixed-point number. Works for any x,
+    but for speed should have |x| < 1. For an arbitrary number,
+    use exp(x) = exp(x-m*log(2)) * 2^m where m = floor(x/log(2)).
+
+    x    -- fixed-point integer with prec fractional bits
+    prec -- precision in bits
+
+    Returns exp(x) as a fixed-point integer with prec fractional bits.
+    """
+    # Above the cutoff, delegate to the cosh-based series
+    if prec > EXP_COSH_CUTOFF:
+        return exponential_series(x, prec, 0)
+    # r squarings are applied at the end. x is not rescaled when prec is
+    # raised by r, so the same integer then stands for x / 2**r.
+    r = int(prec**0.5)
+    prec += r
+    # s0 collects the even-order terms; s1 collects the odd-order terms
+    # divided by x and is multiplied by x after the loop
+    s0 = s1 = (MPZ_ONE << prec)
+    k = 2
+    a = x2 = (x*x) >> prec
+    while a:
+        a //= k; s0 += a; k += 1
+        a //= k; s1 += a; k += 1
+        a = (a*x2) >> prec
+    s1 = (s1*x) >> prec
+    s = s0 + s1
+    # Undo the implicit division of x by 2**r with r squarings
+    u = r
+    while r:
+        s = (s*s) >> prec
+        r -= 1
+    # Drop the r extra bits of precision
+    return s >> u
+def exp_expneg_basecase(x, prec):
+    """
+    Computation of exp(x), exp(-x)
+    """
+    if prec > EXP_COSH_CUTOFF:
+        cosh, sinh = exponential_series(x, prec, 1)
+        return cosh+sinh, cosh-sinh
+    a = exp_basecase(x, prec)
+    b = (MPZ_ONE << (prec+prec)) // a
+    return a, b
+def cos_sin_basecase(x, prec):
+    """
+    Compute cos(x), sin(x) as fixed-point numbers, assuming x
+    in [0, pi/2). For an arbitrary number, use x' = x - m*(pi/2)
+    where m = floor(x/(pi/2)) along with quarter-period symmetries.
+
+    x    -- fixed-point integer with prec fractional bits
+    prec -- precision in bits
+
+    Returns the pair (cos(x), sin(x)) with prec fractional bits.
+    """
+    # Above the cache precision, use the general series directly
+    if prec > COS_SIN_CACHE_PREC:
+        return exponential_series(x, prec, 2)
+    # t: the leading COS_SIN_CACHE_STEP fractional bits of x, used as
+    # the cache index
+    precs = prec - COS_SIN_CACHE_STEP
+    t = x >> precs
+    n = int(t)
+    if n not in cos_sin_cache:
+        # Compute cos/sin of the cache point with 10 guard bits
+        w = t<<(10+COS_SIN_CACHE_PREC-COS_SIN_CACHE_STEP)
+        cos_t, sin_t = exponential_series(w, 10+COS_SIN_CACHE_PREC, 2)
+        cos_sin_cache[n] = (cos_t>>10), (sin_t>>10)
+    cos_t, sin_t = cos_sin_cache[n]
+    # Reduce the cached values to the requested precision
+    offset = COS_SIN_CACHE_PREC - prec
+    cos_t >>= offset
+    sin_t >>= offset
+    # Remainder of x after removing the cache point
+    x -= t << precs
+    # Taylor series of cos and sin of the remainder, built term by term
+    # with alternating signs
+    cos = MPZ_ONE << prec
+    sin = x
+    k = 2
+    a = -((x*x) >> prec)
+    while a:
+        a //= k; cos += a; k += 1; a = (a*x) >> prec
+        a //= k; sin += a; k += 1; a = -((a*x) >> prec)
+    # Angle-addition formulas combine the remainder with the cache point
+    return ((cos*cos_t-sin*sin_t) >> prec), ((sin*cos_t+cos*sin_t) >> prec)
+def mpf_exp(x, prec, rnd=round_fast):
+    """
+    Compute exp(x) for the mpf value x.
+
+    x    -- mpf tuple (sign, man, exp, bc)
+    prec -- precision of the result in bits
+    rnd  -- rounding mode
+
+    Zero gives fone and fninf gives fzero; any other value with a zero
+    mantissa is returned unchanged.
+    """
+    sign, man, exp, bc = x
+    if man:
+        mag = bc + exp
+        wp = prec + 14
+        # From here on man carries the sign of x
+        if sign:
+            man = -man
+        # TODO: the best cutoff depends on both x and the precision.
+        # Integer x at high precision: raise the constant e to an
+        # integer power
+        if prec > 600 and exp >= 0:
+            # Need about log2(exp(n)) ~= 1.45*mag extra precision
+            e = mpf_e(wp+int(1.45*mag))
+            return mpf_pow_int(e, man<<exp, prec, rnd)
+        # Tiny x: exp(x) is 1 perturbed in the direction of x
+        if mag < -wp:
+            return mpf_perturb(fone, sign, prec, rnd)
+        # |x| >= 2
+        if mag > 1:
+            # For large arguments: exp(2^mag*(1+eps)) =
+            # exp(2^mag)*exp(2^mag*eps) = exp(2^mag)*(1 + 2^mag*eps + ...)
+            # so about mag extra bits is required.
+            wpmod = wp + mag
+            offset = exp + wpmod
+            if offset >= 0:
+                t = man << offset
+            else:
+                t = man >> (-offset)
+            # Reduce modulo log(2): x = n*log(2) + t
+            lg2 = ln2_fixed(wpmod)
+            n, t = divmod(t, lg2)
+            n = int(n)
+            t >>= mag
+        else:
+            # Convert x to fixed point with wp bits; no reduction needed
+            offset = exp + wp
+            if offset >= 0:
+                t = man << offset
+            else:
+                t = man >> (-offset)
+            n = 0
+        man = exp_basecase(t, wp)
+        # Reattach the factor 2**n through the exponent
+        return from_man_exp(man, n-wp, prec, rnd)
+    # Zero mantissa: a zero exponent field identifies x = 0
+    if not exp:
+        return fone
+    if x == fninf:
+        return fzero
+    return x
+def mpf_cosh_sinh(x, prec, rnd=round_fast, tanh=0):
+    """Simultaneously compute (cosh(x), sinh(x)) for real x
+
+    x    -- mpf tuple (sign, man, exp, bc)
+    prec -- precision of the result in bits
+    rnd  -- rounding mode
+    tanh -- if true, return the single value tanh(x) instead of the
+            (cosh(x), sinh(x)) pair
+    """
+    sign, man, exp, bc = x
+    # Special values: infinities and nan
+    if (not man) and exp:
+        if tanh:
+            if x == finf: return fone
+            if x == fninf: return fnone
+            return fnan
+        if x == finf: return (finf, finf)
+        if x == fninf: return (finf, fninf)
+        return fnan, fnan
+    mag = exp+bc
+    wp = prec+14
+    if mag < -4:
+        # Extremely close to 0, sinh(x) ~= x and cosh(x) ~= 1
+        if mag < -wp:
+            if tanh:
+                return mpf_perturb(x, 1-sign, prec, rnd)
+            cosh = mpf_perturb(fone, 0, prec, rnd)
+            sinh = mpf_perturb(x, sign, prec, rnd)
+            return cosh, sinh
+        # Fix for cancellation when computing sinh
+        wp += (-mag)
+    # Does exp(-2*x) vanish?
+    if mag > 10:
+        if 3*(1<<(mag-1)) > wp:
+            # XXX: rounding
+            if tanh:
+                return mpf_perturb([fone,fnone][sign], 1-sign, prec, rnd)
+            # cosh(x) and |sinh(x)| both reduce to exp(|x|)/2
+            c = s = mpf_shift(mpf_exp(mpf_abs(x), prec, rnd), -1)
+            if sign:
+                s = mpf_neg(s)
+            return c, s
+    # |x| > 1
+    if mag > 1:
+        # Reduce modulo log(2): |x| = n*log(2) + t
+        wpmod = wp + mag
+        offset = exp + wpmod
+        if offset >= 0:
+            t = man << offset
+        else:
+            t = man >> (-offset)
+        lg2 = ln2_fixed(wpmod)
+        n, t = divmod(t, lg2)
+        n = int(n)
+        t >>= mag
+    else:
+        # Convert |x| to fixed point with wp bits; no reduction needed
+        offset = exp + wp
+        if offset >= 0:
+            t = man << offset
+        else:
+            t = man >> (-offset)
+        n = 0
+    a, b = exp_expneg_basecase(t, wp)
+    # TODO: optimize division precision
+    # Both values are scaled by 2**(1-n): a is exp(t) and b>>(2*n) is
+    # exp(-t)/4**n
+    cosh = a + (b>>(2*n))
+    sinh = a - (b>>(2*n))
+    if sign:
+        sinh = -sinh
+    if tanh:
+        # The common scale factor cancels in the quotient
+        man = (sinh << wp) // cosh
+        return from_man_exp(man, -wp, prec, rnd)
+    else:
+        # The exponent n-wp-1 restores the factor 2**n and the 1/2
+        cosh = from_man_exp(cosh, n-wp-1, prec, rnd)
+        sinh = from_man_exp(sinh, n-wp-1, prec, rnd)
+        return cosh, sinh
+def mod_pi2(man, exp, mag, wp):
+    """
+    Reduce the number man * 2**exp modulo pi/2.
+
+    man, exp -- mantissa and exponent of the argument
+    mag      -- magnitude of the argument (its use below matches
+                exp + bitcount(man); presumably supplied as such by
+                the caller)
+    wp       -- working precision in bits
+
+    Returns (t, n, wp): t is the reduced argument as a fixed-point
+    integer, n the integer quotient by pi/2 (0 when no reduction is
+    needed), and wp the possibly increased precision at which t is
+    expressed.
+    """
+    # Reduce to standard interval
+    if mag > 0:
+        i = 0
+        while 1:
+            # Retry with more guard bits (20, 40, 80, ...) until the
+            # remainder is not lost to cancellation
+            cancellation_prec = 20 << i
+            wpmod = wp + mag + cancellation_prec
+            # pi at one bit less is pi/2 at wpmod bits
+            pi2 = pi_fixed(wpmod-1)
+            pi4 = pi2 >> 1
+            offset = wpmod + exp
+            if offset >= 0:
+                t = man << offset
+            else:
+                t = man >> (-offset)
+            n, y = divmod(t, pi2)
+            # Distance of the remainder from the nearest multiple of pi/2
+            if y > pi4:
+                small = pi2 - y
+            else:
+                small = y
+            # Accept once that distance still has significant bits
+            if small >> (wp+mag-10):
+                n = int(n)
+                t = y >> mag
+                wp = wpmod - mag
+                break
+            i += 1
+    else:
+        # Argument below 1 in magnitude: no reduction, only extra bits
+        # to compensate for its smallness
+        wp += (-mag)
+        offset = exp + wp
+        if offset >= 0:
+            t = man << offset
+        else:
+            t = man >> (-offset)
+        n = 0
+    return t, n, wp
+def mpf_cos_sin(x, prec, rnd=round_fast, which=0, pi=False):
+    """
+    which:
+    0 -- return cos(x), sin(x)
+    1 -- return cos(x)
+    2 -- return sin(x)
+    3 -- return tan(x)
+    if pi=True, compute for pi*x
+    """
+    sign, man, exp, bc = x
+    # Zero mantissa: a nonzero exponent field gives nan for both values,
+    # otherwise x is zero and (cos, sin) = (1, 0).
+    if not man:
+        if exp:
+            c, s = fnan, fnan
+        else:
+            c, s = fone, fzero
+        if which == 0: return c, s
+        if which == 1: return c
+        if which == 2: return s
+        # tan shares the value of s in both cases above (nan or zero)
+        if which == 3: return s
+    mag = bc + exp
+    wp = prec + 10
+    # Extremely small?
+    if mag < 0:
+        if mag < -wp:
+            # Below the working precision: cos is returned as a perturbed 1
+            # and sin/tan as a perturbed x (x scaled by pi first when pi=True).
+            if pi:
+                x = mpf_mul(x, mpf_pi(wp))
+            c = mpf_perturb(fone, 1, prec, rnd)
+            s = mpf_perturb(x, 1-sign, prec, rnd)
+            if which == 0: return c, s
+            if which == 1: return c
+            if which == 2: return s
+            if which == 3: return mpf_perturb(x, sign, prec, rnd)
+    if pi:
+        # exp >= -1 is answered exactly from the constants below, without
+        # any series evaluation.
+        # NOTE(review): presumably relies on man being odd (normalized mpf),
+        # so exp == -1 is an odd multiple of 1/2 and exp == 0 an odd integer
+        # -- confirm.
+        if exp >= -1:
+            if exp == -1:
+                c = fzero
+                s = (fone, fnone)[bool(man & 2) ^ sign]
+            elif exp == 0:
+                c, s = (fnone, fzero)
+            else:
+                c, s = (fone, fzero)
+            if which == 0: return c, s
+            if which == 1: return c
+            if which == 2: return s
+            if which == 3: return mpf_div(s, c, prec, rnd)
+        # Subtract nearest half-integer (= mod by pi/2)
+        n = ((man >> (-exp-2)) + 1) >> 1
+        man = man - (n << (-exp-1))
+        # Working precision is adjusted by the magnitude of the reduced value
+        mag2 = bitcount(man) + exp
+        wp = prec + 10 - mag2
+        offset = exp + wp
+        if offset >= 0:
+            t = man << offset
+        else:
+            t = man >> (-offset)
+        # Scale the fixed-point remainder by pi_fixed(wp)
+        t = (t*pi_fixed(wp)) >> wp
+    else:
+        t, n, wp = mod_pi2(man, exp, mag, wp)
+    c, s = cos_sin_basecase(t, wp)
+    # The two low bits of n select how (c, s) are swapped and negated
+    m = n & 3
+    if   m == 1: c, s = -s, c
+    elif m == 2: c, s = -c, -s
+    elif m == 3: c, s = s, -c
+    # Only s is negated for inputs with the sign bit set; c is left as is
+    if sign:
+        s = -s
+    if which == 0:
+        c = from_man_exp(c, -wp, prec, rnd)
+        s = from_man_exp(s, -wp, prec, rnd)
+        return c, s
+    if which == 1:
+        return from_man_exp(c, -wp, prec, rnd)
+    if which == 2:
+        return from_man_exp(s, -wp, prec, rnd)
+    if which == 3:
+        # tan is formed as the ratio of the two fixed-point integers
+        return from_rational(s, c, prec, rnd)
+def mpf_cos(x, prec, rnd=round_fast): return mpf_cos_sin(x, prec, rnd, 1)
+def mpf_sin(x, prec, rnd=round_fast): return mpf_cos_sin(x, prec, rnd, 2)
+def mpf_tan(x, prec, rnd=round_fast): return mpf_cos_sin(x, prec, rnd, 3)
+def mpf_cos_sin_pi(x, prec, rnd=round_fast): return mpf_cos_sin(x, prec, rnd, 0, 1)
+def mpf_cos_pi(x, prec, rnd=round_fast): return mpf_cos_sin(x, prec, rnd, 1, 1)
+def mpf_sin_pi(x, prec, rnd=round_fast): return mpf_cos_sin(x, prec, rnd, 2, 1)
+def mpf_cosh(x, prec, rnd=round_fast): return mpf_cosh_sinh(x, prec, rnd)[0]
+def mpf_sinh(x, prec, rnd=round_fast): return mpf_cosh_sinh(x, prec, rnd)[1]
+def mpf_tanh(x, prec, rnd=round_fast): return mpf_cosh_sinh(x, prec, rnd, tanh=1)
+# Low-overhead fixed-point versions
+def cos_sin_fixed(x, prec, pi2=None):
+    if pi2 is None:
+        pi2 = pi_fixed(prec-1)
+    n, t = divmod(x, pi2)
+    n = int(n)
+    c, s = cos_sin_basecase(t, prec)
+    m = n & 3
+    if m == 0: return c, s
+    if m == 1: return -s, c
+    if m == 2: return -c, -s
+    if m == 3: return s, -c
+def exp_fixed(x, prec, ln2=None):
+    if ln2 is None:
+        ln2 = ln2_fixed(prec)
+    n, t = divmod(x, ln2)
+    n = int(n)
+    v = exp_basecase(t, prec)
+    if n >= 0:
+        return v << n
+    else:
+        return v >> (-n)
+# When running on the Sage backend, rebind several functions of this module
+# to the implementations exported by sage.libs.mpmath.ext_libmp.
+if BACKEND == 'sage':
+    try:
+        import sage.libs.mpmath.ext_libmp as _lbmp
+        # The names are rebound one at a time, so a failure part-way through
+        # leaves the earlier names already replaced.
+        mpf_sqrt = _lbmp.mpf_sqrt
+        mpf_exp = _lbmp.mpf_exp
+        mpf_log = _lbmp.mpf_log
+        mpf_cos = _lbmp.mpf_cos
+        mpf_sin = _lbmp.mpf_sin
+        mpf_pow = _lbmp.mpf_pow
+        exp_fixed = _lbmp.exp_fixed
+        cos_sin_fixed = _lbmp.cos_sin_fixed
+        log_int_fixed = _lbmp.log_int_fixed
+    except (ImportError, AttributeError):
+        # Best effort: report the problem and keep whatever is bound so far
+        print("Warning: Sage imports in libelefun failed")

tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libhyper.py ADDED Viewed

	@@ -0,0 +1,1150 @@

+"""
+This module implements computation of hypergeometric and related
+functions. In particular, it provides code for generic summation
+of hypergeometric series. Optimized versions for various special
+cases are also provided.
+"""
+import operator
+import math
+from .backend import MPZ_ZERO, MPZ_ONE, BACKEND, xrange, exec_
+from .libintmath import gcd
+from .libmpf import (\
+    ComplexResult, round_fast, round_nearest,
+    negative_rnd, bitcount, to_fixed, from_man_exp, from_int, to_int,
+    from_rational,
+    fzero, fone, fnone, ftwo, finf, fninf, fnan,
+    mpf_sign, mpf_add, mpf_abs, mpf_pos,
+    mpf_cmp, mpf_lt, mpf_le, mpf_gt, mpf_min_max,
+    mpf_perturb, mpf_neg, mpf_shift, mpf_sub, mpf_mul, mpf_div,
+    sqrt_fixed, mpf_sqrt, mpf_rdiv_int, mpf_pow_int,
+    to_rational,
+)
+from .libelefun import (\
+    mpf_pi, mpf_exp, mpf_log, pi_fixed, mpf_cos_sin, mpf_cos, mpf_sin,
+    mpf_sqrt, agm_fixed,
+)
+from .libmpc import (\
+    mpc_one, mpc_sub, mpc_mul_mpf, mpc_mul, mpc_neg, complex_int_pow,
+    mpc_div, mpc_add_mpf, mpc_sub_mpf,
+    mpc_log, mpc_add, mpc_pos, mpc_shift,
+    mpc_is_infnan, mpc_zero, mpc_sqrt, mpc_abs,
+    mpc_mpf_div, mpc_square, mpc_exp
+)
+from .libintmath import ifac
+from .gammazeta import mpf_gamma_int, mpf_euler, euler_fixed
+class NoConvergence(Exception):
+    pass
+#-----------------------------------------------------------------------#
+#                                                                       #
+#                     Generic hypergeometric series                     #
+#                                                                       #
+#-----------------------------------------------------------------------#
+"""
+TODO:
+1. proper mpq parsing
+2. imaginary z special-cased (also: rational, integer?)
+3. more clever handling of series that don't converge because of stupid
+   upwards rounding
+4. checking for cancellation
+"""
+def make_hyp_summator(key):
+    """
+    Returns a function that sums a generalized hypergeometric series,
+    for given parameter types (integer, rational, real, complex).
+    """
+    # key = (p, q, param_types, ztype). The function is built as source text,
+    # line by line, and compiled with exec_ at the end; the return value is
+    # the pair (source text, compiled function).
+    p, q, param_types, ztype = key
+    pstring = "".join(param_types)
+    # The generated name encodes p, q, the type flags split at index p,
+    # and the argument type.
+    fname = "hypsum_%i_%i_%s_%s_%s" % (p, q, pstring[:p], pstring[p:], ztype)
+    #print "generating hypsum", fname
+    have_complex_param = 'C' in param_types
+    have_complex_arg = ztype == 'C'
+    have_complex = have_complex_param or have_complex_arg
+    source = []
+    add = source.append
+    # Parameter indices grouped by type flag; "a" lists hold indices < p,
+    # "b" lists hold indices >= p.
+    aint = []
+    arat = []
+    bint = []
+    brat = []
+    areal = []
+    breal = []
+    acomplex = []
+    bcomplex = []
+    #add("wp = prec + 40")
+    add("MAX = kwargs.get('maxterms', wp*100)")
+    add("HIGH = MPZ_ONE<<epsshift")
+    add("LOW = -HIGH")
+    # Setup code
+    # SRE/SIM accumulate the sum and PRE/PIM hold the current term, all as
+    # fixed-point integers with wp fractional bits.
+    add("SRE = PRE = one = (MPZ_ONE << wp)")
+    if have_complex:
+        add("SIM = PIM = MPZ_ZERO")
+    if have_complex_arg:
+        add("xsign, xm, xe, xbc = z[0]")
+        add("if xsign: xm = -xm")
+        add("ysign, ym, ye, ybc = z[1]")
+        add("if ysign: ym = -ym")
+    else:
+        add("xsign, xm, xe, xbc = z")
+        add("if xsign: xm = -xm")
+    # Convert the argument to fixed point (ZRE, plus ZIM when complex)
+    add("offset = xe + wp")
+    add("if offset >= 0:")
+    add("    ZRE = xm << offset")
+    add("else:")
+    add("    ZRE = xm >> (-offset)")
+    if have_complex_arg:
+        add("offset = ye + wp")
+        add("if offset >= 0:")
+        add("    ZIM = ym << offset")
+        add("else:")
+        add("    ZIM = ym >> (-offset)")
+    # Emit the unpacking code for every parameter according to its flag:
+    # 'Z' uses coeffs[i] directly, 'Q' reads ._mpq_, 'R' reads ._mpf_ and
+    # 'C' reads ._mpc_; the last two are converted to fixed point.
+    for i, flag in enumerate(param_types):
+        W = ["A", "B"][i >= p]
+        if flag == 'Z':
+            ([aint,bint][i >= p]).append(i)
+            add("%sINT_%i = coeffs[%i]" % (W, i, i))
+        elif flag == 'Q':
+            ([arat,brat][i >= p]).append(i)
+            add("%sP_%i, %sQ_%i = coeffs[%i]._mpq_" % (W, i, W, i, i))
+        elif flag == 'R':
+            ([areal,breal][i >= p]).append(i)
+            add("xsign, xm, xe, xbc = coeffs[%i]._mpf_" % i)
+            add("if xsign: xm = -xm")
+            add("offset = xe + wp")
+            add("if offset >= 0:")
+            add("    %sREAL_%i = xm << offset" % (W, i))
+            add("else:")
+            add("    %sREAL_%i = xm >> (-offset)" % (W, i))
+        elif flag == 'C':
+            ([acomplex,bcomplex][i >= p]).append(i)
+            add("__re, __im = coeffs[%i]._mpc_" % i)
+            add("xsign, xm, xe, xbc = __re")
+            add("if xsign: xm = -xm")
+            add("ysign, ym, ye, ybc = __im")
+            add("if ysign: ym = -ym")
+            add("offset = xe + wp")
+            add("if offset >= 0:")
+            add("    %sCRE_%i = xm << offset" % (W, i))
+            add("else:")
+            add("    %sCRE_%i = xm >> (-offset)" % (W, i))
+            add("offset = ye + wp")
+            add("if offset >= 0:")
+            add("    %sCIM_%i = ym << offset" % (W, i))
+            add("else:")
+            add("    %sCIM_%i = ym >> (-offset)" % (W, i))
+        else:
+            raise ValueError
+    # Real "a" and "b" parameters are paired off: a pair is applied as one
+    # multiply followed by one floor division, with no shift by wp.
+    # Unpaired leftovers need an explicit shift.
+    l_areal = len(areal)
+    l_breal = len(breal)
+    cancellable_real = min(l_areal, l_breal)
+    noncancellable_real_num = areal[cancellable_real:]
+    noncancellable_real_den = breal[cancellable_real:]
+    # LOOP
+    add("for n in xrange(1,10**8):")
+    # For the term indices listed in magnitude_check, record wp minus the
+    # bit size of the current term.
+    add("    if n in magnitude_check:")
+    add("        p_mag = bitcount(abs(PRE))")
+    if have_complex:
+        add("        p_mag = max(p_mag, bitcount(abs(PIM)))")
+    add("        magnitude_check[n] = wp-p_mag")
+    # Real factors
+    # Integer and rational parameters are combined into one exact integer
+    # multiplier and one exact integer divisor (which also includes n).
+    multiplier = " * ".join(["AINT_#".replace("#", str(i)) for i in aint] + \
+                            ["AP_#".replace("#", str(i)) for i in arat] + \
+                            ["BQ_#".replace("#", str(i)) for i in brat])
+    divisor    = " * ".join(["BINT_#".replace("#", str(i)) for i in bint] + \
+                            ["BP_#".replace("#", str(i)) for i in brat] + \
+                            ["AQ_#".replace("#", str(i)) for i in arat] + ["n"])
+    if multiplier:
+        add("    mul = " + multiplier)
+    add("    div = " + divisor)
+    # Check for singular terms
+    # A zero divisor stops the loop when the multiplier is zero as well,
+    # and raises ZeroDivisionError otherwise.
+    add("    if not div:")
+    if multiplier:
+        add("        if not mul:")
+        add("            break")
+    add("        raise ZeroDivisionError")
+    # Update product
+    if have_complex:
+        # TODO: when there are several real parameters and just a few complex
+        # (maybe just the complex argument), we only need to do about
+        # half as many ops if we accumulate the real factor in a single real variable
+        for k in range(cancellable_real): add("    PRE = PRE * AREAL_%i // BREAL_%i" % (areal[k], breal[k]))
+        for i in noncancellable_real_num: add("    PRE = (PRE * AREAL_#) >> wp".replace("#", str(i)))
+        for i in noncancellable_real_den: add("    PRE = (PRE << wp) // BREAL_#".replace("#", str(i)))
+        for k in range(cancellable_real): add("    PIM = PIM * AREAL_%i // BREAL_%i" % (areal[k], breal[k]))
+        for i in noncancellable_real_num: add("    PIM = (PIM * AREAL_#) >> wp".replace("#", str(i)))
+        for i in noncancellable_real_den: add("    PIM = (PIM << wp) // BREAL_#".replace("#", str(i)))
+        # Multiply the term by z (complex or real) together with the exact
+        # integer factors mul/div
+        if multiplier:
+            if have_complex_arg:
+                add("    PRE, PIM = (mul*(PRE*ZRE-PIM*ZIM))//div, (mul*(PIM*ZRE+PRE*ZIM))//div")
+                add("    PRE >>= wp")
+                add("    PIM >>= wp")
+            else:
+                add("    PRE = ((mul * PRE * ZRE) >> wp) // div")
+                add("    PIM = ((mul * PIM * ZRE) >> wp) // div")
+        else:
+            if have_complex_arg:
+                add("    PRE, PIM = (PRE*ZRE-PIM*ZIM)//div, (PIM*ZRE+PRE*ZIM)//div")
+                add("    PRE >>= wp")
+                add("    PIM >>= wp")
+            else:
+                add("    PRE = ((PRE * ZRE) >> wp) // div")
+                add("    PIM = ((PIM * ZRE) >> wp) // div")
+        # Complex "a" parameters: complex multiplication of the term
+        for i in acomplex:
+            add("    PRE, PIM = PRE*ACRE_#-PIM*ACIM_#, PIM*ACRE_#+PRE*ACIM_#".replace("#", str(i)))
+            add("    PRE >>= wp")
+            add("    PIM >>= wp")
+        # Complex "b" parameters: complex division, done by multiplying with
+        # the conjugate and dividing by the squared modulus
+        for i in bcomplex:
+            add("    mag = BCRE_#*BCRE_#+BCIM_#*BCIM_#".replace("#", str(i)))
+            add("    re = PRE*BCRE_# + PIM*BCIM_#".replace("#", str(i)))
+            add("    im = PIM*BCRE_# - PRE*BCIM_#".replace("#", str(i)))
+            add("    PRE = (re << wp) // mag".replace("#", str(i)))
+            add("    PIM = (im << wp) // mag".replace("#", str(i)))
+    else:
+        for k in range(cancellable_real): add("    PRE = PRE * AREAL_%i // BREAL_%i" % (areal[k], breal[k]))
+        for i in noncancellable_real_num: add("    PRE = (PRE * AREAL_#) >> wp".replace("#", str(i)))
+        for i in noncancellable_real_den: add("    PRE = (PRE << wp) // BREAL_#".replace("#", str(i)))
+        if multiplier:
+            add("    PRE = ((PRE * mul * ZRE) >> wp) // div")
+        else:
+            add("    PRE = ((PRE * ZRE) >> wp) // div")
+    # Add product to sum
+    # The loop stops once the term lies strictly between LOW and HIGH
+    if have_complex:
+        add("    SRE += PRE")
+        add("    SIM += PIM")
+        add("    if (HIGH > PRE > LOW) and (HIGH > PIM > LOW):")
+        add("        break")
+    else:
+        add("    SRE += PRE")
+        add("    if HIGH > PRE > LOW:")
+        add("        break")
+    #add("    from mpmath import nprint, log, ldexp")
+    #add("    nprint([n, log(abs(PRE),2), ldexp(PRE,-wp)])")
+    add("    if n > MAX:")
+    add("        raise NoConvergence('Hypergeometric series converges too slowly. Try increasing maxterms.')")
+    # +1 all parameters for next loop
+    for i in aint:     add("    AINT_# += 1".replace("#", str(i)))
+    for i in bint:     add("    BINT_# += 1".replace("#", str(i)))
+    for i in arat:     add("    AP_# += AQ_#".replace("#", str(i)))
+    for i in brat:     add("    BP_# += BQ_#".replace("#", str(i)))
+    for i in areal:    add("    AREAL_# += one".replace("#", str(i)))
+    for i in breal:    add("    BREAL_# += one".replace("#", str(i)))
+    for i in acomplex: add("    ACRE_# += one".replace("#", str(i)))
+    for i in bcomplex: add("    BCRE_# += one".replace("#", str(i)))
+    # Epilogue of the generated function: it returns (value, is_complex, magn),
+    # where magn is exponent + bitcount of the largest nonzero component,
+    # or -wp+1 when the sum is exactly zero.
+    if have_complex:
+        add("a = from_man_exp(SRE, -wp, prec, 'n')")
+        add("b = from_man_exp(SIM, -wp, prec, 'n')")
+        add("if SRE:")
+        add("    if SIM:")
+        add("        magn = max(a[2]+a[3], b[2]+b[3])")
+        add("    else:")
+        add("        magn = a[2]+a[3]")
+        add("elif SIM:")
+        add("    magn = b[2]+b[3]")
+        add("else:")
+        add("    magn = -wp+1")
+        add("return (a, b), True, magn")
+    else:
+        add("a = from_man_exp(SRE, -wp, prec, 'n')")
+        add("if SRE:")
+        add("    magn = a[2]+a[3]")
+        add("else:")
+        add("    magn = -wp+1")
+        add("return a, False, magn")
+    # Indent the collected lines as a function body and prepend the def line
+    source = "\n".join(("    " + line) for line in source)
+    source = ("def %s(coeffs, z, prec, wp, epsshift, magnitude_check, **kwargs):\n" % fname) + source
+    # Compile against this module's globals so the generated code can use
+    # names such as MPZ_ONE, bitcount, xrange, from_man_exp and NoConvergence.
+    namespace = {}
+    exec_(source, globals(), namespace)
+    #print source
+    return source, namespace[fname]
+if BACKEND == 'sage':
+    def make_hyp_summator(key):
+        """
+        Returns a function that sums a generalized hypergeometric series,
+        for given parameter types (integer, rational, real, complex).
+        """
+        from sage.libs.mpmath.ext_main import hypsum_internal
+        p, q, param_types, ztype = key
+        def _hypsum(coeffs, z, prec, wp, epsshift, magnitude_check, **kwargs):
+            return hypsum_internal(p, q, param_types, ztype, coeffs, z,
+                prec, wp, epsshift, magnitude_check, kwargs)
+        return "(none)", _hypsum
+#-----------------------------------------------------------------------#
+#                                                                       #
+#                              Error functions                          #
+#                                                                       #
+#-----------------------------------------------------------------------#
+# TODO: mpf_erf should call mpf_erfc when appropriate (currently
+#    only the converse delegation is implemented)
+def mpf_erf(x, prec, rnd=round_fast):
+    sign, man, exp, bc = x
+    if not man:
+        if x == fzero: return fzero
+        if x == finf: return fone
+        if x== fninf: return fnone
+        return fnan
+    size = exp + bc
+    lg = math.log
+    # The approximation erf(x) = 1 is accurate to > x^2 * log(e,2) bits
+    if size > 3 and 2*(size-1) + 0.528766 > lg(prec,2):
+        if sign:
+            return mpf_perturb(fnone, 0, prec, rnd)
+        else:
+            return mpf_perturb(fone, 1, prec, rnd)
+    # erf(x) ~ 2*x/sqrt(pi) close to 0
+    if size < -prec:
+        # 2*x
+        x = mpf_shift(x,1)
+        c = mpf_sqrt(mpf_pi(prec+20), prec+20)
+        # TODO: interval rounding
+        return mpf_div(x, c, prec, rnd)
+    wp = prec + abs(size) + 25
+    # Taylor series for erf, fixed-point summation
+    t = abs(to_fixed(x, wp))
+    t2 = (t*t) >> wp
+    s, term, k = t, 12345, 1
+    while term:
+        t = ((t * t2) >> wp) // k
+        term = t // (2*k+1)
+        if k & 1:
+            s -= term
+        else:
+            s += term
+        k += 1
+    s = (s << (wp+1)) // sqrt_fixed(pi_fixed(wp), wp)
+    if sign:
+        s = -s
+    return from_man_exp(s, -wp, prec, rnd)
+# If possible, we use the asymptotic series for erfc.
+# This is an alternating divergent asymptotic series, so
+# the error is at most equal to the first omitted term.
+# Here we check if the smallest term is small enough
+# for a given x and precision
+def erfc_check_series(x, prec):
+    n = to_int(x)
+    if n**2 * 1.44 > prec:
+        return True
+    return False
+def mpf_erfc(x, prec, rnd=round_fast):
+    # Complementary error function erfc(x) of an mpf tuple x, rounded to
+    # prec bits in direction rnd.
+    sign, man, exp, bc = x
+    # Zero mantissa: 0 -> 1, +inf -> 0, -inf -> 2, anything else -> nan
+    if not man:
+        if x == fzero: return fone
+        if x == finf: return fzero
+        if x == fninf: return ftwo
+        return fnan
+    wp = prec + 20
+    mag = bc+exp
+    # Preserve full accuracy when exponent grows huge
+    wp += max(0, 2*mag)
+    # Negative x, or x of magnitude below 2, always goes through 1 - erf(x)
+    regular_erf = sign or mag < 2
+    if regular_erf or not erfc_check_series(x, wp):
+        if regular_erf:
+            # erf is rounded in the direction given by negative_rnd[rnd]
+            # because it is subtracted from 1
+            return mpf_sub(fone, mpf_erf(x, prec+10, negative_rnd[rnd]), prec, rnd)
+        # 1-erf(x) ~ exp(-x^2), increase prec to deal with cancellation
+        n = to_int(x)+1
+        return mpf_sub(fone, mpf_erf(x, prec + int(n**2*1.44) + 10), prec, rnd)
+    # Asymptotic series, summed in fixed point with wp fractional bits;
+    # t holds 2*x^2.
+    s = term = MPZ_ONE << wp
+    term_prev = 0
+    t = (2 * to_fixed(x, wp) ** 2) >> wp
+    k = 1
+    while 1:
+        term = ((term * (2*k - 1)) << wp) // t
+        # Stop when the terms start growing again (checked only after the
+        # first four) or when the term has become zero
+        if k > 4 and term > term_prev or not term:
+            break
+        if k & 1:
+            s -= term
+        else:
+            s += term
+        term_prev = term
+        #print k, to_str(from_man_exp(term, -wp, 50), 10)
+        k += 1
+    # Final result: exp(-x^2) * s / (sqrt(pi) * x)
+    s = (s << wp) // sqrt_fixed(pi_fixed(wp), wp)
+    s = from_man_exp(s, -wp, wp)
+    z = mpf_exp(mpf_neg(mpf_mul(x,x,wp),wp),wp)
+    y = mpf_div(mpf_mul(z, s, wp), x, prec, rnd)
+    return y
+#-----------------------------------------------------------------------#
+#                                                                       #
+#                         Exponential integrals                         #
+#                                                                       #
+#-----------------------------------------------------------------------#
+def ei_taylor(x, prec):
+    s = t = x
+    k = 2
+    while t:
+        t = ((t*x) >> prec) // k
+        s += t // k
+        k += 1
+    return s
+def complex_ei_taylor(zre, zim, prec):
+    _abs = abs
+    sre = tre = zre
+    sim = tim = zim
+    k = 2
+    while _abs(tre) + _abs(tim) > 5:
+        tre, tim = ((tre*zre-tim*zim)//k)>>prec, ((tre*zim+tim*zre)//k)>>prec
+        sre += tre // k
+        sim += tim // k
+        k += 1
+    return sre, sim
+def ei_asymptotic(x, prec):
+    one = MPZ_ONE << prec
+    x = t = ((one << prec) // x)
+    s = one + x
+    k = 2
+    while t:
+        t = (k*t*x) >> prec
+        s += t
+        k += 1
+    return s
+def complex_ei_asymptotic(zre, zim, prec):
+    _abs = abs
+    one = MPZ_ONE << prec
+    M = (zim*zim + zre*zre) >> prec
+    # 1 / z
+    xre = tre = (zre << prec) // M
+    xim = tim = ((-zim) << prec) // M
+    sre = one + xre
+    sim = xim
+    k = 2
+    while _abs(tre) + _abs(tim) > 1000:
+        #print tre, tim
+        tre, tim = ((tre*xre-tim*xim)*k)>>prec, ((tre*xim+tim*xre)*k)>>prec
+        sre += tre
+        sim += tim
+        k += 1
+        if k > prec:
+            raise NoConvergence
+    return sre, sim
+def mpf_ei(x, prec, rnd=round_fast, e1=False):
+    # Exponential integral Ei(x) of an mpf tuple x. With e1=True the argument
+    # is negated on entry and the result negated on exit, i.e. the function
+    # returns -Ei(-x), which is E1(x).
+    if e1:
+        x = mpf_neg(x)
+    sign, man, exp, bc = x
+    # In e1 mode a clear sign bit after the negation is rejected:
+    # zero returns +inf, everything else raises ComplexResult.
+    if e1 and not sign:
+        if x == fzero:
+            return finf
+        raise ComplexResult("E1(x) for x < 0")
+    if man:
+        xabs = 0, man, exp, bc
+        xmag = exp+bc
+        wp = prec + 20
+        # The asymptotic series is used when |x| is large compared with the
+        # working precision: either by magnitude, or when the integer part
+        # of |x| exceeds int(wp*0.693) + 10.
+        can_use_asymp = xmag > wp
+        if not can_use_asymp:
+            if exp >= 0:
+                xabsint = man << exp
+            else:
+                xabsint = man >> (-exp)
+            can_use_asymp = xabsint > int(wp*0.693) + 10
+        if can_use_asymp:
+            if xmag > wp:
+                # The series factor is taken as exactly 1
+                v = fone
+            else:
+                v = from_man_exp(ei_asymptotic(to_fixed(x, wp), wp), -wp)
+            # Result: series factor * exp(x) / x
+            v = mpf_mul(v, mpf_exp(x, wp), wp)
+            v = mpf_div(v, x, prec, rnd)
+        else:
+            # Taylor series: the working precision grows by twice the
+            # integer part of |x|
+            wp += 2*int(to_int(xabs))
+            u = to_fixed(x, wp)
+            v = ei_taylor(u, wp) + euler_fixed(wp)
+            # Result: (Taylor sum + euler_fixed term) + log|x|
+            t1 = from_man_exp(v,-wp)
+            t2 = mpf_log(xabs,wp)
+            v = mpf_add(t1, t2, prec, rnd)
+    else:
+        # Zero mantissa: 0 -> -inf, +inf -> +inf, -inf -> 0, else nan
+        if x == fzero: v = fninf
+        elif x == finf: v = finf
+        elif x == fninf: v = fzero
+        else: v = fnan
+    if e1:
+        v = mpf_neg(v)
+    return v
+def mpc_ei(z, prec, rnd=round_fast, e1=False):
+    # Exponential integral of a complex value z = (a, b), a pair of mpf
+    # tuples. With e1=True the argument is negated on entry and the result
+    # negated on exit. Returns a pair of mpf values (real, imaginary).
+    if e1:
+        z = mpc_neg(z)
+    a, b = z
+    asign, aman, aexp, abc = a
+    bsign, bman, bexp, bbc = b
+    # Purely real argument: delegate to mpf_ei. In e1 mode the imaginary
+    # part is -pi when the (negated) real part has a clear sign bit,
+    # and zero otherwise.
+    if b == fzero:
+        if e1:
+            x = mpf_neg(mpf_ei(a, prec, rnd))
+            if not asign:
+                y = mpf_neg(mpf_pi(prec, rnd))
+            else:
+                y = fzero
+            return x, y
+        else:
+            return mpf_ei(a, prec, rnd), fzero
+    # A zero mantissa in either component (with a != 0) yields (nan, nan)
+    if a != fzero:
+        if not aman or not bman:
+            return (fnan, fnan)
+    wp = prec + 40
+    amag = aexp+abc
+    bmag = bexp+bbc
+    zmag = max(amag, bmag)
+    # Same kind of size test as in mpf_ei, using |int(a)| + |int(b)|
+    can_use_asymp = zmag > wp
+    if not can_use_asymp:
+        zabsint = abs(to_int(a)) + abs(to_int(b))
+        can_use_asymp = zabsint > int(wp*0.693) + 20
+    # If the asymptotic series raises NoConvergence, fall through to the
+    # Taylor series below.
+    try:
+        if can_use_asymp:
+            if zmag > wp:
+                v = fone, fzero
+            else:
+                zre = to_fixed(a, wp)
+                zim = to_fixed(b, wp)
+                vre, vim = complex_ei_asymptotic(zre, zim, wp)
+                v = from_man_exp(vre, -wp), from_man_exp(vim, -wp)
+            # Series factor * exp(z) / z
+            v = mpc_mul(v, mpc_exp(z, wp), wp)
+            v = mpc_div(v, z, wp)
+            if e1:
+                v = mpc_neg(v, prec, rnd)
+            else:
+                # Without e1, pi is added to the imaginary part, or
+                # subtracted when b has its sign bit set
+                x, y = v
+                if bsign:
+                    v = mpf_pos(x, prec, rnd), mpf_sub(y, mpf_pi(wp), prec, rnd)
+                else:
+                    v = mpf_pos(x, prec, rnd), mpf_add(y, mpf_pi(wp), prec, rnd)
+            return v
+    except NoConvergence:
+        pass
+    #wp += 2*max(0,zmag)
+    # Taylor series: the working precision grows by twice the integer part
+    # of |z| (the modulus itself is computed at 5 bits)
+    wp += 2*int(to_int(mpc_abs(z, 5)))
+    zre = to_fixed(a, wp)
+    zim = to_fixed(b, wp)
+    vre, vim = complex_ei_taylor(zre, zim, wp)
+    vre += euler_fixed(wp)
+    v = from_man_exp(vre,-wp), from_man_exp(vim,-wp)
+    # The logarithm term uses -z in e1 mode (z was negated on entry)
+    if e1:
+        u = mpc_log(mpc_neg(z),wp)
+    else:
+        u = mpc_log(z,wp)
+    v = mpc_add(v, u, prec, rnd)
+    if e1:
+        v = mpc_neg(v)
+    return v
+def mpf_e1(x, prec, rnd=round_fast):
+    return mpf_ei(x, prec, rnd, True)
+def mpc_e1(x, prec, rnd=round_fast):
+    return mpc_ei(x, prec, rnd, True)
+def mpf_expint(n, x, prec, rnd=round_fast, gamma=False):
+    """
+    E_n(x), n an integer, x real
+    With gamma=True, computes Gamma(n,x)   (upper incomplete gamma function)
+    Returns (real, None) if real, otherwise (real, imag)
+    The imaginary part is an optional branch cut term
+    """
+    sign, man, exp, bc = x
+    # Zero mantissa: exact answers for x = 0 and x = +inf; any other such
+    # input gives (nan, nan).
+    if not man:
+        if gamma:
+            if x == fzero:
+                # Actually gamma function pole
+                if n <= 0:
+                    return finf, None
+                return mpf_gamma_int(n, prec, rnd), None
+            if x == finf:
+                return fzero, None
+            # TODO: could return finite imaginary value at -inf
+            return fnan, fnan
+        else:
+            if x == fzero:
+                if n > 1:
+                    return from_rational(1, n-1, prec, rnd), None
+                else:
+                    return finf, None
+            if x == finf:
+                return fzero, None
+            return fnan, fnan
+    # In gamma mode the code below works with n replaced by 1-n; n_orig
+    # keeps the caller's value for the powers of x.
+    n_orig = n
+    if gamma:
+        n = 1-n
+    wp = prec + 20
+    xmag = exp + bc
+    # Beware of near-poles
+    if xmag < -10:
+        raise NotImplementedError
+    nmag = bitcount(abs(n))
+    # An imaginary part is returned only for n > 0 and x with the sign bit set
+    have_imag = n > 0 and sign
+    negx = mpf_neg(x)
+    # Skip series if direct convergence
+    if n == 0 or 2*nmag - xmag < -wp:
+        if gamma:
+            v = mpf_exp(negx, wp)
+            re = mpf_mul(v, mpf_pow_int(x, n_orig-1, wp), prec, rnd)
+        else:
+            v = mpf_exp(negx, wp)
+            re = mpf_div(v, x, prec, rnd)
+    else:
+        # Finite number of terms, or...
+        can_use_asymptotic_series = -3*wp < n <= 0
+        # ...large enough?
+        if not can_use_asymptotic_series:
+            # siz is a size estimate, in bits, for the term with index m;
+            # the series is accepted when it falls below tol
+            xi = abs(to_int(x))
+            m = min(max(1, xi-n), 2*wp)
+            siz = -n*nmag + (m+n)*bitcount(abs(m+n)) - m*xmag - (144*m//100)
+            tol = -wp-10
+            can_use_asymptotic_series = siz < tol
+        if can_use_asymptotic_series:
+            # r = -1/x in fixed point; the loop stops when the counter m
+            # reaches zero or the term t vanishes
+            r = ((-MPZ_ONE) << (wp+wp)) // to_fixed(x, wp)
+            m = n
+            t = r*m
+            s = MPZ_ONE << wp
+            while m and t:
+                s += t
+                m += 1
+                t = (m*r*t) >> wp
+            v = mpf_exp(negx, wp)
+            if gamma:
+                # ~ exp(-x) * x^(n-1) * (1 + ...)
+                v = mpf_mul(v, mpf_pow_int(x, n_orig-1, wp), wp)
+            else:
+                # ~ exp(-x)/x * (1 + ...)
+                v = mpf_div(v, x, wp)
+            re = mpf_mul(v, from_man_exp(s, -wp), prec, rnd)
+        elif n == 1:
+            # Computed as -Ei(-x)
+            re = mpf_neg(mpf_ei(negx, prec, rnd))
+        elif n > 0 and n < 3*wp:
+            # Result is (T1 + T2) / (n-1)!: T1 comes from Ei(-x), T2 is a
+            # finite sum with factorial coefficients multiplied by exp(-x)
+            T1 = mpf_neg(mpf_ei(negx, wp))
+            if gamma:
+                if n_orig & 1:
+                    T1 = mpf_neg(T1)
+            else:
+                T1 = mpf_mul(T1, mpf_pow_int(negx, n-1, wp), wp)
+            r = t = to_fixed(x, wp)
+            # facs holds 0!, 1!, ..., (n-2)!, then reversed
+            facs = [1] * (n-1)
+            for k in range(1,n-1):
+                facs[k] = facs[k-1] * k
+            facs = facs[::-1]
+            s = facs[0] << wp
+            for k in range(1, n-1):
+                if k & 1:
+                    s -= facs[k] * t
+                else:
+                    s += facs[k] * t
+                t = (t*r) >> wp
+            T2 = from_man_exp(s, -wp, wp)
+            T2 = mpf_mul(T2, mpf_exp(negx, wp))
+            if gamma:
+                T2 = mpf_mul(T2, mpf_pow_int(x, n_orig, wp), wp)
+            R = mpf_add(T1, T2)
+            re = mpf_div(R, from_int(ifac(n-1)), prec, rnd)
+        else:
+            raise NotImplementedError
+    if have_imag:
+        # Imaginary part: pi divided by -(n-1)!, with a sign flip for odd
+        # n_orig in gamma mode, or an extra factor (-x)^(n_orig-1) otherwise
+        M = from_int(-ifac(n-1))
+        if gamma:
+            im = mpf_div(mpf_pi(wp), M, prec, rnd)
+            if n_orig & 1:
+                im = mpf_neg(im)
+        else:
+            im = mpf_div(mpf_mul(mpf_pi(wp), mpf_pow_int(negx, n_orig-1, wp), wp), M, prec, rnd)
+        return re, im
+    else:
+        return re, None
+def mpf_ci_si_taylor(x, wp, which=0):
+    """
+    0 - Ci(x) - (euler+log(x))
+    1 - Si(x)
+    """
+    x = to_fixed(x, wp)
+    x2 = -(x*x) >> wp
+    if which == 0:
+        s, t, k = 0, (MPZ_ONE<<wp), 2
+    else:
+        s, t, k = x, x, 3
+    while t:
+        t = (t*x2//(k*(k-1)))>>wp
+        s += t//k
+        k += 2
+    return from_man_exp(s, -wp)
+def mpc_ci_si_taylor(re, im, wp, which=0):
+    # The following code is only designed for small arguments,
+    # and not too small arguments (for relative accuracy)
+    if re[1]:
+        mag = re[2]+re[3]
+    elif im[1]:
+        mag = im[2]+im[3]
+    if im[1]:
+        mag = max(mag, im[2]+im[3])
+    if mag > 2 or mag < -wp:
+        raise NotImplementedError
+    wp += (2-mag)
+    zre = to_fixed(re, wp)
+    zim = to_fixed(im, wp)
+    z2re = (zim*zim-zre*zre)>>wp
+    z2im = (-2*zre*zim)>>wp
+    tre = zre
+    tim = zim
+    one = MPZ_ONE<<wp
+    if which == 0:
+        sre, sim, tre, tim, k = 0, 0, (MPZ_ONE<<wp), 0, 2
+    else:
+        sre, sim, tre, tim, k = zre, zim, zre, zim, 3
+    while max(abs(tre), abs(tim)) > 2:
+        f = k*(k-1)
+        tre, tim = ((tre*z2re-tim*z2im)//f)>>wp, ((tre*z2im+tim*z2re)//f)>>wp
+        sre += tre//k
+        sim += tim//k
+        k += 2
+    return from_man_exp(sre, -wp), from_man_exp(sim, -wp)
+def mpf_ci_si(x, prec, rnd=round_fast, which=2):
+    """
+    Calculation of Ci(x), Si(x) for real x.
+    which = 0 -- returns (Ci(x), -)
+    which = 1 -- returns (Si(x), -)
+    which = 2 -- returns (Ci(x), Si(x))
+    Note: if x < 0, Ci(x) needs an additional imaginary term, pi*i.
+    """
+    # The result is always the pair (ci, si); an entry that was not
+    # requested through `which` may be left as None.
+    wp = prec + 20
+    sign, man, exp, bc = x
+    ci, si = None, None
+    # Zero mantissa: x = 0 gives (-inf, 0), nan is passed through, and for
+    # the infinities ci is 0 while si is +pi/2 or -pi/2.
+    if not man:
+        if x == fzero:
+            return (fninf, fzero)
+        if x == fnan:
+            return (x, x)
+        ci = fzero
+        if which != 0:
+            if x == finf:
+                si = mpf_shift(mpf_pi(prec, rnd), -1)
+            if x == fninf:
+                si = mpf_neg(mpf_shift(mpf_pi(prec, negative_rnd[rnd]), -1))
+        return (ci, si)
+    # For small x: Ci(x) ~ euler + log(x), Si(x) ~ x
+    mag = exp+bc
+    if mag < -wp:
+        if which != 0:
+            si = mpf_perturb(x, 1-sign, prec, rnd)
+        if which != 1:
+            y = mpf_euler(wp)
+            xabs = mpf_abs(x)
+            ci = mpf_add(y, mpf_log(xabs, wp), prec, rnd)
+        return ci, si
+    # For huge x: Ci(x) ~ sin(x)/x, Si(x) ~ pi/2
+    elif mag > wp:
+        if which != 0:
+            if sign:
+                si = mpf_neg(mpf_pi(prec, negative_rnd[rnd]))
+            else:
+                si = mpf_pi(prec, rnd)
+            si = mpf_shift(si, -1)
+        if which != 1:
+            ci = mpf_div(mpf_sin(x, wp), x, prec, rnd)
+        return ci, si
+    else:
+        # Intermediate sizes: widen the working precision by |mag|
+        wp += abs(mag)
+    # Use an asymptotic series? The smallest value of n!/x^n
+    # occurs for n ~ x, where the magnitude is ~ exp(-x).
+    asymptotic = mag-1 > math.log(wp, 2)
+    # Case 1: convergent series near 0
+    if not asymptotic:
+        if which != 0:
+            si = mpf_pos(mpf_ci_si_taylor(x, wp, 1), prec, rnd)
+        if which != 1:
+            # The Taylor routine omits euler + log|x|; add both here
+            ci = mpf_ci_si_taylor(x, wp, 0)
+            ci = mpf_add(ci, mpf_euler(wp), wp)
+            ci = mpf_add(ci, mpf_log(mpf_abs(x), wp), prec, rnd)
+        return ci, si
+    # From here on x is replaced by |x|; the saved sign bit is applied to
+    # si near the end.
+    x = mpf_abs(x)
+    # Case 2: asymptotic series for x >> 1
+    xf = to_fixed(x, wp)
+    xr = (MPZ_ONE<<(2*wp)) // xf   # 1/x
+    s1 = (MPZ_ONE << wp)
+    s2 = xr
+    t = xr
+    k = 2
+    # Successive terms are added alternately to s1 and s2; the sign is
+    # flipped once per pass through the loop.
+    while t:
+        t = -t
+        t = (t*xr*k)>>wp
+        k += 1
+        s1 += t
+        t = (t*xr*k)>>wp
+        k += 1
+        s2 += t
+    s1 = from_man_exp(s1, -wp)
+    s2 = from_man_exp(s2, -wp)
+    s1 = mpf_div(s1, x, wp)
+    s2 = mpf_div(s2, x, wp)
+    cos, sin = mpf_cos_sin(x, wp)
+    # Ci(x) = sin(x)*s1-cos(x)*s2
+    # Si(x) = pi/2-cos(x)*s1-sin(x)*s2
+    if which != 0:
+        si = mpf_add(mpf_mul(cos, s1), mpf_mul(sin, s2), wp)
+        si = mpf_sub(mpf_shift(mpf_pi(wp), -1), si, wp)
+        if sign:
+            si = mpf_neg(si)
+        si = mpf_pos(si, prec, rnd)
+    if which != 1:
+        ci = mpf_sub(mpf_mul(sin, s1), mpf_mul(cos, s2), prec, rnd)
+    return ci, si
+def mpf_ci(x, prec, rnd=round_fast):
+    if mpf_sign(x) < 0:
+        raise ComplexResult
+    return mpf_ci_si(x, prec, rnd, 0)[0]
+def mpf_si(x, prec, rnd=round_fast):
+    """Return Si(x) for a real mpf x, rounded to prec bits using rnd."""
+    # which=1 requests only Si, which mpf_ci_si places in the second slot
+    return mpf_ci_si(x, prec, rnd, 1)[1]
+def mpc_ci(z, prec, rnd=round_fast):
+    re, im = z
+    if im == fzero:
+        ci = mpf_ci_si(re, prec, rnd, 0)[0]
+        if mpf_sign(re) < 0:
+            return (ci, mpf_pi(prec, rnd))
+        return (ci, fzero)
+    wp = prec + 20
+    cre, cim = mpc_ci_si_taylor(re, im, wp, 0)
+    cre = mpf_add(cre, mpf_euler(wp), wp)
+    ci = mpc_add((cre, cim), mpc_log(z, wp), prec, rnd)
+    return ci
+def mpc_si(z, prec, rnd=round_fast):
+    re, im = z
+    if im == fzero:
+        return (mpf_ci_si(re, prec, rnd, 1)[1], fzero)
+    wp = prec + 20
+    z = mpc_ci_si_taylor(re, im, wp, 1)
+    return mpc_pos(z, prec, rnd)
+#-----------------------------------------------------------------------#
+#                                                                       #
+#                             Bessel functions                          #
+#                                                                       #
+#-----------------------------------------------------------------------#
+# A Bessel function of the first kind of integer order, J_n(x), is
+# given by the power series
+#             oo
+#             ___         k         2 k + n
+#            \        (-1)     / x \
+#    J_n(x) = )    ----------- | - |
+#            /___  k! (k + n)! \ 2 /
+#            k = 0
+# Simplifying the quotient between two successive terms gives the
+# ratio x^2 / (-4*k*(k+n)). Hence, we only need one full-precision
+# multiplication and one division by a small integer per term.
+# The complex version is very similar, the only difference being
+# that the multiplication is actually 4 multiplies.
+# In the general case, we have
+# J_v(x) = (x/2)**v / v! * 0F1(v+1, (-1/4)*x**2)
+# TODO: for extremely large x, we could use an asymptotic
+# trigonometric approximation.
+# TODO: recompute at higher precision if the fixed-point mantissa
+# is very small
+def mpf_besseljn(n, x, prec, rounding=round_fast):
+    prec += 50
+    negate = n < 0 and n & 1
+    mag = x[2]+x[3]
+    n = abs(n)
+    wp = prec + 20 + n*bitcount(n)
+    if mag < 0:
+        wp -= n * mag
+    x = to_fixed(x, wp)
+    x2 = (x**2) >> wp
+    if not n:
+        s = t = MPZ_ONE << wp
+    else:
+        s = t = (x**n // ifac(n)) >> ((n-1)*wp + n)
+    k = 1
+    while t:
+        t = ((t * x2) // (-4*k*(k+n))) >> wp
+        s += t
+        k += 1
+    if negate:
+        s = -s
+    return from_man_exp(s, -wp, prec, rounding)
+def mpc_besseljn(n, z, prec, rounding=round_fast):
+    """
+    Bessel function J_n(z) of integer order n for a complex z given as a
+    (real, imag) pair of mpf values, summed from the power series in
+    fixed-point arithmetic. Returns a (real, imag) pair rounded to the
+    requested precision.
+    """
+    # J_{-n}(z) = (-1)**n J_n(z): flip the sign only for odd negative n
+    negate = n < 0 and n & 1
+    n = abs(n)
+    # Keep the caller's precision; `prec` becomes the working precision
+    origprec = prec
+    zre, zim = z
+    mag = max(zre[2]+zre[3], zim[2]+zim[3])
+    prec += 20 + n*bitcount(n) + abs(mag)
+    if mag < 0:
+        prec -= n * mag
+    zre = to_fixed(zre, prec)
+    zim = to_fixed(zim, prec)
+    # z**2 = (re**2 - im**2) + 2*re*im*i; shifting by prec-1 supplies the 2
+    z2re = (zre**2 - zim**2) >> prec
+    z2im = (zre*zim) >> (prec-1)
+    if not n:
+        sre = tre = MPZ_ONE << prec
+        sim = tim = MPZ_ZERO
+    else:
+        # First term (z/2)**n / n!, rescaled back to prec fractional bits
+        re, im = complex_int_pow(zre, zim, n)
+        sre = tre = (re // ifac(n)) >> ((n-1)*prec + n)
+        sim = tim = (im // ifac(n)) >> ((n-1)*prec + n)
+    k = 1
+    # Iterate until the current term is down to a few units in the last place
+    while abs(tre) + abs(tim) > 3:
+        p = -4*k*(k+n)
+        tre, tim = tre*z2re - tim*z2im, tim*z2re + tre*z2im
+        tre = (tre // p) >> prec
+        tim = (tim // p) >> prec
+        sre += tre
+        sim += tim
+        k += 1
+    if negate:
+        sre = -sre
+        sim = -sim
+    re = from_man_exp(sre, -prec, origprec, rounding)
+    im = from_man_exp(sim, -prec, origprec, rounding)
+    return (re, im)
+def mpf_agm(a, b, prec, rnd=round_fast):
+    """
+    Computes the arithmetic-geometric mean agm(a,b) for
+    nonnegative mpf values a, b.
+    Raises ComplexResult if either argument has its sign bit set.
+    """
+    asign, aman, aexp, abc = a
+    bsign, bman, bexp, bbc = b
+    if asign or bsign:
+        raise ComplexResult("agm of a negative number")
+    # Handle inf, nan or zero in either operand
+    if not (aman and bman):
+        if a == fnan or b == fnan:
+            return fnan
+        if a == finf:
+            # agm(inf, 0) is treated as undefined
+            if b == fzero:
+                return fnan
+            return finf
+        if b == finf:
+            if a == fzero:
+                return fnan
+            return finf
+        # agm(0,x) = agm(x,0) = 0
+        return fzero
+    wp = prec + 20
+    amag = aexp+abc
+    bmag = bexp+bbc
+    mag_delta = amag - bmag
+    # Reduce to roughly the same magnitude using floating-point AGM
+    abs_mag_delta = abs(mag_delta)
+    if abs_mag_delta > 10:
+        # The magnitude gap is assumed to halve with every AGM step
+        while abs_mag_delta > 10:
+            a, b = mpf_shift(mpf_add(a,b,wp),-1), \
+                mpf_sqrt(mpf_mul(a,b,wp),wp)
+            abs_mag_delta //= 2
+        # Refresh the unpacked fields after the floating-point steps
+        asign, aman, aexp, abc = a
+        bsign, bman, bexp, bbc = b
+        amag = aexp+abc
+        bmag = bexp+bbc
+        mag_delta = amag - bmag
+    #print to_float(a), to_float(b)
+    # Use agm(a,b) = agm(x*a,x*b)/x to obtain a, b ~= 1
+    min_mag = min(amag,bmag)
+    max_mag = max(amag,bmag)
+    n = 0
+    # If too small, we lose precision when going to fixed-point
+    if min_mag < -8:
+        n = -min_mag
+    # If too large, we waste time using fixed-point with large numbers
+    elif max_mag > 20:
+        n = -max_mag
+    if n:
+        a = mpf_shift(a, n)
+        b = mpf_shift(b, n)
+    #print to_float(a), to_float(b)
+    af = to_fixed(a, wp)
+    bf = to_fixed(b, wp)
+    g = agm_fixed(af, bf, wp)
+    # Undo the 2**n scaling through the exponent while rounding to prec
+    return from_man_exp(g, -wp-n, prec, rnd)
+def mpf_agm1(a, prec, rnd=round_fast):
+    """
+    Computes the arithmetic-geometric mean agm(1,a) for a nonnegative
+    mpf value a.
+    """
+    # Thin convenience wrapper: fix the first argument of mpf_agm to 1
+    return mpf_agm(fone, a, prec, rnd)
+def mpc_agm(a, b, prec, rnd=round_fast):
+    """
+    Complex AGM.
+    TODO:
+    * check that convergence works as intended
+    * optimize
+    * select a nonarbitrary branch
+    """
+    if mpc_is_infnan(a) or mpc_is_infnan(b):
+        return fnan, fnan
+    if mpc_zero in (a, b):
+        return fzero, fzero
+    if mpc_neg(a) == b:
+        return fzero, fzero
+    wp = prec+20
+    eps = mpf_shift(fone, -wp+10)
+    while 1:
+        a1 = mpc_shift(mpc_add(a, b, wp), -1)
+        b1 = mpc_sqrt(mpc_mul(a, b, wp), wp)
+        a, b = a1, b1
+        size = mpf_min_max([mpc_abs(a,10), mpc_abs(b,10)])[1]
+        err = mpc_abs(mpc_sub(a, b, 10), 10)
+        if size == fzero or mpf_lt(err, mpf_mul(eps, size)):
+            return a
+def mpc_agm1(a, prec, rnd=round_fast):
+    """Complex arithmetic-geometric mean agm(1, a); forwards to mpc_agm."""
+    return mpc_agm(mpc_one, a, prec, rnd)
+def mpf_ellipk(x, prec, rnd=round_fast):
+    """
+    Complete elliptic integral of the first kind K(x) for a real mpf x,
+    computed as pi/2 / agm(1, sqrt(1-x)).
+    Special values handled here: K(0) = pi/2, K(-inf) = 0, K(nan) = nan,
+    K(1) = +inf.
+    """
+    # Zero mantissa: x is 0, nan or an infinity (+inf falls through)
+    if not x[1]:
+        if x == fzero:
+            return mpf_shift(mpf_pi(prec, rnd), -1)
+        if x == fninf:
+            return fzero
+        if x == fnan:
+            return x
+    if x == fone:
+        return finf
+    # TODO: for |x| << 1/2, one could fall back to
+    # pi/2 * hyp2f1_rat((1,2),(1,2),(1,1), x)
+    wp = prec + 15
+    # Use K(x) = pi/2/agm(1,a) where a = sqrt(1-x)
+    # The sqrt raises ComplexResult if x > 1 (then 1-x is negative)
+    a = mpf_sqrt(mpf_sub(fone, x, wp), wp)
+    v = mpf_agm1(a, wp)
+    r = mpf_div(mpf_pi(wp), v, prec, rnd)
+    # Halving is an exact exponent shift, so it is done after rounding
+    return mpf_shift(r, -1)
+def mpc_ellipk(z, prec, rnd=round_fast):
+    """
+    Complete elliptic integral of the first kind K(z) for a complex z
+    given as a (real, imag) pair of mpf values. Returns a (real, imag) pair.
+    """
+    re, im = z
+    if im == fzero:
+        if re == finf:
+            return mpc_zero
+        # Real argument <= 1: the real routine applies (result is real)
+        if mpf_le(re, fone):
+            return mpf_ellipk(re, prec, rnd), fzero
+    wp = prec + 15
+    # K(z) = pi/2 / agm(1, sqrt(1-z)), evaluated in complex arithmetic
+    a = mpc_sqrt(mpc_sub(mpc_one, z, wp), wp)
+    v = mpc_agm1(a, wp)
+    r = mpc_mpf_div(mpf_pi(wp), v, prec, rnd)
+    return mpc_shift(r, -1)
+def mpf_ellipe(x, prec, rnd=round_fast):
+    """
+    Complete elliptic integral of the second kind E(x) for a real mpf x.
+    Special values handled here: E(0) = pi/2, E(-inf) = +inf,
+    E(nan) = nan, E(1) = 1; x = +inf raises ComplexResult.
+    """
+    # http://functions.wolfram.com/EllipticIntegrals/
+    # EllipticK/20/01/0001/
+    # E = (1-m)*(K'(m)*2*m + K(m))
+    sign, man, exp, bc = x
+    if not man:
+        if x == fzero:
+            return mpf_shift(mpf_pi(prec, rnd), -1)
+        if x == fninf:
+            return finf
+        if x == fnan:
+            return x
+        if x == finf:
+            raise ComplexResult
+    if x == fone:
+        return fone
+    wp = prec+20
+    mag = exp+bc
+    # |x| far below the working precision: E(x) is pi/2 to within rounding
+    if mag < -wp:
+        return mpf_shift(mpf_pi(prec, rnd), -1)
+    # Compute a finite difference for K'
+    p = max(mag, 0) - wp
+    # Step h = 2**p; K is evaluated at doubled precision (2*wp) so the
+    # difference K(x) - K(x-h) keeps enough significant bits
+    h = mpf_shift(fone, p)
+    K = mpf_ellipk(x, 2*wp)
+    Kh = mpf_ellipk(mpf_sub(x, h), 2*wp)
+    # Backward difference (K(x) - K(x-h)) / h; dividing by h is a shift
+    Kdiff = mpf_shift(mpf_sub(K, Kh), -p)
+    t = mpf_sub(fone, x)
+    b = mpf_mul(Kdiff, mpf_shift(x,1), wp)
+    return mpf_mul(t, mpf_add(K, b), prec, rnd)
+def mpc_ellipe(z, prec, rnd=round_fast):
+    """
+    Complete elliptic integral of the second kind E(z) for a complex z
+    given as a (real, imag) pair of mpf values. Returns a (real, imag) pair.
+    """
+    re, im = z
+    if im == fzero:
+        if re == finf:
+            return (fzero, finf)
+        # Real argument <= 1: the real routine applies (result is real)
+        if mpf_le(re, fone):
+            return mpf_ellipe(re, prec, rnd), fzero
+    wp = prec + 15
+    # Only the magnitude of |z| is needed, so 1 bit of precision is enough
+    mag = mpc_abs(z, 1)
+    p = max(mag[2]+mag[3], 0) - wp
+    h = mpf_shift(fone, p)
+    K = mpc_ellipk(z, 2*wp)
+    Kh = mpc_ellipk(mpc_add_mpf(z, h, 2*wp), 2*wp)
+    # Forward difference (K(z+h) - K(z)) / h approximating K'(z)
+    Kdiff = mpc_shift(mpc_sub(Kh, K, wp), -p)
+    t = mpc_sub(mpc_one, z, wp)
+    b = mpc_mul(Kdiff, mpc_shift(z,1), wp)
+    # E = (1-z)*(K'(z)*2*z + K(z))
+    return mpc_mul(t, mpc_add(K, b, wp), prec, rnd)

tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libintmath.py ADDED Viewed

	@@ -0,0 +1,584 @@

+"""
+Utility functions for integer math.
+TODO: rename, cleanup, perhaps move the gmpy wrapper code
+here from settings.py
+"""
+import math
+from bisect import bisect
+from .backend import xrange
+from .backend import BACKEND, gmpy, sage, sage_utils, MPZ, MPZ_ONE, MPZ_ZERO
+# Lookup table: small_trailing[b] is the number of trailing zero bits of the
+# byte value b (entry 0 stays 0). Each pass j fills the indices that are odd
+# multiples of 2**j, i.e. have exactly j trailing zeros.
+small_trailing = [0] * 256
+for j in range(1,8):
+    small_trailing[1<<j::1<<(j+1)] = [j] * (1<<(7-j))
+def giant_steps(start, target, n=2):
+    """
+    Return a list of integers ~=
+    [start, n*start, ..., target/n^2, target/n, target]
+    but conservatively rounded so that the quotient between two
+    successive elements is actually slightly less than n.
+    With n = 2, this describes suitable precision steps for a
+    quadratically convergent algorithm such as Newton's method;
+    with n = 3 steps for cubic convergence (Halley's method), etc.
+        >>> giant_steps(50,1000)
+        [66, 128, 253, 502, 1000]
+        >>> giant_steps(50,1000,4)
+        [65, 252, 1000]
+    """
+    L = [target]
+    while L[-1] > start*n:
+        L = L + [L[-1]//n + 2]
+    return L[::-1]
+def rshift(x, n):
+    """For an integer x, calculate x >> n with the fastest (floor)
+    rounding. Unlike the plain Python expression (x >> n), n is
+    allowed to be negative, in which case a left shift is performed."""
+    if n >= 0: return x >> n
+    else:      return x << (-n)
+def lshift(x, n):
+    """For an integer x, calculate x << n. Unlike the plain Python
+    expression (x << n), n is allowed to be negative, in which case a
+    right shift with default (floor) rounding is performed."""
+    if n >= 0: return x << n
+    else:      return x >> (-n)
+# Under the sage backend the two helpers above are replaced by the plain
+# operator functions (presumably sage integers already accept negative
+# shift counts -- TODO confirm).
+if BACKEND == 'sage':
+    import operator
+    rshift = operator.rshift
+    lshift = operator.lshift
+def python_trailing(n):
+    """Count the number of trailing zero bits in abs(n)."""
+    if not n:
+        return 0
+    low_byte = n & 0xff
+    if low_byte:
+        return small_trailing[low_byte]
+    t = 8
+    n >>= 8
+    while not n & 0xff:
+        n >>= 8
+        t += 8
+    return t + small_trailing[n & 0xff]
+# gmpy-backed trailing-zero count; the scan method is named bit_scan1 in
+# gmpy >= 2 and scan1 in older releases.
+# NOTE(review): the version test compares strings lexicographically, so a
+# version string starting with "1" (e.g. a hypothetical "10.x") would take
+# the legacy branch -- confirm this is acceptable.
+if BACKEND == 'gmpy':
+    if gmpy.version() >= '2':
+        def gmpy_trailing(n):
+            """Count the number of trailing zero bits in abs(n) using gmpy."""
+            if n: return MPZ(n).bit_scan1()
+            else: return 0
+    else:
+        def gmpy_trailing(n):
+            """Count the number of trailing zero bits in abs(n) using gmpy."""
+            if n: return MPZ(n).scan1()
+            else: return 0
+# Small powers of 2
+powers = [1<<_ for _ in range(300)]
+def python_bitcount(n):
+    """Calculate bit size of the nonnegative integer n."""
+    bc = bisect(powers, n)
+    if bc != 300:
+        return bc
+    bc = int(math.log(n, 2)) - 4
+    return bc + bctable[n>>bc]
+def gmpy_bitcount(n):
+    """Calculate bit size of the nonnegative integer n."""
+    if n: return MPZ(n).numdigits(2)
+    else: return 0
+# Disabled alternative kept for reference (sage bit count via nbits()):
+#def sage_bitcount(n):
+#    if n: return MPZ(n).nbits()
+#    else: return 0
+def sage_trailing(n):
+    """Trailing-zero count delegated to MPZ(n).trailing_zero_bits()."""
+    return MPZ(n).trailing_zero_bits()
+# Bind the public names `bitcount` and `trailing` to the implementation
+# matching the active backend.
+if BACKEND == 'gmpy':
+    bitcount = gmpy_bitcount
+    trailing = gmpy_trailing
+elif BACKEND == 'sage':
+    sage_bitcount = sage_utils.bitcount
+    bitcount = sage_bitcount
+    trailing = sage_trailing
+else:
+    bitcount = python_bitcount
+    trailing = python_trailing
+# Use gmpy's own bit_length function when this gmpy build exposes one
+if BACKEND == 'gmpy' and 'bit_length' in dir(gmpy):
+    bitcount = gmpy.bit_length
+# Used to avoid slow function calls as far as possible
+# (trailtable covers 0..255, bctable covers 0..1023; python_bitcount reads
+# bctable for inputs of 300 bits or more)
+trailtable = [trailing(n) for n in range(256)]
+bctable = [bitcount(n) for n in range(1024)]
+# TODO: speed up for bases 2, 4, 8, 16, ...
+def bin_to_radix(x, xbits, base, bdigits):
+    """Changes radix of a fixed-point number; i.e., converts the value
+    x * 2**(-xbits) to the integer floor(x * base**bdigits / 2**xbits)
+    (for a nonnegative x)."""
+    # '*' binds tighter than '>>': the product is formed first, then shifted
+    return x * (MPZ(base)**bdigits) >> xbits
+stddigits = '0123456789abcdefghijklmnopqrstuvwxyz'
+def small_numeral(n, base=10, digits=stddigits):
+    """Return the string numeral of a positive integer in an arbitrary
+    base. Most efficient for small input."""
+    if base == 10:
+        return str(n)
+    digs = []
+    while n:
+        n, digit = divmod(n, base)
+        digs.append(digits[digit])
+    return "".join(digs[::-1])
+def numeral_python(n, base=10, size=0, digits=stddigits):
+    """Represent the integer n as a string of digits in the given base.
+    Recursive division is used to make this function about 3x faster
+    than Python's str() for converting integers to decimal strings.
+    The 'size' parameters specifies the number of digits in n; this
+    number is only used to determine splitting points and need not be
+    exact."""
+    if n <= 0:
+        if not n:
+            return "0"
+        return "-" + numeral(-n, base, size, digits)
+    # Fast enough to do directly
+    if size < 250:
+        return small_numeral(n, base, digits)
+    # Divide in half
+    half = (size // 2) + (size & 1)
+    A, B = divmod(n, base**half)
+    ad = numeral(A, base, half, digits)
+    bd = numeral(B, base, half, digits).rjust(half, "0")
+    return ad + bd
+def numeral_gmpy(n, base=10, size=0, digits=stddigits):
+    """Represent the integer n as a string of digits in the given base.
+    Recursive division is used to make this function about 3x faster
+    than Python's str() for converting integers to decimal strings.
+    The 'size' parameters specifies the number of digits in n; this
+    number is only used to determine splitting points and need not be
+    exact."""
+    if n < 0:
+        return "-" + numeral(-n, base, size, digits)
+    # gmpy.digits() may cause a segmentation fault when trying to convert
+    # extremely large values to a string. The size limit may need to be
+    # adjusted on some platforms, but 1500000 works on Windows and Linux.
+    if size < 1500000:
+        return gmpy.digits(n, base)
+    # Divide in half
+    half = (size // 2) + (size & 1)
+    A, B = divmod(n, MPZ(base)**half)
+    ad = numeral(A, base, half, digits)
+    bd = numeral(B, base, half, digits).rjust(half, "0")
+    return ad + bd
+# Public `numeral`: gmpy-backed when available, pure Python otherwise
+if BACKEND == "gmpy":
+    numeral = numeral_gmpy
+else:
+    numeral = numeral_python
+# Precomputed powers of two used as size cutoffs by the square root routines
+_1_800 = 1<<800
+_1_600 = 1<<600
+_1_400 = 1<<400
+_1_200 = 1<<200
+_1_100 = 1<<100
+_1_50 = 1<<50
+def isqrt_small_python(x):
+    """
+    Correctly (floor) rounded integer square root, using
+    division. Fast up to ~200 digits.
+    """
+    if not x:
+        return x
+    if x < _1_800:
+        # Exact with IEEE double precision arithmetic
+        if x < _1_50:
+            return int(x**0.5)
+        # Initial estimate can be any integer >= the true root; round up
+        r = int(x**0.5 * 1.00000000000001) + 1
+    else:
+        # x**0.5 is not applied to x directly here; take the float root of
+        # the leading ~100 bits and scale the result back up
+        bc = bitcount(x)
+        n = bc//2
+        r = int((x>>(2*n-100))**0.5+2)<<(n-50)  # +2 is to round up
+    # The following iteration now precisely computes floor(sqrt(x))
+    # See e.g. Crandall & Pomerance, "Prime Numbers: A Computational
+    # Perspective"
+    while 1:
+        y = (r+x//r)>>1
+        # Starting from above, the iterates decrease until the floor is hit
+        if y >= r:
+            return r
+        r = y
+def isqrt_fast_python(x):
+    """
+    Fast approximate integer square root, computed using division-free
+    Newton iteration for large x. For random integers the result is almost
+    always correct (floor(sqrt(x))), but is 1 ulp too small with a roughly
+    0.1% probability. If x is very close to an exact square, the answer is
+    1 ulp wrong with high probability.
+    With 0 guard bits, the largest error over a set of 10^5 random
+    inputs of size 1-10^5 bits was 3 ulp. The use of 10 guard bits
+    almost certainly guarantees a max 1 ulp error.
+    """
+    # Use direct division-based iteration if sqrt(x) < 2^400
+    # Assume floating-point square root accurate to within 1 ulp, then:
+    # 0 Newton iterations good to 52 bits
+    # 1 Newton iterations good to 104 bits
+    # 2 Newton iterations good to 208 bits
+    # 3 Newton iterations good to 416 bits
+    if x < _1_800:
+        y = int(x**0.5)
+        if x >= _1_100:
+            y = (y + x//y) >> 1
+            if x >= _1_200:
+                y = (y + x//y) >> 1
+                if x >= _1_400:
+                    y = (y + x//y) >> 1
+        return y
+    bc = bitcount(x)
+    guard_bits = 10
+    # Scale x by 4**guard_bits; the extra bits are shifted out at the end
+    x <<= 2*guard_bits
+    bc += 2*guard_bits
+    # Round the bit count up to an even number so that hbc = bc/2 is exact
+    bc += (bc&1)
+    hbc = bc//2
+    startprec = min(50, hbc)
+    # Newton iteration for 1/sqrt(x), with floating-point starting value
+    r = int(2.0**(2*startprec) * (x >> (bc-2*startprec)) ** -0.5)
+    pp = startprec
+    for p in giant_steps(startprec, hbc):
+        # r**2, scaled from real size 2**(-bc) to 2**p
+        r2 = (r*r) >> (2*pp - p)
+        # x*r**2, scaled from real size ~1.0 to 2**p
+        xr2 = ((x >> (bc-p)) * r2) >> p
+        # New value of r, scaled from real size 2**(-bc/2) to 2**p
+        r = (r * ((3<<p) - xr2)) >> (pp+1)
+        pp = p
+    # (1/sqrt(x))*x = sqrt(x)
+    return (r*(x>>hbc)) >> (p+guard_bits)
+def sqrtrem_python(x):
+    """Correctly rounded integer (floor) square root with remainder."""
+    # to check cutoff:
+    # plot(lambda x: timing(isqrt, 2**int(x)), [0,2000])
+    if x < _1_600:
+        y = isqrt_small_python(x)
+        return y, x - y*y
+    y = isqrt_fast_python(x) + 1
+    rem = x - y*y
+    # Correct remainder
+    while rem < 0:
+        y -= 1
+        rem += (1+2*y)
+    else:
+        if rem:
+            while rem > 2*(1+y):
+                y += 1
+                rem -= (1+2*y)
+    return y, rem
+def isqrt_python(x):
+    """Integer square root with correct (floor) rounding."""
+    # Reuse sqrtrem_python and drop the remainder
+    return sqrtrem_python(x)[0]
+def sqrt_fixed(x, prec):
+    """Square root of a fixed-point number x with prec fractional bits,
+    returned at the same scale: sqrt(x * 2**prec) computed by isqrt_fast."""
+    return isqrt_fast(x<<prec)
+# Alias bound to the same function
+sqrt_fixed2 = sqrt_fixed
+# Bind isqrt_small / isqrt_fast / isqrt / sqrtrem to the active backend.
+# With gmpy or sage a single backend routine serves all three isqrt names.
+if BACKEND == 'gmpy':
+    if gmpy.version() >= '2':
+        isqrt_small = isqrt_fast = isqrt = gmpy.isqrt
+        sqrtrem = gmpy.isqrt_rem
+    else:
+        isqrt_small = isqrt_fast = isqrt = gmpy.sqrt
+        sqrtrem = gmpy.sqrtrem
+elif BACKEND == 'sage':
+    # Prefer sage_utils.isqrt when present, else fall back to MPZ.isqrt()
+    isqrt_small = isqrt_fast = isqrt = \
+        getattr(sage_utils, "isqrt", lambda n: MPZ(n).isqrt())
+    sqrtrem = lambda n: MPZ(n).sqrtrem()
+else:
+    isqrt_small = isqrt_small_python
+    isqrt_fast = isqrt_fast_python
+    isqrt = isqrt_python
+    sqrtrem = sqrtrem_python
+def ifib(n, _cache={}):
+    """Computes the nth Fibonacci number as an integer, for
+    integer n."""
+    # Negative index: F(-n) = (-1)**(n+1) * F(n)
+    if n < 0:
+        return (-1)**(-n+1) * ifib(-n)
+    # _cache is a deliberately shared mutable default acting as a memo
+    if n in _cache:
+        return _cache[n]
+    # Keep the original index; n is consumed by the loop below
+    m = n
+    # Use Dijkstra's logarithmic algorithm
+    # The following implementation is basically equivalent to
+    # http://en.literateprograms.org/Fibonacci_numbers_(Scheme)
+    a, b, p, q = MPZ_ONE, MPZ_ZERO, MPZ_ZERO, MPZ_ONE
+    while n:
+        if n & 1:
+            aq = a*q
+            a, b = b*q+aq+a*p, b*p+aq
+            n -= 1
+        else:
+            qq = q*q
+            p, q = p*p+qq, qq+2*p*q
+            n >>= 1
+    # Only small indices are memoized
+    if m < 250:
+        _cache[m] = b
+    return b
+MAX_FACTORIAL_CACHE = 1000
+def ifac(n, memo={0:1, 1:1}):
+    """Return n factorial (for integers n >= 0 only)."""
+    f = memo.get(n)
+    if f:
+        return f
+    k = len(memo)
+    p = memo[k-1]
+    MAX = MAX_FACTORIAL_CACHE
+    while k <= n:
+        p *= k
+        if k <= MAX:
+            memo[k] = p
+        k += 1
+    return p
+def ifac2(n, memo_pair=[{0:1}, {1:1}]):
+    """Return n!! (double factorial), integers n >= 0 only."""
+    memo = memo_pair[n&1]
+    f = memo.get(n)
+    if f:
+        return f
+    k = max(memo)
+    p = memo[k]
+    MAX = MAX_FACTORIAL_CACHE
+    while k < n:
+        k += 2
+        p *= k
+        if k <= MAX:
+            memo[k] = p
+    return p
+# Replace the pure-Python factorial/Fibonacci with backend routines
+if BACKEND == 'gmpy':
+    ifac = gmpy.fac
+elif BACKEND == 'sage':
+    # Convert the sage result to a plain Python int
+    ifac = lambda n: int(sage.factorial(n))
+    ifib = sage.fibonacci
+def list_primes(n):
+    n = n + 1
+    sieve = list(xrange(n))
+    sieve[:2] = [0, 0]
+    for i in xrange(2, int(n**0.5)+1):
+        if sieve[i]:
+            for j in xrange(i**2, n, i):
+                sieve[j] = 0
+    return [p for p in sieve if p]
+if BACKEND == 'sage':
+    # Note: it is *VERY* important for performance that we convert
+    # the list to Python ints.
+    def list_primes(n):
+        """Sage-backed replacement built from sage.primes(n+1)."""
+        return [int(_) for _ in sage.primes(n+1)]
+# The odd primes below 50, as a tuple for ordered iteration and as a set
+# for membership tests (both are used by isprime)
+small_odd_primes = (3,5,7,11,13,17,19,23,29,31,37,41,43,47)
+small_odd_primes_set = set(small_odd_primes)
+def isprime(n):
+    """
+    Determines whether n is a prime number. A probabilistic test is
+    performed if n is very large. No special trick is used for detecting
+    perfect powers.
+        >>> sum(list_primes(100000))
+        454396537
+        >>> sum(n*isprime(n) for n in range(100000))
+        454396537
+    """
+    n = int(n)
+    if not n & 1:
+        return n == 2
+    if n < 50:
+        return n in small_odd_primes_set
+    for p in small_odd_primes:
+        if not n % p:
+            return False
+    m = n-1
+    s = trailing(m)
+    d = m >> s
+    def test(a):
+        x = pow(a,d,n)
+        if x == 1 or x == m:
+            return True
+        for r in xrange(1,s):
+            x = x**2 % n
+            if x == m:
+                return True
+        return False
+    # See http://primes.utm.edu/prove/prove2_3.html
+    if n < 1373653:
+        witnesses = [2,3]
+    elif n < 341550071728321:
+        witnesses = [2,3,5,7,11,13,17]
+    else:
+        witnesses = small_odd_primes
+    for a in witnesses:
+        if not test(a):
+            return False
+    return True
+def moebius(n):
+    """
+    Evaluates the Moebius function which is `mu(n) = (-1)^k` if `n`
+    is a product of `k` distinct primes and `mu(n) = 0` otherwise.
+    TODO: speed up using factorization
+    """
+    n = abs(int(n))
+    if n < 2:
+        return n
+    factors = []
+    for p in xrange(2, n+1):
+        if not (n % p):
+            if not (n % p**2):
+                return 0
+            if not sum(p % f for f in factors):
+                factors.append(p)
+    return (-1)**len(factors)
+def gcd(*args):
+    a = 0
+    for b in args:
+        if a:
+            while b:
+                a, b = b, a % b
+        else:
+            a = b
+    return a
+#  Comment by Juan Arias de Reyna:
+#
+#  I learned this method for computing EulerE[2n] from van de Lune.
+#
+#  We apply the formula   EulerE[2n] = (-1)^n 2**(-2n) sum_{j=0}^n a(2n,2j+1)
+#
+#  where the numbers a(n,j) vanish for  j > n+1 or j <= -1  and satisfies
+#
+#  a(0,-1) = a(0,0) = 0;  a(0,1)= 1; a(0,2) = a(0,3) = 0
+#
+#  a(n,j) = a(n-1,j)                              when n+j is even
+#  a(n,j) = (j-1) a(n-1,j-1) + (j+1) a(n-1,j+1)   when n+j is odd
+#
+#
+#  But we can use a single one-dimensional array a(j), since to compute
+#  a(n,j) we only need to know a(n-1,k) where k and j are of different parity
+#  and we do not have to keep the values already used.
+#
+#  We cached up the values of Euler numbers to sufficiently high order.
+#
+#  Important Observation: If we intend to use the numbers
+#     EulerE[1], EulerE[2], ... , EulerE[n]
+#     it is convenient to compute EulerE[n] first, since the algorithm
+#     computes all the previous ones along the way
+#     and keeps them in the CACHE
+# Largest index whose Euler number is stored in eulernum's cache
+MAX_EULER_CACHE = 500
+def eulernum(m, _cache={0:MPZ_ONE}):
+    r"""
+    Computes the Euler numbers `E(n)`, which can be defined as
+    coefficients of the Taylor expansion of `1/cosh x`:
+    .. math ::
+        \frac{1}{\cosh x} = \sum_{n=0}^\infty \frac{E_n}{n!} x^n
+    Example::
+        >>> [int(eulernum(n)) for n in range(11)]
+        [1, 0, -1, 0, 5, 0, -61, 0, 1385, 0, -50521]
+        >>> [int(eulernum(n)) for n in range(11)]   # test cache
+        [1, 0, -1, 0, 5, 0, -61, 0, 1385, 0, -50521]
+    """
+    # for odd m, the Euler numbers are zero (returned before any cache lookup)
+    if m & 1:
+        return MPZ_ZERO
+    # _cache is a deliberately shared mutable default acting as a memo
+    f = _cache.get(m)
+    if f:
+        return f
+    MAX = MAX_EULER_CACHE
+    # This initial value of n is overwritten by the loop variable below
+    n = m
+    a = [MPZ(_) for _ in [0,0,1,0,0,0]]
+    # NOTE(review): for a negative even m the loop body never runs and the
+    # function falls through, returning None.
+    for  n in range(1, m+1):
+        # In-place update of the single working row (see the comment block
+        # above the function); the list index is offset by one from j
+        for j in range(n+1, -1, -2):
+            a[j+1] = (j-1)*a[j] + (j+1)*a[j+2]
+        a.append(0)
+        suma = 0
+        for k in range(n+1, -1, -2):
+            suma += a[k+1]
+            # The cache entry is rewritten on every pass of this loop; only
+            # the value stored on the last pass uses the complete sum
+            if n <= MAX:
+                _cache[n] = ((-1)**(n//2))*(suma // 2**n)
+        if n == m:
+            return ((-1)**(n//2))*suma // 2**n
+def stirling1(n, k):
+    """
+    Stirling number of the first kind.
+    Raises ValueError if n or k is negative.
+    """
+    if n < 0 or k < 0:
+        raise ValueError
+    # k >= n: the value is 1 when k == n and 0 otherwise
+    if k >= n:
+        return MPZ(n == k)
+    if k < 1:
+        return MPZ_ZERO
+    # L[j] holds the unsigned value for (m, j); the row is updated in place
+    L = [MPZ_ZERO] * (k+1)
+    L[1] = MPZ_ONE
+    for m in xrange(2, n+1):
+        # Walk j downwards so L[j-1] still holds the previous row's value
+        for j in xrange(min(k, m), 0, -1):
+            L[j] = (m-1) * L[j] + L[j-1]
+    # Attach the sign (-1)**(n+k)
+    return (-1)**(n+k) * L[k]
+def stirling2(n, k):
+    """
+    Stirling number of the second kind.
+    Raises ValueError if n or k is negative.
+    """
+    if n < 0 or k < 0:
+        raise ValueError
+    # k >= n: the value is 1 when k == n and 0 otherwise
+    if k >= n:
+        return MPZ(n == k)
+    # Here n > k, so k == 1 gives 1 and k == 0 gives 0
+    if k <= 1:
+        return MPZ(k == 1)
+    # Alternating sum of C(k,j) * j**n over j = 0..k, divided by k!
+    s = MPZ_ZERO
+    t = MPZ_ONE
+    for j in xrange(k+1):
+        if (k + j) & 1:
+            s -= t * MPZ(j)**n
+        else:
+            s += t * MPZ(j)**n
+        # Step the binomial coefficient: C(k,j+1) = C(k,j)*(k-j)/(j+1)
+        t = t * (k - j) // (j + 1)
+    return s // ifac(k)

tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libmpc.py ADDED Viewed

	@@ -0,0 +1,835 @@

+"""
+Low-level functions for complex arithmetic.
+"""
+import sys
+from .backend import MPZ, MPZ_ZERO, MPZ_ONE, MPZ_TWO, BACKEND
+from .libmpf import (\
+    round_floor, round_ceiling, round_down, round_up,
+    round_nearest, round_fast, bitcount,
+    bctable, normalize, normalize1, reciprocal_rnd, rshift, lshift, giant_steps,
+    negative_rnd,
+    to_str, to_fixed, from_man_exp, from_float, to_float, from_int, to_int,
+    fzero, fone, ftwo, fhalf, finf, fninf, fnan, fnone,
+    mpf_abs, mpf_pos, mpf_neg, mpf_add, mpf_sub, mpf_mul,
+    mpf_div, mpf_mul_int, mpf_shift, mpf_sqrt, mpf_hypot,
+    mpf_rdiv_int, mpf_floor, mpf_ceil, mpf_nint, mpf_frac,
+    mpf_sign, mpf_hash,
+    ComplexResult
+)
+from .libelefun import (\
+    mpf_pi, mpf_exp, mpf_log, mpf_cos_sin, mpf_cosh_sinh, mpf_tan, mpf_pow_int,
+    mpf_log_hypot,
+    mpf_cos_sin_pi, mpf_phi,
+    mpf_cos, mpf_sin, mpf_cos_pi, mpf_sin_pi,
+    mpf_atan, mpf_atan2, mpf_cosh, mpf_sinh, mpf_tanh,
+    mpf_asin, mpf_acos, mpf_acosh, mpf_nthroot, mpf_fibonacci
+)
+# An mpc value is a (real, imag) tuple
+mpc_one = fone, fzero
+mpc_zero = fzero, fzero
+mpc_two = ftwo, fzero
+mpc_half = (fhalf, fzero)
+# Membership tuples used by mpc_is_inf / mpc_is_infnan
+_infs = (finf, fninf)
+_infs_nan = (finf, fninf, fnan)
+def mpc_is_inf(z):
+    """Check if either real or imaginary part is infinite"""
+    re, im = z
+    if re in _infs: return True
+    if im in _infs: return True
+    return False
+def mpc_is_infnan(z):
+    """Check if either real or imaginary part is infinite or nan"""
+    re, im = z
+    if re in _infs_nan: return True
+    if im in _infs_nan: return True
+    return False
+def mpc_to_str(z, dps, **kwargs):
+    re, im = z
+    rs = to_str(re, dps)
+    if im[0]:
+        return rs + " - " + to_str(mpf_neg(im), dps, **kwargs) + "j"
+    else:
+        return rs + " + " + to_str(im, dps, **kwargs) + "j"
+def mpc_to_complex(z, strict=False, rnd=round_fast):
+    """Convert the mpc value z to a Python complex, converting each part
+    with to_float(part, strict, rnd)."""
+    re, im = z
+    return complex(to_float(re, strict, rnd), to_float(im, strict, rnd))
+def mpc_hash(z):
+    if sys.version_info >= (3, 2):
+        re, im = z
+        h = mpf_hash(re) + sys.hash_info.imag * mpf_hash(im)
+        # Need to reduce either module 2^32 or 2^64
+        h = h % (2**sys.hash_info.width)
+        return int(h)
+    else:
+        try:
+            return hash(mpc_to_complex(z, strict=True))
+        except OverflowError:
+            return hash(z)
+def mpc_conjugate(z, prec, rnd=round_fast):
+    """Complex conjugate (re, -im). Only the imaginary part goes through
+    mpf_neg with prec/rnd; the real part is returned unchanged."""
+    re, im = z
+    return re, mpf_neg(im, prec, rnd)
+def mpc_is_nonzero(z):
+    """True unless z equals mpc_zero, i.e. the pair (fzero, fzero)."""
+    return z != mpc_zero
+def mpc_add(z, w, prec, rnd=round_fast):
+    """Complex sum z + w; real and imaginary parts are added and rounded
+    separately."""
+    a, b = z
+    c, d = w
+    return mpf_add(a, c, prec, rnd), mpf_add(b, d, prec, rnd)
+def mpc_add_mpf(z, x, prec, rnd=round_fast):
+    """Add the real mpf x to the mpc z. Only the real part is added and
+    rounded; the imaginary part is returned unchanged."""
+    a, b = z
+    return mpf_add(a, x, prec, rnd), b
+def mpc_sub(z, w, prec=0, rnd=round_fast):
+    """Complex difference z - w; real and imaginary parts are subtracted
+    and rounded separately. prec defaults to 0 and is passed straight to
+    mpf_sub."""
+    a, b = z
+    c, d = w
+    return mpf_sub(a, c, prec, rnd), mpf_sub(b, d, prec, rnd)
+def mpc_sub_mpf(z, p, prec=0, rnd=round_fast):
+    """Subtract the real mpf p from the mpc z. Only the real part changes;
+    the imaginary part is returned unchanged."""
+    a, b = z
+    return mpf_sub(a, p, prec, rnd), b
+def mpc_pos(z, prec, rnd=round_fast):
+    """Return +z with both parts passed through mpf_pos(part, prec, rnd),
+    i.e. z rounded to prec bits."""
+    a, b = z
+    return mpf_pos(a, prec, rnd), mpf_pos(b, prec, rnd)
+def mpc_neg(z, prec=None, rnd=round_fast):
+    """Return -z, negating both parts with mpf_neg(part, prec, rnd).
+    prec defaults to None and is passed straight through."""
+    a, b = z
+    return mpf_neg(a, prec, rnd), mpf_neg(b, prec, rnd)
+def mpc_shift(z, n):
+    """Apply mpf_shift(part, n) to both parts of z (multiplication by
+    2**n, judging by how callers use a shift of -1 to halve)."""
+    a, b = z
+    return mpf_shift(a, n), mpf_shift(b, n)
+def mpc_abs(z, prec, rnd=round_fast):
+    """Absolute value of a complex number, |a+bi|.
+    Returns an mpf value."""
+    a, b = z
+    # |a+bi| is the hypotenuse of the two parts
+    return mpf_hypot(a, b, prec, rnd)
+def mpc_arg(z, prec, rnd=round_fast):
+    """Argument of a complex number. Returns an mpf value."""
+    a, b = z
+    # Note the argument order: imaginary part first, then real part
+    return mpf_atan2(b, a, prec, rnd)
+def mpc_floor(z, prec, rnd=round_fast):
+    """Apply mpf_floor to the real and imaginary parts separately."""
+    a, b = z
+    return mpf_floor(a, prec, rnd), mpf_floor(b, prec, rnd)
+def mpc_ceil(z, prec, rnd=round_fast):
+    """Apply mpf_ceil to the real and imaginary parts separately."""
+    a, b = z
+    return mpf_ceil(a, prec, rnd), mpf_ceil(b, prec, rnd)
+def mpc_nint(z, prec, rnd=round_fast):
+    """Apply mpf_nint to the real and imaginary parts separately."""
+    a, b = z
+    return mpf_nint(a, prec, rnd), mpf_nint(b, prec, rnd)
+def mpc_frac(z, prec, rnd=round_fast):
+    """Apply mpf_frac to the real and imaginary parts separately."""
+    a, b = z
+    return mpf_frac(a, prec, rnd), mpf_frac(b, prec, rnd)
+def mpc_mul(z, w, prec, rnd=round_fast):
+    """
+    Complex multiplication.
+    Returns the real and imaginary part of (a+bi)*(c+di), rounded to
+    the specified precision. The rounding mode applies to the real and
+    imaginary parts separately.
+    """
+    a, b = z
+    c, d = w
+    p = mpf_mul(a, c)
+    q = mpf_mul(b, d)
+    r = mpf_mul(a, d)
+    s = mpf_mul(b, c)
+    re = mpf_sub(p, q, prec, rnd)
+    im = mpf_add(r, s, prec, rnd)
+    return re, im
+def mpc_square(z, prec, rnd=round_fast):
+    # (a+b*I)**2 == a**2 - b**2 + 2*I*a*b
+    a, b = z
+    p = mpf_mul(a,a)
+    q = mpf_mul(b,b)
+    r = mpf_mul(a,b, prec, rnd)
+    re = mpf_sub(p, q, prec, rnd)
+    im = mpf_shift(r, 1)
+    return re, im
+def mpc_mul_mpf(z, p, prec, rnd=round_fast):
+    a, b = z
+    re = mpf_mul(a, p, prec, rnd)
+    im = mpf_mul(b, p, prec, rnd)
+    return re, im
+def mpc_mul_imag_mpf(z, x, prec, rnd=round_fast):
+    """
+    Multiply the mpc value z by I*x where x is an mpf value.
+    """
+    a, b = z
+    re = mpf_neg(mpf_mul(b, x, prec, rnd))
+    im = mpf_mul(a, x, prec, rnd)
+    return re, im
+def mpc_mul_int(z, n, prec, rnd=round_fast):
+    a, b = z
+    re = mpf_mul_int(a, n, prec, rnd)
+    im = mpf_mul_int(b, n, prec, rnd)
+    return re, im
+def mpc_div(z, w, prec, rnd=round_fast):
+    a, b = z
+    c, d = w
+    wp = prec + 10
+    # mag = c*c + d*d
+    mag = mpf_add(mpf_mul(c, c), mpf_mul(d, d), wp)
+    # (a*c+b*d)/mag, (b*c-a*d)/mag
+    t = mpf_add(mpf_mul(a,c), mpf_mul(b,d), wp)
+    u = mpf_sub(mpf_mul(b,c), mpf_mul(a,d), wp)
+    return mpf_div(t,mag,prec,rnd), mpf_div(u,mag,prec,rnd)
+def mpc_div_mpf(z, p, prec, rnd=round_fast):
+    """Calculate z/p where p is real"""
+    a, b = z
+    re = mpf_div(a, p, prec, rnd)
+    im = mpf_div(b, p, prec, rnd)
+    return re, im
+def mpc_reciprocal(z, prec, rnd=round_fast):
+    """Calculate 1/z efficiently"""
+    a, b = z
+    m = mpf_add(mpf_mul(a,a),mpf_mul(b,b),prec+10)
+    re = mpf_div(a, m, prec, rnd)
+    im = mpf_neg(mpf_div(b, m, prec, rnd))
+    return re, im
+def mpc_mpf_div(p, z, prec, rnd=round_fast):
+    """Calculate p/z where p is real efficiently"""
+    a, b = z
+    m = mpf_add(mpf_mul(a,a),mpf_mul(b,b), prec+10)
+    re = mpf_div(mpf_mul(a,p), m, prec, rnd)
+    im = mpf_div(mpf_neg(mpf_mul(b,p)), m, prec, rnd)
+    return re, im
+def complex_int_pow(a, b, n):
+    """Complex integer power: computes (a+b*I)**n exactly for
+    nonnegative n (a and b must be Python ints)."""
+    wre = 1
+    wim = 0
+    while n:
+        if n & 1:
+            wre, wim = wre*a - wim*b, wim*a + wre*b
+            n -= 1
+        a, b = a*a - b*b, 2*a*b
+        n //= 2
+    return wre, wim
+def mpc_pow(z, w, prec, rnd=round_fast):
+    if w[1] == fzero:
+        return mpc_pow_mpf(z, w[0], prec, rnd)
+    return mpc_exp(mpc_mul(mpc_log(z, prec+10), w, prec+10), prec, rnd)
+def mpc_pow_mpf(z, p, prec, rnd=round_fast):
+    psign, pman, pexp, pbc = p
+    if pexp >= 0:
+        return mpc_pow_int(z, (-1)**psign * (pman<<pexp), prec, rnd)
+    if pexp == -1:
+        sqrtz = mpc_sqrt(z, prec+10)
+        return mpc_pow_int(sqrtz, (-1)**psign * pman, prec, rnd)
+    return mpc_exp(mpc_mul_mpf(mpc_log(z, prec+10), p, prec+10), prec, rnd)
+def mpc_pow_int(z, n, prec, rnd=round_fast):
+    """Raise the mpc value z = a+bi to the integer power n.
+
+    Purely real and purely imaginary bases, and the exponents 0, 1, 2 and
+    -1, are dispatched to cheaper routines. Any other negative exponent is
+    computed as the reciprocal of the positive power. What remains is
+    evaluated exactly on the integer mantissas when the estimated size of
+    the exact result is small, and as exp(n*log(z)) otherwise.
+    """
+    a, b = z
+    # Real base: plain real integer power.
+    if b == fzero:
+        return mpf_pow_int(a, n, prec, rnd), fzero
+    # Imaginary base: (b*i)**n = b**n * i**n, and i**n depends only on
+    # n mod 4 (Python's % yields 0..3 for negative n as well).
+    if a == fzero:
+        v = mpf_pow_int(b, n, prec, rnd)
+        n %= 4
+        if n == 0:
+            return v, fzero
+        elif n == 1:
+            return fzero, v
+        elif n == 2:
+            return mpf_neg(v), fzero
+        elif n == 3:
+            return fzero, mpf_neg(v)
+    # Small exponents with dedicated implementations.
+    if n == 0: return mpc_one
+    if n == 1: return mpc_pos(z, prec, rnd)
+    if n == 2: return mpc_square(z, prec, rnd)
+    if n == -1: return mpc_reciprocal(z, prec, rnd)
+    # Other negative powers: invert the positive power, which is computed
+    # with 4 extra bits.
+    if n < 0: return mpc_reciprocal(mpc_pow_int(z, -n, prec+4), prec, rnd)
+    asign, aman, aexp, abc = a
+    bsign, bman, bexp, bbc = b
+    # Work with signed integer mantissas from here on.
+    if asign: aman = -aman
+    if bsign: bman = -bman
+    de = aexp - bexp
+    abs_de = abs(de)
+    # Rough size estimate for the exact result (presumably in bits, given
+    # that abc/bbc are the bit-count fields); below the threshold the exact
+    # integer computation is used.
+    exact_size = n*(abs_de + max(abc, bbc))
+    if exact_size < 10000:
+        # Align both mantissas to the smaller exponent so that a and b
+        # share a common scale before the integer power.
+        if de > 0:
+            aman <<= de
+            aexp = bexp
+        else:
+            bman <<= (-de)
+            bexp = aexp
+        re, im = complex_int_pow(aman, bman, n)
+        # After alignment aexp == bexp, so both parts carry exponent n*aexp.
+        re = from_man_exp(re, int(n*aexp), prec, rnd)
+        im = from_man_exp(im, int(n*bexp), prec, rnd)
+        return re, im
+    # Large case: z**n = exp(n*log(z)) with 10 guard bits.
+    return mpc_exp(mpc_mul_int(mpc_log(z, prec+10), n, prec+10), prec, rnd)
+def mpc_sqrt(z, prec, rnd=round_fast):
+    """Complex square root (principal branch).
+    We have sqrt(a+bi) = sqrt((r+a)/2) + b/sqrt(2*(r+a))*i where
+    r = abs(a+bi), when a+bi is not a negative real number."""
+    a, b = z
+    if b == fzero:
+        if a == fzero:
+            return (a, b)
+        # When a+bi is a negative real number, we get a real sqrt times i
+        if a[0]:
+            im = mpf_sqrt(mpf_neg(a), prec, rnd)
+            return (fzero, im)
+        else:
+            re = mpf_sqrt(a, prec, rnd)
+            return (re, fzero)
+    wp = prec+20
+    if not a[0]:                               # case a positive
+        t  = mpf_add(mpc_abs((a, b), wp), a, wp)  # t = abs(a+bi) + a
+        u = mpf_shift(t, -1)                      # u = t/2
+        re = mpf_sqrt(u, prec, rnd)               # re = sqrt(u)
+        v = mpf_shift(t, 1)                       # v = 2*t
+        w  = mpf_sqrt(v, wp)                      # w = sqrt(v)
+        im = mpf_div(b, w, prec, rnd)             # im = b / w
+    else:                                      # case a negative
+        t = mpf_sub(mpc_abs((a, b), wp), a, wp)   # t = abs(a+bi) - a
+        u = mpf_shift(t, -1)                      # u = t/2
+        im = mpf_sqrt(u, prec, rnd)               # im = sqrt(u)
+        v = mpf_shift(t, 1)                       # v = 2*t
+        w  = mpf_sqrt(v, wp)                      # w = sqrt(v)
+        re = mpf_div(b, w, prec, rnd)             # re = b/w
+        if b[0]:
+            re = mpf_neg(re)
+            im = mpf_neg(im)
+    return re, im
+def mpc_nthroot_fixed(a, b, n, prec):
+    """Fixed-point Newton iteration for the n-th root of a+bi.
+
+    a, b are signed integers at fixed precision prec. The loop refines
+    the root up to prec+extra bits (extra = 10); the caller in this
+    module, mpc_nthroot, converts the returned integers using the
+    exponent -prec-extra.
+    """
+    # a, b signed integers at fixed precision prec
+    start = 50
+    # Starting guess: rescale the inputs (to n*start bits, judging by the
+    # shift amounts) and take the root in Python complex arithmetic.
+    a1 = int(rshift(a, prec - n*start))
+    b1 = int(rshift(b, prec - n*start))
+    try:
+        r = (a1 + 1j * b1)**(1.0/n)
+        re = r.real
+        im = r.imag
+        re = MPZ(int(re))
+        im = MPZ(int(im))
+    except OverflowError:
+        # The rescaled inputs did not fit the float-based computation;
+        # obtain the guess from mpc_pow at precision start instead.
+        a1 = from_int(a1, start)
+        b1 = from_int(b1, start)
+        fn = from_int(n)
+        nth = mpf_rdiv_int(1, fn, start)
+        re, im = mpc_pow((a1, b1), (nth, fzero), start)
+        re = to_int(re)
+        im = to_int(im)
+    extra = 10
+    prevp = start
+    extra1 = n
+    for p in giant_steps(start, prec+extra):
+        # (re2, im2) = r**(n-1) for the current root estimate r
+        # this is slow for large n, unlike int_pow_fixed
+        re2, im2 = complex_int_pow(re, im, n-1)
+        re2 = rshift(re2, (n-1)*prevp - p - extra1)
+        im2 = rshift(im2, (n-1)*prevp - p - extra1)
+        # r4 = |r**(n-1)|**2, the denominator of the complex division below
+        r4 = (re2*re2 + im2*im2) >> (p + extra1)
+        ap = rshift(a, prec - p)
+        bp = rshift(b, prec - p)
+        # (rec, imc) = (ap + bp*i) * conj(re2 + im2*i)
+        rec = (ap * re2 + bp * im2) >> p
+        imc = (-ap * im2 + bp * re2) >> p
+        # (reb, imb) = (a + b*i) / r**(n-1)
+        reb = (rec << p) // r4
+        imb = (imc << p) // r4
+        # Newton update: r <- ((a+bi)/r**(n-1) + (n-1)*r) / n
+        re = (reb + (n-1)*lshift(re, p-prevp))//n
+        im = (imb + (n-1)*lshift(im, p-prevp))//n
+        prevp = p
+    return re, im
+def mpc_nthroot(z, n, prec, rnd=round_fast):
+    """
+    Complex n-th root.
+    Use Newton method as in the real case when it is faster,
+    otherwise use z**(1/n)
+
+    z is an mpc pair (a, b) and n an integer. Returns an mpc pair.
+    """
+    a, b = z
+    # Real input whose sign bit is clear: defer to the real n-th root.
+    if a[0] == 0 and b == fzero:
+        re = mpf_nthroot(a, n, prec, rnd)
+        return (re, fzero)
+    if n < 2:
+        if n == 0:
+            return mpc_one
+        if n == 1:
+            return mpc_pos((a, b), prec, rnd)
+        if n == -1:
+            return mpc_div(mpc_one, (a, b), prec, rnd)
+        # n <= -2: take the (-n)-th root with 5 extra bits and the
+        # rounding mode looked up in reciprocal_rnd, then invert it.
+        inverse = mpc_nthroot((a, b), -n, prec+5, reciprocal_rnd[rnd])
+        return mpc_div(mpc_one, inverse, prec, rnd)
+    if n <= 20:
+        # Newton iteration in fixed point, used only when the magnitude
+        # test on abs(z) below passes.
+        prec2 = int(1.2 * (prec + 10))
+        # NOTE(review): the eight names unpacked here are not used below.
+        asign, aman, aexp, abc = a
+        bsign, bman, bexp, bbc = b
+        pf = mpc_abs((a,b), prec)
+        # pf[-2] + pf[-1] is exponent + bit count of abs(z).
+        if pf[-2] + pf[-1] > -10  and pf[-2] + pf[-1] < prec:
+            af = to_fixed(a, prec2)
+            bf = to_fixed(b, prec2)
+            re, im = mpc_nthroot_fixed(af, bf, n, prec2)
+            # Must match the `extra` used inside mpc_nthroot_fixed.
+            extra = 10
+            # NOTE(review): the result is rounded to prec2 bits here, not
+            # to prec - confirm this is intended.
+            re = from_man_exp(re, -prec2-extra, prec2, rnd)
+            im = from_man_exp(im, -prec2-extra, prec2, rnd)
+            return re, im
+    # General fallback: z**(1/n) at prec+20 bits, then round to prec.
+    fn = from_int(n)
+    prec2 = prec+10 + 10
+    nth = mpf_rdiv_int(1, fn, prec2)
+    re, im = mpc_pow((a, b), (nth, fzero), prec2, rnd)
+    re = normalize(re[0], re[1], re[2], re[3], prec, rnd)
+    im = normalize(im[0], im[1], im[2], im[3], prec, rnd)
+    return re, im
+def mpc_cbrt(z, prec, rnd=round_fast):
+    """
+    Complex cubic root.
+    """
+    return mpc_nthroot(z, 3, prec, rnd)
+def mpc_exp(z, prec, rnd=round_fast):
+    """
+    Complex exponential function.
+    We use the direct formula exp(a+bi) = exp(a) * (cos(b) + sin(b)*i)
+    for the computation. This formula is very nice because it is
+    pefectly stable; since we just do real multiplications, the only
+    numerical errors that can creep in are single-ulp rounding errors.
+    The formula is efficient since mpmath's real exp is quite fast and
+    since we can compute cos and sin simultaneously.
+    It is no problem if a and b are large; if the implementations of
+    exp/cos/sin are accurate and efficient for all real numbers, then
+    so is this function for all complex numbers.
+    """
+    a, b = z
+    if a == fzero:
+        return mpf_cos_sin(b, prec, rnd)
+    if b == fzero:
+        return mpf_exp(a, prec, rnd), fzero
+    mag = mpf_exp(a, prec+4, rnd)
+    c, s = mpf_cos_sin(b, prec+4, rnd)
+    re = mpf_mul(mag, c, prec, rnd)
+    im = mpf_mul(mag, s, prec, rnd)
+    return re, im
+def mpc_log(z, prec, rnd=round_fast):
+    re = mpf_log_hypot(z[0], z[1], prec, rnd)
+    im = mpc_arg(z, prec, rnd)
+    return re, im
+def mpc_cos(z, prec, rnd=round_fast):
+    """Complex cosine. The formula used is cos(a+bi) = cos(a)*cosh(b) -
+    sin(a)*sinh(b)*i.
+    The same comments apply as for the complex exp: only real
+    multiplications are pewrormed, so no cancellation errors are
+    possible. The formula is also efficient since we can compute both
+    pairs (cos, sin) and (cosh, sinh) in single stwps."""
+    a, b = z
+    if b == fzero:
+        return mpf_cos(a, prec, rnd), fzero
+    if a == fzero:
+        return mpf_cosh(b, prec, rnd), fzero
+    wp = prec + 6
+    c, s = mpf_cos_sin(a, wp)
+    ch, sh = mpf_cosh_sinh(b, wp)
+    re = mpf_mul(c, ch, prec, rnd)
+    im = mpf_mul(s, sh, prec, rnd)
+    return re, mpf_neg(im)
+def mpc_sin(z, prec, rnd=round_fast):
+    """Complex sine. We have sin(a+bi) = sin(a)*cosh(b) +
+    cos(a)*sinh(b)*i. See the docstring for mpc_cos for additional
+    comments."""
+    a, b = z
+    if b == fzero:
+        return mpf_sin(a, prec, rnd), fzero
+    if a == fzero:
+        return fzero, mpf_sinh(b, prec, rnd)
+    wp = prec + 6
+    c, s = mpf_cos_sin(a, wp)
+    ch, sh = mpf_cosh_sinh(b, wp)
+    re = mpf_mul(s, ch, prec, rnd)
+    im = mpf_mul(c, sh, prec, rnd)
+    return re, im
+def mpc_tan(z, prec, rnd=round_fast):
+    """Complex tangent. Computed as tan(a+bi) = sin(2a)/M + sinh(2b)/M*i
+    where M = cos(2a) + cosh(2b)."""
+    a, b = z
+    asign, aman, aexp, abc = a
+    bsign, bman, bexp, bbc = b
+    if b == fzero: return mpf_tan(a, prec, rnd), fzero
+    if a == fzero: return fzero, mpf_tanh(b, prec, rnd)
+    wp = prec + 15
+    a = mpf_shift(a, 1)
+    b = mpf_shift(b, 1)
+    c, s = mpf_cos_sin(a, wp)
+    ch, sh = mpf_cosh_sinh(b, wp)
+    # TODO: handle cancellation when c ~=  -1 and ch ~= 1
+    mag = mpf_add(c, ch, wp)
+    re = mpf_div(s, mag, prec, rnd)
+    im = mpf_div(sh, mag, prec, rnd)
+    return re, im
+def mpc_cos_pi(z, prec, rnd=round_fast):
+    a, b = z
+    if b == fzero:
+        return mpf_cos_pi(a, prec, rnd), fzero
+    b = mpf_mul(b, mpf_pi(prec+5), prec+5)
+    if a == fzero:
+        return mpf_cosh(b, prec, rnd), fzero
+    wp = prec + 6
+    c, s = mpf_cos_sin_pi(a, wp)
+    ch, sh = mpf_cosh_sinh(b, wp)
+    re = mpf_mul(c, ch, prec, rnd)
+    im = mpf_mul(s, sh, prec, rnd)
+    return re, mpf_neg(im)
+def mpc_sin_pi(z, prec, rnd=round_fast):
+    a, b = z
+    if b == fzero:
+        return mpf_sin_pi(a, prec, rnd), fzero
+    b = mpf_mul(b, mpf_pi(prec+5), prec+5)
+    if a == fzero:
+        return fzero, mpf_sinh(b, prec, rnd)
+    wp = prec + 6
+    c, s = mpf_cos_sin_pi(a, wp)
+    ch, sh = mpf_cosh_sinh(b, wp)
+    re = mpf_mul(s, ch, prec, rnd)
+    im = mpf_mul(c, sh, prec, rnd)
+    return re, im
+def mpc_cos_sin(z, prec, rnd=round_fast):
+    a, b = z
+    if a == fzero:
+        ch, sh = mpf_cosh_sinh(b, prec, rnd)
+        return (ch, fzero), (fzero, sh)
+    if b == fzero:
+        c, s = mpf_cos_sin(a, prec, rnd)
+        return (c, fzero), (s, fzero)
+    wp = prec + 6
+    c, s = mpf_cos_sin(a, wp)
+    ch, sh = mpf_cosh_sinh(b, wp)
+    cre = mpf_mul(c, ch, prec, rnd)
+    cim = mpf_mul(s, sh, prec, rnd)
+    sre = mpf_mul(s, ch, prec, rnd)
+    sim = mpf_mul(c, sh, prec, rnd)
+    return (cre, mpf_neg(cim)), (sre, sim)
+def mpc_cos_sin_pi(z, prec, rnd=round_fast):
+    a, b = z
+    if b == fzero:
+        c, s = mpf_cos_sin_pi(a, prec, rnd)
+        return (c, fzero), (s, fzero)
+    b = mpf_mul(b, mpf_pi(prec+5), prec+5)
+    if a == fzero:
+        ch, sh = mpf_cosh_sinh(b, prec, rnd)
+        return (ch, fzero), (fzero, sh)
+    wp = prec + 6
+    c, s = mpf_cos_sin_pi(a, wp)
+    ch, sh = mpf_cosh_sinh(b, wp)
+    cre = mpf_mul(c, ch, prec, rnd)
+    cim = mpf_mul(s, sh, prec, rnd)
+    sre = mpf_mul(s, ch, prec, rnd)
+    sim = mpf_mul(c, sh, prec, rnd)
+    return (cre, mpf_neg(cim)), (sre, sim)
+def mpc_cosh(z, prec, rnd=round_fast):
+    """Complex hyperbolic cosine. Computed as cosh(z) = cos(z*i)."""
+    a, b = z
+    return mpc_cos((b, mpf_neg(a)), prec, rnd)
+def mpc_sinh(z, prec, rnd=round_fast):
+    """Complex hyperbolic sine. Computed as sinh(z) = -i*sin(z*i)."""
+    a, b = z
+    b, a = mpc_sin((b, a), prec, rnd)
+    return a, b
+def mpc_tanh(z, prec, rnd=round_fast):
+    """Complex hyperbolic tangent. Computed as tanh(z) = -i*tan(z*i)."""
+    a, b = z
+    b, a = mpc_tan((b, a), prec, rnd)
+    return a, b
+# TODO: avoid loss of accuracy
+def mpc_atan(z, prec, rnd=round_fast):
+    a, b = z
+    # atan(z) = (I/2)*(log(1-I*z) - log(1+I*z))
+    # x = 1-I*z = 1 + b - I*a
+    # y = 1+I*z = 1 - b + I*a
+    wp = prec + 15
+    x = mpf_add(fone, b, wp), mpf_neg(a)
+    y = mpf_sub(fone, b, wp), a
+    l1 = mpc_log(x, wp)
+    l2 = mpc_log(y, wp)
+    a, b = mpc_sub(l1, l2, prec, rnd)
+    # (I/2) * (a+b*I) = (-b/2 + a/2*I)
+    v = mpf_neg(mpf_shift(b,-1)), mpf_shift(a,-1)
+    # Subtraction at infinity gives correct real part but
+    # wrong imaginary part (should be zero)
+    if v[1] == fnan and mpc_is_inf(z):
+        v = (v[0], fzero)
+    return v
+beta_crossover = from_float(0.6417)
+alpha_crossover = from_float(1.5)
+def acos_asin(z, prec, rnd, n):
+    """ complex acos for n = 0, asin for n = 1
+    The algorithm is described in
+    T.E. Hull, T.F. Fairgrieve and P.T.P. Tang
+    'Implementing the Complex Arcsine and Arccosine Functions
+    using Exception Handling',
+    ACM Trans. on Math. Software Vol. 23 (1997), p299
+    The complex acos and asin can be defined as
+    acos(z) = acos(beta) - I*sign(b)* log(alpha + sqrt(alpha**2 -1))
+    asin(z) = asin(beta) + I*sign(b)* log(alpha + sqrt(alpha**2 -1))
+    where z = a + I*b
+    alpha = (1/2)*(r + s); beta = (1/2)*(r - s) = a/alpha
+    r = sqrt((a+1)**2 + b**2); s = sqrt((a-1)**2 + b**2)
+    These expressions are rewritten in different ways in different
+    regions, delimited by two crossovers alpha_crossover and beta_crossover,
+    and by abs(a) <= 1, in order to improve the numerical accuracy.
+
+    z is an mpc pair; the result is an mpc pair rounded to prec with rnd.
+    Intermediate quantities are computed at prec+10 bits. The general
+    case works on abs(a), abs(b) and restores the signs at the end.
+    """
+    a, b = z
+    wp = prec + 10
+    # special cases with real argument
+    if b == fzero:
+        am = mpf_sub(fone, mpf_abs(a), wp)
+        # case abs(a) <= 1
+        if not am[0]:
+            if n == 0:
+                return mpf_acos(a, prec, rnd), fzero
+            else:
+                return mpf_asin(a, prec, rnd), fzero
+        # cases abs(a) > 1
+        else:
+            # case a < -1
+            if a[0]:
+                pi = mpf_pi(prec, rnd)
+                c = mpf_acosh(mpf_neg(a), prec, rnd)
+                if n == 0:
+                    return pi, mpf_neg(c)
+                else:
+                    return mpf_neg(mpf_shift(pi, -1)), c
+            # case a > 1
+            else:
+                c = mpf_acosh(a, prec, rnd)
+                if n == 0:
+                    return fzero, c
+                else:
+                    pi = mpf_pi(prec, rnd)
+                    return mpf_shift(pi, -1), mpf_neg(c)
+    # general case: reduce to a >= 0, b > 0 and remember the signs
+    asign = bsign = 0
+    if a[0]:
+        a = mpf_neg(a)
+        asign = 1
+    if b[0]:
+        b = mpf_neg(b)
+        bsign = 1
+    # am = 1 - a, ap = 1 + a
+    am = mpf_sub(fone, a, wp)
+    ap = mpf_add(fone, a, wp)
+    r = mpf_hypot(ap, b, wp)
+    s = mpf_hypot(am, b, wp)
+    alpha = mpf_shift(mpf_add(r, s, wp), -1)
+    beta = mpf_div(a, alpha, wp)
+    b2 = mpf_mul(b,b, wp)
+    # --- real part ---
+    # case beta <= beta_crossover
+    if not mpf_sub(beta_crossover, beta, wp)[0]:
+        if n == 0:
+            re = mpf_acos(beta, wp)
+        else:
+            re = mpf_asin(beta, wp)
+    else:
+        # to compute the real part in this region use the identity
+        # asin(beta) = atan(beta/sqrt(1-beta**2))
+        # beta/sqrt(1-beta**2) = a/sqrt((alpha + a) * (alpha - a))
+        # alpha + a is numerically accurate; alpha - a can have
+        # cancellations leading to numerical inaccuracies, so rewrite
+        # it in different ways according to the region
+        Ax = mpf_add(alpha, a, wp)
+        # case a <= 1
+        if not am[0]:
+            # c = b*b/(r + (a+1)); d = (s + (1-a))
+            # alpha - a = (1/2)*(c + d)
+            # case n=0: re = atan(sqrt((1/2) * Ax * (c + d))/a)
+            # case n=1: re = atan(a/sqrt((1/2) * Ax * (c + d)))
+            c = mpf_div(b2, mpf_add(r, ap, wp), wp)
+            d = mpf_add(s, am, wp)
+            re = mpf_shift(mpf_mul(Ax, mpf_add(c, d, wp), wp), -1)
+            if n == 0:
+                re = mpf_atan(mpf_div(mpf_sqrt(re, wp), a, wp), wp)
+            else:
+                re = mpf_atan(mpf_div(a, mpf_sqrt(re, wp), wp), wp)
+        else:
+            # c = Ax/(r + (a+1)); d = Ax/(s - (1-a))
+            # Ax*(alpha - a) = b*b*(1/2)*(c + d)
+            # case n = 0: re = atan(b*sqrt((c + d)/2)/a)
+            # case n = 1: re = atan(a/(b*sqrt((c + d)/2)))
+            c = mpf_div(Ax, mpf_add(r, ap, wp), wp)
+            d = mpf_div(Ax, mpf_sub(s, am, wp), wp)
+            re = mpf_shift(mpf_add(c, d, wp), -1)
+            re = mpf_mul(b, mpf_sqrt(re, wp), wp)
+            if n == 0:
+                re = mpf_atan(mpf_div(re, a, wp), wp)
+            else:
+                re = mpf_atan(mpf_div(a, re, wp), wp)
+    # --- imaginary part ---
+    # to compute alpha + sqrt(alpha**2 - 1), if alpha <= alpha_crossover
+    # replace it with 1 + Am1 + sqrt(Am1*(alpha+1)))
+    # where Am1 = alpha -1
+    # if alpha <= alpha_crossover:
+    if not mpf_sub(alpha_crossover, alpha, wp)[0]:
+        c1 = mpf_div(b2, mpf_add(r, ap, wp), wp)
+        # case a < 1
+        if mpf_neg(am)[0]:
+            # Am1 = (1/2) * (b*b/(r + (a+1)) + b*b/(s + (1-a))
+            c2 = mpf_add(s, am, wp)
+            c2 = mpf_div(b2, c2, wp)
+            Am1 = mpf_shift(mpf_add(c1, c2, wp), -1)
+        else:
+            # Am1 = (1/2) * (b*b/(r + (a+1)) + (s - (1-a)))
+            c2 = mpf_sub(s, am, wp)
+            Am1 = mpf_shift(mpf_add(c1, c2, wp), -1)
+        # im = log(1 + Am1 + sqrt(Am1*(alpha+1)))
+        im = mpf_mul(Am1, mpf_add(alpha, fone, wp), wp)
+        im = mpf_log(mpf_add(fone, mpf_add(Am1, mpf_sqrt(im, wp), wp), wp), wp)
+    else:
+        # im = log(alpha + sqrt(alpha*alpha - 1))
+        im = mpf_sqrt(mpf_sub(mpf_mul(alpha, alpha, wp), fone, wp), wp)
+        im = mpf_log(mpf_add(alpha, im, wp), wp)
+    # restore the sign of the original real part:
+    # acos(-x) = pi - acos(x), asin(-x) = -asin(x)
+    if asign:
+        if n == 0:
+            re = mpf_sub(mpf_pi(wp), re, wp)
+        else:
+            re = mpf_neg(re)
+    # sign of the imaginary part: negative for acos when b > 0 and
+    # negative for asin when b < 0
+    if not bsign and n == 0:
+        im = mpf_neg(im)
+    if bsign and n == 1:
+        im = mpf_neg(im)
+    # round the working-precision results to the requested precision
+    re = normalize(re[0], re[1], re[2], re[3], prec, rnd)
+    im = normalize(im[0], im[1], im[2], im[3], prec, rnd)
+    return re, im
+def mpc_acos(z, prec, rnd=round_fast):
+    return acos_asin(z, prec, rnd, 0)
+def mpc_asin(z, prec, rnd=round_fast):
+    return acos_asin(z, prec, rnd, 1)
+def mpc_asinh(z, prec, rnd=round_fast):
+    # asinh(z) = I * asin(-I z)
+    a, b = z
+    a, b =  mpc_asin((b, mpf_neg(a)), prec, rnd)
+    return mpf_neg(b), a
+def mpc_acosh(z, prec, rnd=round_fast):
+    # acosh(z) = -I * acos(z)   for Im(acos(z)) <= 0
+    #            +I * acos(z)   otherwise
+    a, b = mpc_acos(z, prec, rnd)
+    if b[0] or b == fzero:
+        return mpf_neg(b), a
+    else:
+        return b, mpf_neg(a)
+def mpc_atanh(z, prec, rnd=round_fast):
+    # atanh(z) = (log(1+z)-log(1-z))/2
+    wp = prec + 15
+    a = mpc_add(z, mpc_one, wp)
+    b = mpc_sub(mpc_one, z, wp)
+    a = mpc_log(a, wp)
+    b = mpc_log(b, wp)
+    v = mpc_shift(mpc_sub(a, b, wp), -1)
+    # Subtraction at infinity gives correct imaginary part but
+    # wrong real part (should be zero)
+    if v[0] == fnan and mpc_is_inf(z):
+        v = (fzero, v[1])
+    return v
+def mpc_fibonacci(z, prec, rnd=round_fast):
+    re, im = z
+    if im == fzero:
+        return (mpf_fibonacci(re, prec, rnd), fzero)
+    size = max(abs(re[2]+re[3]), abs(re[2]+re[3]))
+    wp = prec + size + 20
+    a = mpf_phi(wp)
+    b = mpf_add(mpf_shift(a, 1), fnone, wp)
+    u = mpc_pow((a, fzero), z, wp)
+    v = mpc_cos_pi(z, wp)
+    v = mpc_div(v, u, wp)
+    u = mpc_sub(u, v, wp)
+    u = mpc_div_mpf(u, b, prec, rnd)
+    return u
+def mpf_expj(x, prec, rnd='f'):
+    """Real-argument entry point for expj; it always raises ComplexResult
+    and performs no computation on x."""
+    raise ComplexResult
+def mpc_expj(z, prec, rnd='f'):
+    re, im = z
+    if im == fzero:
+        return mpf_cos_sin(re, prec, rnd)
+    if re == fzero:
+        return mpf_exp(mpf_neg(im), prec, rnd), fzero
+    ey = mpf_exp(mpf_neg(im), prec+10)
+    c, s = mpf_cos_sin(re, prec+10)
+    re = mpf_mul(ey, c, prec, rnd)
+    im = mpf_mul(ey, s, prec, rnd)
+    return re, im
+def mpf_expjpi(x, prec, rnd='f'):
+    """Real-argument entry point for expjpi; it always raises
+    ComplexResult and performs no computation on x."""
+    raise ComplexResult
+def mpc_expjpi(z, prec, rnd='f'):
+    re, im = z
+    if im == fzero:
+        return mpf_cos_sin_pi(re, prec, rnd)
+    sign, man, exp, bc = im
+    wp = prec+10
+    if man:
+        wp += max(0, exp+bc)
+    im = mpf_neg(mpf_mul(mpf_pi(wp), im, wp))
+    if re == fzero:
+        return mpf_exp(im, prec, rnd), fzero
+    ey = mpf_exp(im, prec+10)
+    c, s = mpf_cos_sin_pi(re, prec+10)
+    re = mpf_mul(ey, c, prec, rnd)
+    im = mpf_mul(ey, s, prec, rnd)
+    return re, im
+# When running on the Sage backend, rebind mpc_exp and mpc_sqrt to the
+# implementations exported by Sage's ext_libmp module. If that module
+# cannot be imported or lacks one of the names, only a warning is printed
+# and whatever bindings were in effect at that point remain.
+if BACKEND == 'sage':
+    try:
+        import sage.libs.mpmath.ext_libmp as _lbmp
+        mpc_exp = _lbmp.mpc_exp
+        mpc_sqrt = _lbmp.mpc_sqrt
+    except (ImportError, AttributeError):
+        print("Warning: Sage imports in libmpc failed")

tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_functions2.cpython-311.pyc ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c96a8c60ccaff3dbe94603afb496582f94300b3dd5c8ec016ff0c7e71f975baf
+size 172649

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/__init__.py ADDED Viewed

File without changes

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/Openacc/cupti_openacc.h ADDED Viewed

	@@ -0,0 +1,98 @@

+/*
+ * Copyright 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * NOTICE TO LICENSEE:
+ *
+ * This source code and/or documentation ("Licensed Deliverables") are
+ * subject to NVIDIA intellectual property rights under U.S. and
+ * international Copyright laws.
+ *
+ * These Licensed Deliverables contained herein is PROPRIETARY and
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
+ * conditions of a form of NVIDIA software license agreement by and
+ * between NVIDIA and Licensee ("License Agreement") or electronically
+ * accepted by Licensee.  Notwithstanding any terms or conditions to
+ * the contrary in the License Agreement, reproduction or disclosure
+ * of the Licensed Deliverables to any third party without the express
+ * written consent of NVIDIA is prohibited.
+ *
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THESE LICENSED DELIVERABLES.
+ *
+ * U.S. Government End Users.  These Licensed Deliverables are a
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+ * 1995), consisting of "commercial computer software" and "commercial
+ * computer software documentation" as such terms are used in 48
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
+ * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+ * U.S. Government End Users acquire the Licensed Deliverables with
+ * only those rights set forth herein.
+ *
+ * Any use of the Licensed Deliverables in individual and commercial
+ * software must include, in the user documentation and internal
+ * comments to the code, the above Disclaimer and U.S. Government End
+ * Users Notice.
+ */
+#include <cuda_stdint.h>
+#if !defined(_CUPTI_OPENACC_H_)
+#define _CUPTI_OPENACC_H_
+#ifndef CUPTIAPI
+#ifdef _WIN32
+#define CUPTIAPI __stdcall
+#else
+#define CUPTIAPI
+#endif
+#endif
+#if defined(__LP64__)
+#define CUPTILP64 1
+#elif defined(_WIN64)
+#define CUPTILP64 1
+#else
+#undef CUPTILP64
+#endif
+#if defined(__cplusplus)
+extern "C" {
+#endif
+#if defined(__GNUC__) && defined(CUPTI_LIB)
+    #pragma GCC visibility push(default)
+#endif
+/**
+ * \brief Initialize OpenACC support
+ *
+ * All three arguments are passed as untyped (void *) pointers.
+ *
+ * \param profRegister function of type acc_prof_reg as obtained from acc_register_library
+ * \param profUnregister function of type acc_prof_reg as obtained from acc_register_library
+ * \param profLookup function of type acc_prof_lookup as obtained from acc_register_library
+ *
+ * \return A CUptiResult value.
+ *
+ * \note NOTE(review): CUptiResult is not declared by the only header included
+ * here (cuda_stdint.h); presumably the header declaring it must be included
+ * before this one - confirm.
+ */
+CUptiResult CUPTIAPI
+cuptiOpenACCInitialize(void *profRegister, void *profUnregister, void *profLookup);
+#if defined(__GNUC__) && defined(CUPTI_LIB)
+    #pragma GCC visibility pop
+#endif
+#if defined(__cplusplus)
+}
+#endif
+#endif /*_CUPTI_OPENACC_H_*/

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_activity.h ADDED Viewed

The diff for this file is too large to render. See raw diff

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_events.h ADDED Viewed

	@@ -0,0 +1,1371 @@

+/*
+ * Copyright 2010-2021 NVIDIA Corporation.  All rights reserved.
+ *
+ * NOTICE TO LICENSEE:
+ *
+ * This source code and/or documentation ("Licensed Deliverables") are
+ * subject to NVIDIA intellectual property rights under U.S. and
+ * international Copyright laws.
+ *
+ * These Licensed Deliverables contained herein is PROPRIETARY and
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
+ * conditions of a form of NVIDIA software license agreement by and
+ * between NVIDIA and Licensee ("License Agreement") or electronically
+ * accepted by Licensee.  Notwithstanding any terms or conditions to
+ * the contrary in the License Agreement, reproduction or disclosure
+ * of the Licensed Deliverables to any third party without the express
+ * written consent of NVIDIA is prohibited.
+ *
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THESE LICENSED DELIVERABLES.
+ *
+ * U.S. Government End Users.  These Licensed Deliverables are a
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+ * 1995), consisting of "commercial computer software" and "commercial
+ * computer software documentation" as such terms are used in 48
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
+ * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+ * U.S. Government End Users acquire the Licensed Deliverables with
+ * only those rights set forth herein.
+ *
+ * Any use of the Licensed Deliverables in individual and commercial
+ * software must include, in the user documentation and internal
+ * comments to the code, the above Disclaimer and U.S. Government End
+ * Users Notice.
+ */
+#if !defined(_CUPTI_EVENTS_H_)
+#define _CUPTI_EVENTS_H_
+#include <cuda.h>
+#include <string.h>
+#include <cuda_stdint.h>
+#include <cupti_result.h>
+#ifndef CUPTIAPI
+#ifdef _WIN32
+#define CUPTIAPI __stdcall
+#else
+#define CUPTIAPI
+#endif
+#endif
+#if defined(__cplusplus)
+extern "C" {
+#endif
+#if defined(__GNUC__) && defined(CUPTI_LIB)
+    #pragma GCC visibility push(default)
+#endif
+/**
+ * \defgroup CUPTI_EVENT_API CUPTI Event API
+ * Functions, types, and enums that implement the CUPTI Event API.
+ *
+ * \note The CUPTI event APIs declared in the header cupti_events.h are not supported
+ * on devices with compute capability 7.5 and higher (i.e. Turing and later GPU
+ * architectures). These APIs will be deprecated in a future CUDA release. They are
+ * replaced by the Profiling API in the header cupti_profiler_target.h and the
+ * Perfworks metrics API in the headers nvperf_host.h and nvperf_target.h, which are
+ * supported on devices with compute capability 7.0 and higher (i.e. Volta and later
+ * GPU architectures).
+ *
+ * @{
+ */
+/**
+ * \brief ID for an event.
+ *
+ * An event represents a countable activity, action, or occurrence on
+ * the device. The ID is stored as a 32-bit unsigned integer.
+ */
+typedef uint32_t CUpti_EventID;
+/**
+ * \brief ID for an event domain.
+ *
+ * An event domain represents a group of related events. A device may
+ * have multiple instances of a domain, indicating that the device can
+ * simultaneously record multiple instances of each event within that
+ * domain. The ID is stored as a 32-bit unsigned integer.
+ */
+typedef uint32_t CUpti_EventDomainID;
+/**
+ * \brief A group of events.
+ *
+ * An event group is a collection of events that are managed
+ * together. All events in an event group must belong to the same
+ * domain. The group is referred to through an untyped (void *) handle.
+ */
+typedef void *CUpti_EventGroup;
+/**
+ * \brief Device class.
+ *
+ * Enumeration of device classes. A value of this type is returned by
+ * \ref cuptiDeviceGetAttribute for the device attribute
+ * CUPTI_DEVICE_ATTR_DEVICE_CLASS.
+ */
+typedef enum {
+  CUPTI_DEVICE_ATTR_DEVICE_CLASS_TESLA              = 0,
+  CUPTI_DEVICE_ATTR_DEVICE_CLASS_QUADRO             = 1,
+  CUPTI_DEVICE_ATTR_DEVICE_CLASS_GEFORCE            = 2,
+  CUPTI_DEVICE_ATTR_DEVICE_CLASS_TEGRA              = 3,
+} CUpti_DeviceAttributeDeviceClass;
+/**
+ * \brief Device attributes.
+ *
+ * CUPTI device attributes. These attributes can be read using \ref
+ * cuptiDeviceGetAttribute. The type of the value returned for each
+ * attribute is stated in that attribute's description.
+ */
+typedef enum {
+  /**
+   * Number of event IDs for a device. Value is a uint32_t.
+   */
+  CUPTI_DEVICE_ATTR_MAX_EVENT_ID                            = 1,
+  /**
+   * Number of event domain IDs for a device. Value is a uint32_t.
+   */
+  CUPTI_DEVICE_ATTR_MAX_EVENT_DOMAIN_ID                     = 2,
+  /**
+   * Get global memory bandwidth in Kbytes/sec. Value is a uint64_t.
+   */
+  CUPTI_DEVICE_ATTR_GLOBAL_MEMORY_BANDWIDTH                 = 3,
+  /**
+   * Get theoretical maximum number of instructions per cycle. Value
+   * is a uint32_t.
+   */
+  CUPTI_DEVICE_ATTR_INSTRUCTION_PER_CYCLE                   = 4,
+  /**
+   * Get theoretical maximum number of single precision instructions
+   * that can be executed per second. Value is a uint64_t.
+   */
+  CUPTI_DEVICE_ATTR_INSTRUCTION_THROUGHPUT_SINGLE_PRECISION = 5,
+  /**
+   * Get number of frame buffers for device. Value is a uint64_t.
+   */
+  CUPTI_DEVICE_ATTR_MAX_FRAME_BUFFERS                       = 6,
+  /**
+   * Get PCIE link rate in Mega bits/sec for device. Return 0 if bus-type
+   * is non-PCIE. Value is a uint64_t.
+   */
+  CUPTI_DEVICE_ATTR_PCIE_LINK_RATE                          = 7,
+  /**
+   * Get PCIE link width for device. Return 0 if bus-type
+   * is non-PCIE. Value is a uint64_t.
+   */
+  CUPTI_DEVICE_ATTR_PCIE_LINK_WIDTH                         = 8,
+  /**
+   * Get PCIE generation for device. Return 0 if bus-type
+   * is non-PCIE. Value is a uint64_t.
+   */
+  CUPTI_DEVICE_ATTR_PCIE_GEN                                = 9,
+  /**
+   * Get the class for the device. Value is a
+   * CUpti_DeviceAttributeDeviceClass.
+   */
+  CUPTI_DEVICE_ATTR_DEVICE_CLASS                            = 10,
+  /**
+   * Get the peak single precision flop per cycle. Value is a uint64_t.
+   */
+  CUPTI_DEVICE_ATTR_FLOP_SP_PER_CYCLE                       = 11,
+  /**
+   * Get the peak double precision flop per cycle. Value is a uint64_t.
+   */
+  CUPTI_DEVICE_ATTR_FLOP_DP_PER_CYCLE                       = 12,
+  /**
+   * Get number of L2 units. Value is a uint64_t.
+   */
+  CUPTI_DEVICE_ATTR_MAX_L2_UNITS                           = 13,
+  /**
+   * Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_SHARED
+   * preference. Value is a uint64_t.
+   */
+  CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_SHARED = 14,
+  /**
+   * Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_L1
+   * preference. Value is a uint64_t.
+   */
+  CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_L1 = 15,
+  /**
+   * Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_EQUAL
+   * preference. Value is a uint64_t.
+   */
+  CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_EQUAL = 16,
+  /**
+   * Get the peak half precision flop per cycle. Value is a uint64_t.
+   */
+  CUPTI_DEVICE_ATTR_FLOP_HP_PER_CYCLE                       = 17,
+  /**
+   * Check if Nvlink is connected to device. Returns 1 if at least one
+   * Nvlink is connected to the device, returns 0 otherwise.
+   * Value is a uint32_t.
+   */
+  CUPTI_DEVICE_ATTR_NVLINK_PRESENT                          = 18,
+    /**
+   * Check if Nvlink is present between GPU and CPU. Returns the
+   * bandwidth, in bytes/sec, if Nvlink is present, returns 0 otherwise.
+   * Value is a uint64_t.
+   */
+  CUPTI_DEVICE_ATTR_GPU_CPU_NVLINK_BW                       = 19,
+  /**
+   * Check if NVSwitch is present in the underlying topology.
+   * Returns 1 if present, returns 0 otherwise.
+   * Value is a uint32_t.
+   */
+  CUPTI_DEVICE_ATTR_NVSWITCH_PRESENT                        = 20,
+  CUPTI_DEVICE_ATTR_FORCE_INT                               = 0x7fffffff,
+} CUpti_DeviceAttribute;
+/**
+ * \brief Event domain attributes.
+ *
+ * Event domain attributes. Except where noted, all the attributes can
+ * be read using either \ref cuptiDeviceGetEventDomainAttribute or
+ * \ref cuptiEventDomainGetAttribute.
+ *
+ * Note that the numeric values are not contiguous: no attribute is
+ * defined with the value 2.
+ */
+typedef enum {
+  /**
+   * Event domain name. Value is a null-terminated const c-string.
+   */
+  CUPTI_EVENT_DOMAIN_ATTR_NAME                 = 0,
+  /**
+   * Number of instances of the domain for which event counts will be
+   * collected.  The domain may have additional instances that cannot
+   * be profiled (see CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT).
+   * Can be read only with \ref
+   * cuptiDeviceGetEventDomainAttribute. Value is a uint32_t.
+   */
+  CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT       = 1,
+  /**
+   * Total number of instances of the domain, including instances that
+   * cannot be profiled.  Use CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT
+   * to get the number of instances that can be profiled. Can be read
+   * only with \ref cuptiDeviceGetEventDomainAttribute. Value is a
+   * uint32_t.
+   */
+  CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT = 3,
+  /**
+   * Collection method used for events contained in the event domain.
+   * Value is a \ref CUpti_EventCollectionMethod.
+   */
+  CUPTI_EVENT_DOMAIN_ATTR_COLLECTION_METHOD    = 4,
+  CUPTI_EVENT_DOMAIN_ATTR_FORCE_INT      = 0x7fffffff,
+} CUpti_EventDomainAttribute;
+/**
+ * \brief The collection method used for an event.
+ *
+ * The collection method indicates how an event is collected. It is
+ * reported as the value of the event domain attribute
+ * CUPTI_EVENT_DOMAIN_ATTR_COLLECTION_METHOD.
+ */
+typedef enum {
+  /**
+   * Event is collected using a hardware global performance monitor.
+   */
+  CUPTI_EVENT_COLLECTION_METHOD_PM                  = 0,
+  /**
+   * Event is collected using a hardware SM performance monitor.
+   */
+  CUPTI_EVENT_COLLECTION_METHOD_SM                  = 1,
+  /**
+   * Event is collected using software instrumentation.
+   */
+  CUPTI_EVENT_COLLECTION_METHOD_INSTRUMENTED        = 2,
+  /**
+   * Event is collected using NvLink throughput counter method.
+   */
+  CUPTI_EVENT_COLLECTION_METHOD_NVLINK_TC           = 3,
+  CUPTI_EVENT_COLLECTION_METHOD_FORCE_INT           = 0x7fffffff
+} CUpti_EventCollectionMethod;
+/**
+ * \brief Event group attributes.
+ *
+ * Event group attributes. These attributes can be read using \ref
+ * cuptiEventGroupGetAttribute. Attributes marked [rw] can also be
+ * written using \ref cuptiEventGroupSetAttribute.
+ */
+typedef enum {
+  /**
+   * The domain to which the event group is bound. This attribute is
+   * set when the first event is added to the group.  Value is a
+   * CUpti_EventDomainID.
+   */
+  CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID              = 0,
+  /**
+   * [rw] Profile all the instances of the domain for this
+   * eventgroup. This feature can be used to get load balancing
+   * across all instances of a domain. Value is an integer.
+   */
+  CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES = 1,
+  /**
+   * [rw] Reserved for user data.
+   */
+  CUPTI_EVENT_GROUP_ATTR_USER_DATA                    = 2,
+  /**
+   * Number of events in the group. Value is a uint32_t.
+   */
+  CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS                   = 3,
+  /**
+   * Enumerates events in the group. Value is a pointer to buffer of
+   * size sizeof(CUpti_EventID) * num_of_events in the eventgroup.
+   * num_of_events can be queried using
+   * CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS.
+   */
+  CUPTI_EVENT_GROUP_ATTR_EVENTS                       = 4,
+  /**
+   * Number of instances of the domain bound to this event group that
+   * will be counted.  Value is a uint32_t.
+   */
+  CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT               = 5,
+  /**
+   * [rw] Profiling scope of the event group. The scope can be set to
+   * CUPTI_EVENT_PROFILING_SCOPE_DEVICE or
+   * CUPTI_EVENT_PROFILING_SCOPE_CONTEXT before any event is added to
+   * the eventgroup; this selects the scope used when the events that
+   * will be added have scope CUPTI_EVENT_PROFILING_SCOPE_BOTH.
+   * If the profiling scope of an event is either
+   * CUPTI_EVENT_PROFILING_SCOPE_DEVICE or CUPTI_EVENT_PROFILING_SCOPE_CONTEXT
+   * then setting this attribute will not affect the default scope.
+   * It is not allowed to add events of different scope to the same eventgroup.
+   * Value is a uint32_t.
+   */
+  CUPTI_EVENT_GROUP_ATTR_PROFILING_SCOPE               = 6,
+  CUPTI_EVENT_GROUP_ATTR_FORCE_INT                     = 0x7fffffff,
+} CUpti_EventGroupAttribute;
+/**
+ * \brief Profiling scope for event.
+ *
+ * The profiling scope of an event indicates whether the event can be
+ * collected at context scope, at device scope, or at either of the
+ * two. It is reported as the value of the event attribute
+ * CUPTI_EVENT_ATTR_PROFILING_SCOPE.
+ */
+typedef enum {
+  /**
+   * Event is collected at context scope.
+   */
+  CUPTI_EVENT_PROFILING_SCOPE_CONTEXT                 = 0,
+  /**
+   * Event is collected at device scope.
+   */
+  CUPTI_EVENT_PROFILING_SCOPE_DEVICE                  = 1,
+  /**
+   * Event can be collected at device or context scope.
+   * The scope can be set using \ref cuptiEventGroupSetAttribute API.
+   */
+  CUPTI_EVENT_PROFILING_SCOPE_BOTH                    = 2,
+  CUPTI_EVENT_PROFILING_SCOPE_FORCE_INT               = 0x7fffffff
+} CUpti_EventProfilingScope;
+/**
+ * \brief Event attributes.
+ *
+ * Event attributes. These attributes can be read using \ref
+ * cuptiEventGetAttribute.
+ *
+ * Note that the numeric values are not contiguous: no attribute is
+ * defined with the value 4.
+ */
+typedef enum {
+  /**
+   * Event name. Value is a null-terminated const c-string.
+   */
+  CUPTI_EVENT_ATTR_NAME              = 0,
+  /**
+   * Short description of event. Value is a null-terminated const
+   * c-string.
+   */
+  CUPTI_EVENT_ATTR_SHORT_DESCRIPTION = 1,
+  /**
+   * Long description of event. Value is a null-terminated const
+   * c-string.
+   */
+  CUPTI_EVENT_ATTR_LONG_DESCRIPTION  = 2,
+  /**
+   * Category of event. Value is a \ref CUpti_EventCategory.
+   */
+  CUPTI_EVENT_ATTR_CATEGORY          = 3,
+  /**
+   * Profiling scope of the event. It can be either device or context or both.
+   * Value is a \ref CUpti_EventProfilingScope.
+   */
+  CUPTI_EVENT_ATTR_PROFILING_SCOPE   = 5,
+  CUPTI_EVENT_ATTR_FORCE_INT         = 0x7fffffff,
+} CUpti_EventAttribute;
+/**
+ * \brief Event collection modes.
+ *
+ * The event collection mode determines the period over which the
+ * events within the enabled event groups will be collected. The mode
+ * is set per context with \ref cuptiSetEventCollectionMode.
+ */
+typedef enum {
+  /**
+   * Events are collected for the entire duration between the
+   * cuptiEventGroupEnable and cuptiEventGroupDisable calls.
+   * Event values are reset when the events are read.
+   * For CUDA toolkit v6.0 and older this was the default mode.
+   */
+  CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS          = 0,
+  /**
+   * Events are collected only for the durations of kernel executions
+   * that occur between the cuptiEventGroupEnable and
+   * cuptiEventGroupDisable calls. Event collection begins when a
+   * kernel execution begins, and stops when kernel execution
+   * completes. Event values are reset to zero when each kernel
+   * execution begins. If multiple kernel executions occur between the
+   * cuptiEventGroupEnable and cuptiEventGroupDisable calls then the
+   * event values must be read after each kernel launch if those
+   * events need to be associated with the specific kernel launch.
+   * Note that collection in this mode may significantly change the
+   * overall performance characteristics of the application because
+   * kernel executions that occur between the cuptiEventGroupEnable and
+   * cuptiEventGroupDisable calls are serialized on the GPU.
+   * This is the default mode from CUDA toolkit v6.5 onwards.
+   */
+  CUPTI_EVENT_COLLECTION_MODE_KERNEL              = 1,
+  CUPTI_EVENT_COLLECTION_MODE_FORCE_INT           = 0x7fffffff
+} CUpti_EventCollectionMode;
+/**
+ * \brief An event category.
+ *
+ * Each event is assigned to a category that represents the general
+ * type of the event. An event's category is accessed using \ref
+ * cuptiEventGetAttribute and the CUPTI_EVENT_ATTR_CATEGORY attribute.
+ */
+typedef enum {
+  /**
+   * An instruction related event.
+   */
+  CUPTI_EVENT_CATEGORY_INSTRUCTION     = 0,
+  /**
+   * A memory related event.
+   */
+  CUPTI_EVENT_CATEGORY_MEMORY          = 1,
+  /**
+   * A cache related event.
+   */
+  CUPTI_EVENT_CATEGORY_CACHE           = 2,
+  /**
+   * A profile-trigger event.
+   */
+  CUPTI_EVENT_CATEGORY_PROFILE_TRIGGER = 3,
+  /**
+   * A system event.
+   */
+  CUPTI_EVENT_CATEGORY_SYSTEM  = 4,
+  CUPTI_EVENT_CATEGORY_FORCE_INT       = 0x7fffffff
+} CUpti_EventCategory;
+/**
+ * \brief The overflow value for a CUPTI event.
+ *
+ * The CUPTI event value that indicates an overflow. It is the 64-bit
+ * value with all bits set.
+ */
+#define CUPTI_EVENT_OVERFLOW ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+/**
+ * \brief The value that indicates the event value is invalid.
+ *
+ * It is the 64-bit value 0xFFFFFFFFFFFFFFFE, i.e. one less than
+ * CUPTI_EVENT_OVERFLOW.
+ */
+#define CUPTI_EVENT_INVALID ((uint64_t)0xFFFFFFFFFFFFFFFEULL)
+/**
+ * \brief Flags for cuptiEventGroupReadEvent and
+ * cuptiEventGroupReadAllEvents.
+ *
+ * Flags for \ref cuptiEventGroupReadEvent and \ref
+ * cuptiEventGroupReadAllEvents.
+ */
+typedef enum {
+  /**
+   * No flags.
+   */
+  CUPTI_EVENT_READ_FLAG_NONE          = 0,
+  CUPTI_EVENT_READ_FLAG_FORCE_INT     = 0x7fffffff,
+} CUpti_ReadEventFlags;
+/**
+ * \brief A set of event groups.
+ *
+ * A set of event groups. When returned by \ref
+ * cuptiEventGroupSetsCreate and \ref cuptiMetricCreateEventGroupSets
+ * a set indicates the event groups that can be enabled at the same
+ * time (i.e. all the events in the set can be collected
+ * simultaneously).
+ */
+typedef struct {
+  /**
+   * The number of event groups in the set.
+   */
+  uint32_t numEventGroups;
+  /**
+   * An array of \p numEventGroups event groups.
+   */
+  CUpti_EventGroup *eventGroups;
+} CUpti_EventGroupSet;
+/**
+ * \brief A set of event group sets.
+ *
+ * A set of event group sets. When returned by \ref
+ * cuptiEventGroupSetsCreate and \ref cuptiMetricCreateEventGroupSets
+ * a CUpti_EventGroupSets indicates the number of passes required to
+ * collect all the events, and the event groups that should be
+ * collected during each pass.
+ */
+typedef struct {
+  /**
+   * Number of event group sets. When returned by the functions named
+   * above, this is the number of passes required.
+   */
+  uint32_t numSets;
+  /**
+   * An array of \p numSets event group sets, one \ref
+   * CUpti_EventGroupSet per pass.
+   */
+  CUpti_EventGroupSet *sets;
+} CUpti_EventGroupSets;
+/**
+ * \brief Set the event collection mode.
+ *
+ * Set the event collection mode for a \p context.  The \p mode
+ * controls the event collection behavior of all events in event
+ * groups created in the \p context. This API is invalid in kernel
+ * replay mode. See \ref CUpti_EventCollectionMode for the available
+ * modes.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param context The context
+ * \param mode The event collection mode
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
+ * \retval CUPTI_ERROR_INVALID_OPERATION if called when replay mode is enabled
+ * \retval CUPTI_ERROR_NOT_SUPPORTED if mode is not supported on the device
+ */
+CUptiResult CUPTIAPI cuptiSetEventCollectionMode(CUcontext context,
+                                                 CUpti_EventCollectionMode mode);
+/**
+ * \brief Read a device attribute.
+ *
+ * Read a device attribute and return it in \p *value. The value type
+ * for each attribute is documented in \ref CUpti_DeviceAttribute.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param device The CUDA device
+ * \param attrib The attribute to read
+ * \param valueSize The size of the \p value buffer in bytes, and
+ * returns the number of bytes written to \p value
+ * \param value Returns the value of the attribute
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_DEVICE
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
+ * is NULL, or if \p attrib is not a device attribute
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
+ * attribute values, indicates that the \p value buffer is too small
+ * to hold the attribute value.
+ */
+CUptiResult CUPTIAPI cuptiDeviceGetAttribute(CUdevice device,
+                                             CUpti_DeviceAttribute attrib,
+                                             size_t *valueSize,
+                                             void *value);
+/**
+ * \brief Read a device timestamp.
+ *
+ * Returns the device timestamp in \p *timestamp. The timestamp is
+ * reported in nanoseconds and indicates the time since the device was
+ * last reset.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param context A context on the device from which to get the timestamp
+ * \param timestamp Returns the device timestamp
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p timestamp is NULL
+ *
+ * \note **DEPRECATED** This API is deprecated as of CUDA 11.3
+ */
+CUptiResult CUPTIAPI cuptiDeviceGetTimestamp(CUcontext context,
+                                             uint64_t *timestamp);
+/**
+ * \brief Get the number of domains for a device.
+ *
+ * Returns the number of event domains for a device in \p *numDomains.
+ * The count can be used to size the buffer passed to \ref
+ * cuptiDeviceEnumEventDomains.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param device The CUDA device
+ * \param numDomains Returns the number of domains
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_DEVICE
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numDomains is NULL
+ */
+CUptiResult CUPTIAPI cuptiDeviceGetNumEventDomains(CUdevice device,
+                                                   uint32_t *numDomains);
+/**
+ * \brief Get the event domains for a device.
+ *
+ * Returns the event domains IDs in \p domainArray for a device.  The
+ * size of the \p domainArray buffer is given by \p
+ * *arraySizeBytes. The size of the \p domainArray buffer must be at
+ * least N * sizeof(CUpti_EventDomainID), where N is the number of
+ * domains reported by \ref cuptiDeviceGetNumEventDomains, or else all
+ * domains will not be returned. The value returned in \p
+ * *arraySizeBytes contains the number of bytes returned in \p
+ * domainArray.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param device The CUDA device
+ * \param arraySizeBytes The size of \p domainArray in bytes, and
+ * returns the number of bytes written to \p domainArray
+ * \param domainArray Returns the IDs of the event domains for the device
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_DEVICE
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or
+ * \p domainArray are NULL
+ */
+CUptiResult CUPTIAPI cuptiDeviceEnumEventDomains(CUdevice device,
+                                                 size_t *arraySizeBytes,
+                                                 CUpti_EventDomainID *domainArray);
+/**
+ * \brief Read an event domain attribute for a device.
+ *
+ * Returns an event domain attribute in \p *value. The size of the \p
+ * value buffer is given by \p *valueSize. The value returned in \p
+ * *valueSize contains the number of bytes returned in \p value.
+ *
+ * If the attribute value is a c-string that is longer than \p
+ * *valueSize, then only the first \p *valueSize characters will be
+ * returned and there will be no terminating null byte.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param device The CUDA device
+ * \param eventDomain ID of the event domain
+ * \param attrib The event domain attribute to read
+ * \param valueSize The size of the \p value buffer in bytes, and
+ * returns the number of bytes written to \p value
+ * \param value Returns the attribute's value
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_DEVICE
+ * \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
+ * is NULL, or if \p attrib is not an event domain attribute
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
+ * attribute values, indicates that the \p value buffer is too small
+ * to hold the attribute value.
+ */
+CUptiResult CUPTIAPI cuptiDeviceGetEventDomainAttribute(CUdevice device,
+                                                        CUpti_EventDomainID eventDomain,
+                                                        CUpti_EventDomainAttribute attrib,
+                                                        size_t *valueSize,
+                                                        void *value);
+/**
+ * \brief Get the number of event domains available on any device.
+ *
+ * Returns the total number of event domains available on any
+ * CUDA-capable device in \p *numDomains. The count can be used to
+ * size the buffer passed to \ref cuptiEnumEventDomains.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param numDomains Returns the number of domains
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numDomains is NULL
+ */
+CUptiResult CUPTIAPI cuptiGetNumEventDomains(uint32_t *numDomains);
+/**
+ * \brief Get the event domains available on any device.
+ *
+ * Returns all the event domains available on any CUDA-capable device.
+ * Event domain IDs are returned in \p domainArray. The size of the \p
+ * domainArray buffer is given by \p *arraySizeBytes. The size of the
+ * \p domainArray buffer must be at least N *
+ * sizeof(CUpti_EventDomainID), where N is the number of domains
+ * reported by \ref cuptiGetNumEventDomains, or all domains will not be
+ * returned. The value returned in \p *arraySizeBytes contains the
+ * number of bytes returned in \p domainArray.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param arraySizeBytes The size of \p domainArray in bytes, and
+ * returns the number of bytes written to \p domainArray
+ * \param domainArray Returns all the event domains
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or
+ * \p domainArray are NULL
+ */
+CUptiResult CUPTIAPI cuptiEnumEventDomains(size_t *arraySizeBytes,
+                                           CUpti_EventDomainID *domainArray);
+/**
+ * \brief Read an event domain attribute.
+ *
+ * Returns an event domain attribute in \p *value. The size of the \p
+ * value buffer is given by \p *valueSize. The value returned in \p
+ * *valueSize contains the number of bytes returned in \p value.
+ *
+ * If the attribute value is a c-string that is longer than \p
+ * *valueSize, then only the first \p *valueSize characters will be
+ * returned and there will be no terminating null byte.
+ *
+ * Some attributes (CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT and
+ * CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT) can be read only with
+ * \ref cuptiDeviceGetEventDomainAttribute; see \ref
+ * CUpti_EventDomainAttribute.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param eventDomain ID of the event domain
+ * \param attrib The event domain attribute to read
+ * \param valueSize The size of the \p value buffer in bytes, and
+ * returns the number of bytes written to \p value
+ * \param value Returns the attribute's value
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
+ * is NULL, or if \p attrib is not an event domain attribute
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
+ * attribute values, indicates that the \p value buffer is too small
+ * to hold the attribute value.
+ */
+CUptiResult CUPTIAPI cuptiEventDomainGetAttribute(CUpti_EventDomainID eventDomain,
+                                                  CUpti_EventDomainAttribute attrib,
+                                                  size_t *valueSize,
+                                                  void *value);
+/**
+ * \brief Get number of events in a domain.
+ *
+ * Returns the number of events for a domain in \p *numEvents. The
+ * count can be used to size the buffer passed to \ref
+ * cuptiEventDomainEnumEvents.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param eventDomain ID of the event domain
+ * \param numEvents Returns the number of events in the domain
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numEvents is NULL
+ */
+CUptiResult CUPTIAPI cuptiEventDomainGetNumEvents(CUpti_EventDomainID eventDomain,
+                                                  uint32_t *numEvents);
+/**
+ * \brief Get the events in a domain.
+ *
+ * Returns the event IDs in \p eventArray for a domain.  The size of
+ * the \p eventArray buffer is given by \p *arraySizeBytes. The size
+ * of the \p eventArray buffer must be at least N *
+ * sizeof(CUpti_EventID), where N is the number of events reported by
+ * \ref cuptiEventDomainGetNumEvents, or else all events will not be
+ * returned. The value returned in \p *arraySizeBytes contains the
+ * number of bytes returned in \p eventArray.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param eventDomain ID of the event domain
+ * \param arraySizeBytes The size of \p eventArray in bytes, and
+ * returns the number of bytes written to \p eventArray
+ * \param eventArray Returns the IDs of the events in the domain
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or \p
+ * eventArray are NULL
+ */
+CUptiResult CUPTIAPI cuptiEventDomainEnumEvents(CUpti_EventDomainID eventDomain,
+                                                size_t *arraySizeBytes,
+                                                CUpti_EventID *eventArray);
+/**
+ * \brief Get an event attribute.
+ *
+ * Returns an event attribute in \p *value. The size of the \p
+ * value buffer is given by \p *valueSize. The value returned in \p
+ * *valueSize contains the number of bytes returned in \p value. The
+ * value type for each attribute is documented in \ref
+ * CUpti_EventAttribute.
+ *
+ * If the attribute value is a c-string that is longer than \p
+ * *valueSize, then only the first \p *valueSize characters will be
+ * returned and there will be no terminating null byte.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param event ID of the event
+ * \param attrib The event attribute to read
+ * \param valueSize The size of the \p value buffer in bytes, and
+ * returns the number of bytes written to \p value
+ * \param value Returns the attribute's value
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_EVENT_ID
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
+ * is NULL, or if \p attrib is not an event attribute
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
+ * attribute values, indicates that the \p value buffer is too small
+ * to hold the attribute value.
+ */
+CUptiResult CUPTIAPI cuptiEventGetAttribute(CUpti_EventID event,
+                                            CUpti_EventAttribute attrib,
+                                            size_t *valueSize,
+                                            void *value);
+/**
+ * \brief Find an event by name.
+ *
+ * Find an event by name and return the event ID in \p *event.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param device The CUDA device
+ * \param eventName The name of the event to find
+ * \param event Returns the ID of the found event or undefined if
+ * unable to find the event
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_DEVICE
+ * \retval CUPTI_ERROR_INVALID_EVENT_NAME if unable to find an event
+ * with name \p eventName. In this case \p *event is undefined
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventName or \p event is NULL
+ */
+CUptiResult CUPTIAPI cuptiEventGetIdFromName(CUdevice device,
+                                             const char *eventName,
+                                             CUpti_EventID *event);
+/**
+ * \brief Create a new event group for a context.
+ *
+ * Creates a new event group for \p context and returns the new group
+ * in \p *eventGroup. The group is released with \ref
+ * cuptiEventGroupDestroy.
+ * \note \p flags are reserved for future use and should be set to zero.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param context The context for the event group
+ * \param eventGroup Returns the new event group
+ * \param flags Reserved - must be zero
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
+ * \retval CUPTI_ERROR_OUT_OF_MEMORY
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
+ */
+CUptiResult CUPTIAPI cuptiEventGroupCreate(CUcontext context,
+                                           CUpti_EventGroup *eventGroup,
+                                           uint32_t flags);
+/**
+ * \brief Destroy an event group.
+ *
+ * Destroy an \p eventGroup and free its resources. An event group
+ * cannot be destroyed if it is enabled.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param eventGroup The event group to destroy
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_OPERATION if the event group is enabled
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
+ */
+CUptiResult CUPTIAPI cuptiEventGroupDestroy(CUpti_EventGroup eventGroup);
+/**
+ * \brief Read an event group attribute.
+ *
+ * Read an event group attribute and return it in \p *value. The
+ * value type for each attribute is documented in \ref
+ * CUpti_EventGroupAttribute.
+ * \note \b Thread-safety: this function is thread safe but client
+ * must guard against simultaneous destruction or modification of \p
+ * eventGroup (for example, client must guard against simultaneous
+ * calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
+ * etc.), and must guard against simultaneous destruction of the
+ * context in which \p eventGroup was created (for example, client
+ * must guard against simultaneous calls to cudaDeviceReset,
+ * cuCtxDestroy, etc.).
+ *
+ * \param eventGroup The event group
+ * \param attrib The attribute to read
+ * \param valueSize The size of the \p value buffer in bytes, and
+ * returns the number of bytes written to \p value
+ * \param value Returns the value of the attribute
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
+ * is NULL, or if \p attrib is not an eventgroup attribute
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
+ * attribute values, indicates that the \p value buffer is too small
+ * to hold the attribute value.
+ */
+CUptiResult CUPTIAPI cuptiEventGroupGetAttribute(CUpti_EventGroup eventGroup,
+                                                 CUpti_EventGroupAttribute attrib,
+                                                 size_t *valueSize,
+                                                 void *value);
+/**
+ * \brief Write an event group attribute.
+ *
+ * Write an event group attribute. Only the attributes marked [rw] in
+ * \ref CUpti_EventGroupAttribute can be written.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param eventGroup The event group
+ * \param attrib The attribute to write
+ * \param valueSize The size, in bytes, of the value
+ * \param value The attribute value to write
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
+ * is NULL, or if \p attrib is not an event group attribute, or if
+ * \p attrib is not a writable attribute
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT Indicates that
+ * the \p value buffer is too small to hold the attribute value.
+ */
+CUptiResult CUPTIAPI cuptiEventGroupSetAttribute(CUpti_EventGroup eventGroup,
+                                                 CUpti_EventGroupAttribute attrib,
+                                                 size_t valueSize,
+                                                 void *value);
+/**
+ * \brief Add an event to an event group.
+ *
+ * Add an event to an event group. The event add can fail for a number of reasons:
+ * \li The event group is enabled
+ * \li The event does not belong to the same event domain as the
+ * events that are already in the event group
+ * \li The event has a different profiling scope than the events that
+ * are already in the event group (see
+ * CUPTI_EVENT_GROUP_ATTR_PROFILING_SCOPE)
+ * \li Device limitations on the events that can belong to the same group
+ * \li The event group is full
+ *
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param eventGroup The event group
+ * \param event The event to add to the group
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_EVENT_ID
+ * \retval CUPTI_ERROR_OUT_OF_MEMORY
+ * \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is enabled
+ * \retval CUPTI_ERROR_NOT_COMPATIBLE if \p event belongs to a
+ * different event domain than the events already in \p eventGroup, or
+ * if a device limitation prevents \p event from being collected at
+ * the same time as the events already in \p eventGroup
+ * \retval CUPTI_ERROR_MAX_LIMIT_REACHED if \p eventGroup is full
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
+ */
+CUptiResult CUPTIAPI cuptiEventGroupAddEvent(CUpti_EventGroup eventGroup,
+                                             CUpti_EventID event);
+/**
+ * \brief Remove an event from an event group.
+ *
+ * Remove \p event from an event group. The event cannot be
+ * removed if the event group is enabled.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param eventGroup The event group
+ * \param event The event to remove from the group
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_EVENT_ID
+ * \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is enabled
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
+ */
+CUptiResult CUPTIAPI cuptiEventGroupRemoveEvent(CUpti_EventGroup eventGroup,
+                                                CUpti_EventID event);
+/**
+ * \brief Remove all events from an event group.
+ *
+ * Remove all events from an event group. Events cannot be removed if
+ * the event group is enabled.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param eventGroup The event group
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is enabled
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
+ */
+CUptiResult CUPTIAPI cuptiEventGroupRemoveAllEvents(CUpti_EventGroup eventGroup);
+/**
+ * \brief Zero all the event counts in an event group.
+ *
+ * Zero all the event counts in an event group.
+ * \note \b Thread-safety: this function is thread safe but client
+ * must guard against simultaneous destruction or modification of \p
+ * eventGroup (for example, client must guard against simultaneous
+ * calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
+ * etc.), and must guard against simultaneous destruction of the
+ * context in which \p eventGroup was created (for example, client
+ * must guard against simultaneous calls to cudaDeviceReset,
+ * cuCtxDestroy, etc.).
+ *
+ * \param eventGroup The event group
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_HARDWARE
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
+ */
+CUptiResult CUPTIAPI cuptiEventGroupResetAllEvents(CUpti_EventGroup eventGroup);
+/**
+ * \brief Enable an event group.
+ *
+ * Enable an event group. Enabling an event group zeros the value of
+ * all the events in the group and then starts collection of those
+ * events.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param eventGroup The event group
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_HARDWARE
+ * \retval CUPTI_ERROR_NOT_READY if \p eventGroup does not contain any events
+ * \retval CUPTI_ERROR_NOT_COMPATIBLE if \p eventGroup cannot be
+ * enabled due to other already enabled event groups
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
+ * \retval CUPTI_ERROR_HARDWARE_BUSY if another client is profiling
+ * and hardware is busy
+ */
+CUptiResult CUPTIAPI cuptiEventGroupEnable(CUpti_EventGroup eventGroup);
+/**
+ * \brief Disable an event group.
+ *
+ * Disable an event group. Disabling an event group stops collection
+ * of events contained in the group.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param eventGroup The event group
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_HARDWARE
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
+ */
+CUptiResult CUPTIAPI cuptiEventGroupDisable(CUpti_EventGroup eventGroup);
+/**
+ * \brief Read the value for an event in an event group.
+ *
+ * Read the value for an event in an event group. The event value is
+ * returned in the \p eventValueBuffer buffer. \p
+ * eventValueBufferSizeBytes indicates the size of the \p
+ * eventValueBuffer buffer. The buffer must be at least sizeof(uint64_t)
+ * if ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is not set
+ * on the group containing the event.  The buffer must be at least
+ * (sizeof(uint64_t) * number of domain instances) if
+ * ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is set on the
+ * group.
+ *
+ * If any instance of an event counter overflows, the value returned
+ * for that event instance will be ::CUPTI_EVENT_OVERFLOW.
+ *
+ * The only allowed value for \p flags is ::CUPTI_EVENT_READ_FLAG_NONE.
+ *
+ * Reading an event from a disabled event group is not allowed. After
+ * being read, an event's value is reset to zero.
+ * \note \b Thread-safety: this function is thread safe but client
+ * must guard against simultaneous destruction or modification of \p
+ * eventGroup (for example, client must guard against simultaneous
+ * calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
+ * etc.), and must guard against simultaneous destruction of the
+ * context in which \p eventGroup was created (for example, client
+ * must guard against simultaneous calls to cudaDeviceReset,
+ * cuCtxDestroy, etc.). If \ref cuptiEventGroupResetAllEvents is
+ * called simultaneously with this function, then returned event
+ * values are undefined.
+ *
+ * \param eventGroup The event group
+ * \param flags Flags controlling the reading mode
+ * \param event The event to read
+ * \param eventValueBufferSizeBytes The size of \p eventValueBuffer
+ * in bytes, and returns the number of bytes written to \p
+ * eventValueBuffer
+ * \param eventValueBuffer Returns the event value(s)
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_EVENT_ID
+ * \retval CUPTI_ERROR_HARDWARE
+ * \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is disabled
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup, \p
+ * eventValueBufferSizeBytes or \p eventValueBuffer is NULL
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if size of \p eventValueBuffer
+ * is not sufficient
+ */
+CUptiResult CUPTIAPI cuptiEventGroupReadEvent(CUpti_EventGroup eventGroup,
+                                              CUpti_ReadEventFlags flags,
+                                              CUpti_EventID event,
+                                              size_t *eventValueBufferSizeBytes,
+                                              uint64_t *eventValueBuffer);
+/**
+ * \brief Read the values for all the events in an event group.
+ *
+ * Read the values for all the events in an event group. The event
+ * values are returned in the \p eventValueBuffer buffer. \p
+ * eventValueBufferSizeBytes indicates the size of \p
+ * eventValueBuffer.  The buffer must be at least (sizeof(uint64_t) *
+ * number of events in group) if
+ * ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is not set on
+ * the group containing the events.  The buffer must be at least
+ * (sizeof(uint64_t) * number of domain instances * number of events in
+ * group) if ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is
+ * set on the group.
+ *
+ * The data format returned in \p eventValueBuffer is:
+ *    - domain instance 0: event0 event1 ... eventN
+ *    - domain instance 1: event0 event1 ... eventN
+ *    - ...
+ *    - domain instance M: event0 event1 ... eventN
+ *
+ * The event order in \p eventValueBuffer is returned in \p
+ * eventIdArray. The size of \p eventIdArray is specified in \p
+ * eventIdArraySizeBytes. The size should be at least
+ * (sizeof(CUpti_EventID) * number of events in group).
+ *
+ * If any instance of any event counter overflows, the value returned
+ * for that event instance will be ::CUPTI_EVENT_OVERFLOW.
+ *
+ * The only allowed value for \p flags is ::CUPTI_EVENT_READ_FLAG_NONE.
+ *
+ * Reading events from a disabled event group is not allowed. After
+ * being read, an event's value is reset to zero.
+ * \note \b Thread-safety: this function is thread safe but client
+ * must guard against simultaneous destruction or modification of \p
+ * eventGroup (for example, client must guard against simultaneous
+ * calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
+ * etc.), and must guard against simultaneous destruction of the
+ * context in which \p eventGroup was created (for example, client
+ * must guard against simultaneous calls to cudaDeviceReset,
+ * cuCtxDestroy, etc.). If \ref cuptiEventGroupResetAllEvents is
+ * called simultaneously with this function, then returned event
+ * values are undefined.
+ *
+ * \param eventGroup The event group
+ * \param flags Flags controlling the reading mode
+ * \param eventValueBufferSizeBytes The size of \p eventValueBuffer in
+ * bytes, and returns the number of bytes written to \p
+ * eventValueBuffer
+ * \param eventValueBuffer Returns the event values
+ * \param eventIdArraySizeBytes The size of \p eventIdArray in bytes,
+ * and returns the number of bytes written to \p eventIdArray
+ * \param eventIdArray Returns the IDs of the events in the same order
+ * as the values returned in \p eventValueBuffer.
+ * \param numEventIdsRead Returns the number of event IDs returned
+ * in \p eventIdArray
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_HARDWARE
+ * \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is disabled
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup, \p
+ * eventValueBufferSizeBytes, \p eventValueBuffer, \p
+ * eventIdArraySizeBytes, \p eventIdArray or \p numEventIdsRead is
+ * NULL
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if size of \p eventValueBuffer
+ * or \p eventIdArray is not sufficient
+ */
+CUptiResult CUPTIAPI cuptiEventGroupReadAllEvents(CUpti_EventGroup       eventGroup,
+                                                  CUpti_ReadEventFlags   flags,
+                                                  size_t                 *eventValueBufferSizeBytes,
+                                                  uint64_t               *eventValueBuffer,
+                                                  size_t                 *eventIdArraySizeBytes,
+                                                  CUpti_EventID          *eventIdArray,
+                                                  size_t                 *numEventIdsRead);
+/**
+ * \brief For a set of events, get the grouping that indicates the
+ * number of passes and the event groups necessary to collect the
+ * events.
+ *
+ * The number of events that can be collected simultaneously varies by
+ * device and by the type of the events. When events can be collected
+ * simultaneously, they may need to be grouped into multiple event
+ * groups because they are from different event domains. This function
+ * takes a set of events and determines how many passes are required
+ * to collect all those events, and which events can be collected
+ * simultaneously in each pass.
+ *
+ * The CUpti_EventGroupSets returned in \p eventGroupPasses indicates
+ * how many passes are required to collect the events with the \p
+ * numSets field. Within each event group set, the \p sets array
+ * indicates the event groups that should be collected on each pass.
+ * \note \b Thread-safety: this function is thread safe, but client
+ * must guard against another thread simultaneously destroying \p
+ * context.
+ *
+ * \param context The context for event collection
+ * \param eventIdArraySizeBytes Size of \p eventIdArray in bytes
+ * \param eventIdArray Array of event IDs that need to be grouped
+ * \param eventGroupPasses Returns a CUpti_EventGroupSets object that
+ * indicates the number of passes required to collect the events and
+ * the events to collect on each pass
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
+ * \retval CUPTI_ERROR_INVALID_EVENT_ID
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventIdArray or
+ * \p eventGroupPasses is NULL
+ */
+CUptiResult CUPTIAPI cuptiEventGroupSetsCreate(CUcontext context,
+                                               size_t eventIdArraySizeBytes,
+                                               CUpti_EventID *eventIdArray,
+                                               CUpti_EventGroupSets **eventGroupPasses);
+/**
+ * \brief Destroy an event group sets object.
+ *
+ * Destroy a CUpti_EventGroupSets object.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param eventGroupSets The object to destroy
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_INVALID_OPERATION if any of the event groups
+ * contained in the sets is enabled
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroupSets is NULL
+ */
+CUptiResult CUPTIAPI cuptiEventGroupSetsDestroy(CUpti_EventGroupSets *eventGroupSets);
+/**
+ * \brief Enable an event group set.
+ *
+ * Enable a set of event groups. Enabling a set of event groups zeros the value of
+ * all the events in all the groups and then starts collection of those events.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param eventGroupSet The pointer to the event group set
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_HARDWARE
+ * \retval CUPTI_ERROR_NOT_READY if an event group in \p eventGroupSet
+ * does not contain any events
+ * \retval CUPTI_ERROR_NOT_COMPATIBLE if an event group in \p eventGroupSet
+ * cannot be enabled due to other already enabled event groups
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroupSet is NULL
+ * \retval CUPTI_ERROR_HARDWARE_BUSY if another client is profiling and
+ * hardware is busy
+ */
+CUptiResult CUPTIAPI cuptiEventGroupSetEnable(CUpti_EventGroupSet *eventGroupSet);
+/**
+ * \brief Disable an event group set.
+ *
+ * Disable a set of event groups. Disabling a set of event groups
+ * stops collection of events contained in the groups.
+ * \note \b Thread-safety: this function is thread safe.
+ * \note If this call fails, some of the event groups in the set may be disabled
+ * and other event groups may remain enabled.
+ *
+ * \param eventGroupSet The pointer to the event group set
+ *
+ * \retval CUPTI_SUCCESS
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
+ * \retval CUPTI_ERROR_HARDWARE
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroupSet is NULL
+ */
+CUptiResult CUPTIAPI cuptiEventGroupSetDisable(CUpti_EventGroupSet *eventGroupSet);
+/**
+ * \brief Enable kernel replay mode.
+ *
+ * Set profiling mode for the context to replay mode. In this mode,
+ * any number of events can be collected in one run of the kernel. The
+ * event collection mode will automatically switch to
+ * ::CUPTI_EVENT_COLLECTION_MODE_KERNEL.  In this mode, \ref
+ * cuptiSetEventCollectionMode will return
+ * ::CUPTI_ERROR_INVALID_OPERATION.
+ * \note Kernels might take longer to run if many events are enabled.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param context The context
+ *
+ * \retval CUPTI_SUCCESS
+ */
+CUptiResult CUPTIAPI cuptiEnableKernelReplayMode(CUcontext context);
+/**
+ * \brief Disable kernel replay mode.
+ *
+ * Set profiling mode for the context to non-replay (default)
+ * mode. Event collection mode will be set to
+ * CUPTI_EVENT_COLLECTION_MODE_KERNEL.  All previously enabled
+ * event groups and event group sets will be disabled.
+ * \note \b Thread-safety: this function is thread safe.
+ *
+ * \param context The context
+ * \retval CUPTI_SUCCESS
+ */
+CUptiResult CUPTIAPI cuptiDisableKernelReplayMode(CUcontext context);
+/**
+ * \brief Function type for getting updates on kernel replay.
+ *
+ * \param kernelName The mangled kernel name
+ * \param numReplaysDone Number of replays done so far
+ * \param customData Pointer of any custom data passed in when subscribing
+ */
+typedef void (CUPTIAPI *CUpti_KernelReplayUpdateFunc)(
+    const char *kernelName,
+    int numReplaysDone,
+    void *customData);
+/**
+ * \brief Subscribe to kernel replay updates.
+ *
+ * When subscribed, the function pointer passed in will be called each time a
+ * kernel run is finished during kernel replay. A previously subscribed function
+ * pointer will be replaced. Passing NULL as the function pointer unsubscribes
+ * from the updates.
+ *
+ * \param updateFunc The update function pointer
+ * \param customData Pointer to any custom data
+ *
+ * \retval CUPTI_SUCCESS
+ */
+CUptiResult CUPTIAPI cuptiKernelReplaySubscribeUpdate(CUpti_KernelReplayUpdateFunc updateFunc, void *customData);
+/** @} */ /* END CUPTI_EVENT_API */
+#if defined(__GNUC__) && defined(CUPTI_LIB)
+    #pragma GCC visibility pop
+#endif
+#if defined(__cplusplus)
+}
+#endif
+#endif /*_CUPTI_EVENTS_H_*/

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_pcsampling_util.h ADDED Viewed

	@@ -0,0 +1,419 @@

+#if !defined(_CUPTI_PCSAMPLING_UTIL_H_)
+#define _CUPTI_PCSAMPLING_UTIL_H_
+#include <cupti_pcsampling.h>
+#include <fstream>
+#ifndef CUPTIUTILAPI
+#ifdef _WIN32
+#define CUPTIUTILAPI __stdcall
+#else
+#define CUPTIUTILAPI
+#endif
+#endif
+#define ACTIVITY_RECORD_ALIGNMENT 8
+#if defined(_WIN32) // Windows 32- and 64-bit
+#define START_PACKED_ALIGNMENT __pragma(pack(push,1)) // exact fit - no padding
+#define PACKED_ALIGNMENT __declspec(align(ACTIVITY_RECORD_ALIGNMENT))
+#define END_PACKED_ALIGNMENT __pragma(pack(pop))
+#elif defined(__GNUC__) // GCC
+#define START_PACKED_ALIGNMENT
+#define PACKED_ALIGNMENT __attribute__ ((__packed__)) __attribute__ ((aligned (ACTIVITY_RECORD_ALIGNMENT)))
+#define END_PACKED_ALIGNMENT
+#else // all other compilers
+#define START_PACKED_ALIGNMENT
+#define PACKED_ALIGNMENT
+#define END_PACKED_ALIGNMENT
+#endif
+#ifndef CUPTI_UTIL_STRUCT_SIZE
+#define CUPTI_UTIL_STRUCT_SIZE(type_, lastfield_)                     (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
+#endif
+#ifndef CHECK_PC_SAMPLING_STRUCT_FIELD_EXISTS
+/**
+ * Evaluates to true when the offset of \p member within \p type is smaller
+ * than \p structSize, i.e. when the member starts inside the first
+ * \p structSize bytes of the structure.
+ * \p structSize is parenthesized so that an expression argument (for example
+ * a conditional expression) is compared as a whole.
+ */
+#define CHECK_PC_SAMPLING_STRUCT_FIELD_EXISTS(type, member, structSize)    \
+    (offsetof(type, member) < (structSize))
+#endif
+#if defined(__cplusplus)
+extern "C" {
+#endif
+#if defined(__GNUC__)
+    #pragma GCC visibility push(default)
+#endif
+namespace CUPTI { namespace PcSamplingUtil {
+/**
+ * \defgroup CUPTI_PCSAMPLING_UTILITY CUPTI PC Sampling Utility API
+ * Functions, types, and enums that implement the CUPTI PC Sampling Utility API.
+ * @{
+ */
+/**
+ * \brief Header info will be stored in file.
+ */
+typedef struct PACKED_ALIGNMENT {
+  /**
+   * Version of file format.
+   */
+  uint32_t version;
+  /**
+   * Total number of buffers present in the file.
+   */
+  uint32_t totalBuffers;
+} Header;
+/**
+ * \brief BufferInfo will be stored in the file for every buffer,
+ * i.e. for every call of the \ref CuptiUtilPutPcSampData API.
+ */
+typedef struct PACKED_ALIGNMENT {
+  /**
+   * Total number of PC records.
+   */
+  uint64_t recordCount;
+  /**
+   * Count of all stall reasons supported on the GPU
+   */
+  size_t numStallReasons;
+  /**
+   * Total number of stall reasons in single record.
+   */
+  uint64_t numSelectedStallReasons;
+  /**
+   * Buffer size in Bytes.
+   */
+  uint64_t bufferByteSize;
+} BufferInfo;
+/**
+ * \brief All available stall reasons name and respective indexes
+ * will be stored in it.
+ */
+typedef struct PACKED_ALIGNMENT {
+  /**
+   * Number of all available stall reasons
+   */
+  size_t numStallReasons;
+  /**
+   * Stall reasons names of all available stall reasons
+   */
+  char **stallReasons;
+  /**
+   * Stall reason index of all available stall reasons
+   */
+  uint32_t *stallReasonIndex;
+} PcSamplingStallReasons;
+/**
+ * \brief Type of PC sampling buffer stored in the file.
+ */
+typedef enum {
+  /**
+   * Invalid buffer type.
+   */
+  PC_SAMPLING_BUFFER_INVALID             = 0,
+  /**
+   * Refers to CUpti_PCSamplingData buffer.
+   */
+  PC_SAMPLING_BUFFER_PC_TO_COUNTER_DATA  = 1
+} PcSamplingBufferType;
+/**
+ * \brief CUPTI PC sampling utility API result codes.
+ *
+ * Error and result codes returned by CUPTI PC sampling utility API.
+ */
+typedef enum {
+  /**
+   * No error
+   */
+  CUPTI_UTIL_SUCCESS                                       = 0,
+  /**
+   * One or more of the parameters are invalid.
+   */
+  CUPTI_UTIL_ERROR_INVALID_PARAMETER                       = 1,
+  /**
+   * Unable to create a new file
+   */
+  CUPTI_UTIL_ERROR_UNABLE_TO_CREATE_FILE                   = 2,
+  /**
+   * Unable to open a file
+   */
+  CUPTI_UTIL_ERROR_UNABLE_TO_OPEN_FILE                     = 3,
+  /**
+   * Read or write operation failed
+   */
+  CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED             = 4,
+  /**
+   * Provided file handle is corrupted.
+   */
+  CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED                   = 5,
+  /**
+   * seek operation failed.
+   */
+  CUPTI_UTIL_ERROR_SEEK_OPERATION_FAILED                   = 6,
+  /**
+   * Unable to allocate enough memory to perform the requested
+   * operation.
+   */
+  CUPTI_UTIL_ERROR_OUT_OF_MEMORY                           = 7,
+  /**
+   * An unknown internal error has occurred.
+   */
+  CUPTI_UTIL_ERROR_UNKNOWN                                 = 999,
+  CUPTI_UTIL_ERROR_FORCE_INT                               = 0x7fffffff
+} CUptiUtilResult;
+/**
+ * \brief Params for \ref CuptiUtilPutPcSampData
+ */
+typedef struct {
+  /**
+   * Size of the data structure, i.e. CUptiUtil_PutPcSampDataParamsSize.
+   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
+   * available in the structure. Used to preserve backward compatibility.
+   */
+  size_t size;
+  /**
+   * Type of buffer to store in file
+   */
+  PcSamplingBufferType bufferType;
+  /**
+   * PC sampling buffer.
+   */
+  void *pSamplingData;
+  /**
+   * Number of configured attributes
+   */
+  size_t numAttributes;
+  /**
+   * Refer \ref CUpti_PCSamplingConfigurationInfo
+   * It is expected to provide configuration details of at least
+   * CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON attribute.
+   */
+  CUpti_PCSamplingConfigurationInfo *pPCSamplingConfigurationInfo;
+  /**
+   * Refer \ref PcSamplingStallReasons.
+   */
+  PcSamplingStallReasons *pPcSamplingStallReasons;
+  /**
+   * Name of the file to store the buffer into.
+   */
+  const char* fileName;
+} CUptiUtil_PutPcSampDataParams;
+#define CUptiUtil_PutPcSampDataParamsSize                   CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_PutPcSampDataParams, fileName)
+/**
+ * \brief Dump PC sampling data into the file.
+ *
+ * This API can be called multiple times.
+ * It will append the buffer to the file.
+ * For every buffer it will store \ref BufferInfo
+ * so that, before retrieving data, it will help to allocate a buffer
+ * to store the retrieved data.
+ * This API creates the file if it does not exist.
+ * If the stallReasonIndex or stallReasons pointer of the \ref PcSamplingStallReasons
+ * passed through \ref CUptiUtil_PutPcSampDataParams is NULL
+ * then stall reasons data will not be stored in the file.
+ * It is expected to store all available stall reason data at least once to refer to it during
+ * offline correlation.
+ *
+ * \param pParams Refer \ref CUptiUtil_PutPcSampDataParams
+ *
+ * \retval CUPTI_UTIL_SUCCESS
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if buffer type is invalid
+ * or if either of pSamplingData, pParams pointer is NULL or stall reason configuration details not provided
+ * or filename is empty.
+ * \retval CUPTI_UTIL_ERROR_UNABLE_TO_CREATE_FILE
+ * \retval CUPTI_UTIL_ERROR_UNABLE_TO_OPEN_FILE
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED
+ */
+CUptiUtilResult CUPTIUTILAPI CuptiUtilPutPcSampData(CUptiUtil_PutPcSampDataParams *pParams);
+/**
+ * \brief Params for \ref CuptiUtilGetHeaderData
+ */
+typedef struct {
+  /**
+   * Size of the data structure, i.e. CUptiUtil_GetHeaderDataParamsSize.
+   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
+   * available in the structure. Used to preserve backward compatibility.
+   */
+  size_t size;
+  /**
+   * File handle.
+   */
+  std::ifstream *fileHandler;
+  /**
+   * Header Info.
+   */
+  Header headerInfo;
+} CUptiUtil_GetHeaderDataParams;
+#define CUptiUtil_GetHeaderDataParamsSize                   CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_GetHeaderDataParams, headerInfo)
+/**
+ * \brief Get header data of file.
+ *
+ * This API must be called once initially while retrieving data from file.
+ * The \ref Header structure gives info about the total number
+ * of buffers present in the file.
+ *
+ * \param pParams Refer \ref CUptiUtil_GetHeaderDataParams
+ *
+ * \retval CUPTI_UTIL_SUCCESS
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if either of pParams or fileHandler is NULL or param struct size is incorrect.
+ * \retval CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED  failed to read data from file.
+ */
+CUptiUtilResult CUPTIUTILAPI CuptiUtilGetHeaderData(CUptiUtil_GetHeaderDataParams *pParams);
+/**
+ * \brief Params for \ref CuptiUtilGetBufferInfo
+ */
+typedef struct {
+  /**
+   * Size of the data structure, i.e. CUptiUtil_GetBufferInfoParamsSize.
+   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
+   * available in the structure. Used to preserve backward compatibility.
+   */
+  size_t size;
+  /**
+   * File handle.
+   */
+  std::ifstream *fileHandler;
+  /**
+   * Buffer Info.
+   */
+  BufferInfo bufferInfoData;
+} CUptiUtil_GetBufferInfoParams;
+#define CUptiUtil_GetBufferInfoParamsSize                   CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_GetBufferInfoParams, bufferInfoData)
+/**
+ * \brief Get buffer info data of file.
+ *
+ * This API must be called every time before calling the \ref CuptiUtilGetPcSampData API.
+ * The \ref BufferInfo structure gives info about recordCount and the number of
+ * stall reasons in every record (numSelectedStallReasons) of the buffer.
+ * This will help to allocate an exact buffer to retrieve data into.
+ *
+ * \param pParams Refer \ref CUptiUtil_GetBufferInfoParams
+ *
+ * \retval CUPTI_UTIL_SUCCESS
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if either of pParams or fileHandler is NULL or param struct size is incorrect.
+ * \retval CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file.
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED failed to read data from file.
+ */
+CUptiUtilResult CUPTIUTILAPI CuptiUtilGetBufferInfo(CUptiUtil_GetBufferInfoParams *pParams);
+/**
+ * \brief Params for \ref CuptiUtilGetPcSampData
+ */
+typedef struct {
+  /**
+   * Size of the data structure, i.e. CUptiUtil_GetPcSampDataParamsSize.
+   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
+   * available in the structure. Used to preserve backward compatibility.
+   */
+  size_t size;
+  /**
+   * File handle.
+   */
+  std::ifstream *fileHandler;
+  /**
+   * Type of buffer to store in file
+   */
+  PcSamplingBufferType bufferType;
+  /**
+   * Pointer to collected buffer info using \ref CuptiUtilGetBufferInfo
+   */
+  BufferInfo *pBufferInfoData;
+  /**
+   * Pointer to allocated memory to store retrieved data from file.
+   */
+  void *pSamplingData;
+  /**
+   * Number of configuration attributes
+   */
+  size_t numAttributes;
+  /**
+   * Refer \ref CUpti_PCSamplingConfigurationInfo
+   */
+  CUpti_PCSamplingConfigurationInfo *pPCSamplingConfigurationInfo;
+  /**
+   * Refer \ref PcSamplingStallReasons.
+   * For stallReasons field of \ref PcSamplingStallReasons it is expected to
+   * allocate memory for each string element of array.
+   */
+  PcSamplingStallReasons *pPcSamplingStallReasons;
+} CUptiUtil_GetPcSampDataParams;
+#define CUptiUtil_GetPcSampDataParamsSize                   CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_GetPcSampDataParams, pPcSamplingStallReasons)
+/**
+ * \brief Retrieve PC sampling data from file into allocated buffer.
+ *
+ * This API must be called after the \ref CuptiUtilGetBufferInfo API.
+ * It will retrieve data from file into allocated buffer.
+ *
+ * \param pParams Refer \ref CUptiUtil_GetPcSampDataParams
+ *
+ * \retval CUPTI_UTIL_SUCCESS
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if buffer type is invalid
+ * or if either of pSamplingData, pParams is NULL. If pPcSamplingStallReasons is not NULL then
+ * error out if either of stallReasonIndex, stallReasons or stallReasons array element pointer is NULL,
+ * or filename is empty. NOTE(review): \ref CUptiUtil_GetPcSampDataParams has no file name field
+ * (it carries fileHandler instead), so the "filename is empty" condition looks copied from
+ * \ref CuptiUtilPutPcSampData -- confirm.
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED
+ * \retval CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file.
+ */
+CUptiUtilResult CUPTIUTILAPI CuptiUtilGetPcSampData(CUptiUtil_GetPcSampDataParams *pParams);
+/**
+ * \brief Params for \ref CuptiUtilMergePcSampData
+ */
+typedef struct
+{
+  /**
+   * Size of the data structure, i.e. CUptiUtil_MergePcSampDataParamsSize.
+   * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
+   * available in the structure. Used to preserve backward compatibility.
+   */
+  size_t size;
+  /**
+   * Number of buffers to merge.
+   */
+  size_t numberOfBuffers;
+  /**
+   * Pointer to array of buffers to merge
+   */
+  CUpti_PCSamplingData *PcSampDataBuffer;
+  /**
+   * Pointer to array of merged buffers as per the range id.
+   */
+  CUpti_PCSamplingData **MergedPcSampDataBuffers;
+  /**
+   * Number of merged buffers.
+   */
+  size_t *numMergedBuffer;
+} CUptiUtil_MergePcSampDataParams;
+#define CUptiUtil_MergePcSampDataParamsSize                   CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_MergePcSampDataParams, numMergedBuffer)
+/**
+ * \brief Merge PC sampling data range id wise.
+ *
+ * This API merges PC sampling data range id wise.
+ * It allocates memory for the merged data, fills the data in,
+ * and provides the buffer pointer in the MergedPcSampDataBuffers field.
+ * The user is expected to free the merged data buffers after use.
+ *
+ * \param pParams Refer \ref CUptiUtil_MergePcSampDataParams
+ *
+ * \retval CUPTI_UTIL_SUCCESS
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if param struct size is invalid
+ * or count of buffers to merge is invalid i.e less than 1
+ * or either of PcSampDataBuffer, MergedPcSampDataBuffers, numMergedBuffer is NULL
+ * \retval CUPTI_UTIL_ERROR_OUT_OF_MEMORY Unable to allocate memory for merged buffer.
+ */
+CUptiUtilResult CUPTIUTILAPI CuptiUtilMergePcSampData(CUptiUtil_MergePcSampDataParams *pParams);
+/** @} */ /* END CUPTI_PCSAMPLING_UTILITY */
+} }
+#if defined(__GNUC__)
+    #pragma GCC visibility pop
+#endif
+#if defined(__cplusplus)
+}
+#endif
+#endif

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_result.h ADDED Viewed

	@@ -0,0 +1,328 @@

+/*
+ * Copyright 2010-2021 NVIDIA Corporation.  All rights reserved.
+ *
+ * NOTICE TO LICENSEE:
+ *
+ * This source code and/or documentation ("Licensed Deliverables") are
+ * subject to NVIDIA intellectual property rights under U.S. and
+ * international Copyright laws.
+ *
+ * These Licensed Deliverables contained herein is PROPRIETARY and
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
+ * conditions of a form of NVIDIA software license agreement by and
+ * between NVIDIA and Licensee ("License Agreement") or electronically
+ * accepted by Licensee.  Notwithstanding any terms or conditions to
+ * the contrary in the License Agreement, reproduction or disclosure
+ * of the Licensed Deliverables to any third party without the express
+ * written consent of NVIDIA is prohibited.
+ *
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THESE LICENSED DELIVERABLES.
+ *
+ * U.S. Government End Users.  These Licensed Deliverables are a
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+ * 1995), consisting of "commercial computer software" and "commercial
+ * computer software documentation" as such terms are used in 48
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
+ * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+ * U.S. Government End Users acquire the Licensed Deliverables with
+ * only those rights set forth herein.
+ *
+ * Any use of the Licensed Deliverables in individual and commercial
+ * software must include, in the user documentation and internal
+ * comments to the code, the above Disclaimer and U.S. Government End
+ * Users Notice.
+ */
+#if !defined(_CUPTI_RESULT_H_)
+#define _CUPTI_RESULT_H_
+#ifndef CUPTIAPI
+#ifdef _WIN32
+#define CUPTIAPI __stdcall
+#else
+#define CUPTIAPI
+#endif
+#endif
+#if defined(__cplusplus)
+extern "C" {
+#endif
+#if defined(__GNUC__) && defined(CUPTI_LIB)
+    #pragma GCC visibility push(default)
+#endif
+/**
+ * \defgroup CUPTI_RESULT_API CUPTI Result Codes
+ * Error and result codes returned by CUPTI functions.
+ * @{
+ */
+/**
+ * \brief CUPTI result codes.
+ *
+ * Error and result codes returned by CUPTI functions.
+ * Every enumerator is assigned an explicit numeric value.
+ * NOTE(review): CUPTI_ERROR_FORCE_INT (0x7fffffff) is presumably a
+ * sentinel that forces an int-sized enum rather than a real error
+ * code -- confirm.
+ */
+typedef enum {
+    /**
+     * No error.
+     */
+    CUPTI_SUCCESS                                       = 0,
+    /**
+     * One or more of the parameters is invalid.
+     */
+    CUPTI_ERROR_INVALID_PARAMETER                       = 1,
+    /**
+     * The device does not correspond to a valid CUDA device.
+     */
+    CUPTI_ERROR_INVALID_DEVICE                          = 2,
+    /**
+     * The context is NULL or not valid.
+     */
+    CUPTI_ERROR_INVALID_CONTEXT                         = 3,
+    /**
+     * The event domain id is invalid.
+     */
+    CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID                 = 4,
+    /**
+     * The event id is invalid.
+     */
+    CUPTI_ERROR_INVALID_EVENT_ID                        = 5,
+    /**
+     * The event name is invalid.
+     */
+    CUPTI_ERROR_INVALID_EVENT_NAME                      = 6,
+    /**
+     * The current operation cannot be performed due to dependency on
+     * other factors.
+     */
+    CUPTI_ERROR_INVALID_OPERATION                       = 7,
+    /**
+     * Unable to allocate enough memory to perform the requested
+     * operation.
+     */
+    CUPTI_ERROR_OUT_OF_MEMORY                           = 8,
+    /**
+     * An error occurred on the performance monitoring hardware.
+     */
+    CUPTI_ERROR_HARDWARE                                = 9,
+    /**
+     * The output buffer size is not sufficient to return all
+     * requested data.
+     */
+    CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT           = 10,
+    /**
+     * API is not implemented.
+     */
+    CUPTI_ERROR_API_NOT_IMPLEMENTED                     = 11,
+    /**
+     * The maximum limit is reached.
+     */
+    CUPTI_ERROR_MAX_LIMIT_REACHED                       = 12,
+    /**
+     * The object is not yet ready to perform the requested operation.
+     */
+    CUPTI_ERROR_NOT_READY                               = 13,
+    /**
+     * The current operation is not compatible with the current state
+     * of the object.
+     */
+    CUPTI_ERROR_NOT_COMPATIBLE                          = 14,
+    /**
+     * CUPTI is unable to initialize its connection to the CUDA
+     * driver.
+     */
+    CUPTI_ERROR_NOT_INITIALIZED                         = 15,
+    /**
+     * The metric id is invalid.
+     */
+    CUPTI_ERROR_INVALID_METRIC_ID                        = 16,
+    /**
+     * The metric name is invalid.
+     */
+    CUPTI_ERROR_INVALID_METRIC_NAME                      = 17,
+    /**
+     * The queue is empty.
+     */
+    CUPTI_ERROR_QUEUE_EMPTY                              = 18,
+    /**
+     * Invalid handle (internal?).
+     */
+    CUPTI_ERROR_INVALID_HANDLE                           = 19,
+    /**
+     * Invalid stream.
+     */
+    CUPTI_ERROR_INVALID_STREAM                           = 20,
+    /**
+     * Invalid kind.
+     */
+    CUPTI_ERROR_INVALID_KIND                             = 21,
+    /**
+     * Invalid event value.
+     */
+    CUPTI_ERROR_INVALID_EVENT_VALUE                      = 22,
+    /**
+     * CUPTI is disabled due to conflicts with other enabled profilers.
+     */
+    CUPTI_ERROR_DISABLED                                 = 23,
+    /**
+     * Invalid module.
+     */
+    CUPTI_ERROR_INVALID_MODULE                           = 24,
+    /**
+     * Invalid metric value.
+     */
+    CUPTI_ERROR_INVALID_METRIC_VALUE                     = 25,
+    /**
+     * The performance monitoring hardware is in use by another client.
+     */
+    CUPTI_ERROR_HARDWARE_BUSY                            = 26,
+    /**
+     * The attempted operation is not supported on the current
+     * system or device.
+     */
+    CUPTI_ERROR_NOT_SUPPORTED                            = 27,
+    /**
+     * Unified memory profiling is not supported on the system.
+     * Potential reason could be unsupported OS or architecture.
+     */
+    CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED               = 28,
+    /**
+     * Unified memory profiling is not supported on the device.
+     */
+    CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE     = 29,
+    /**
+     * Unified memory profiling is not supported on a multi-GPU
+     * configuration without P2P support between any pair of devices.
+     */
+    CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES = 30,
+    /**
+     * Unified memory profiling is not supported under the
+     * Multi-Process Service (MPS) environment. CUDA 7.5 removes this
+     * restriction.
+     */
+    CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_WITH_MPS      = 31,
+    /**
+     * In CUDA 9.0, devices with compute capability 7.0 don't
+     * support CDP tracing.
+     */
+    CUPTI_ERROR_CDP_TRACING_NOT_SUPPORTED                = 32,
+    /**
+     * Profiling on virtualized GPU is not supported.
+     */
+    CUPTI_ERROR_VIRTUALIZED_DEVICE_NOT_SUPPORTED         = 33,
+    /**
+     * Profiling results might be incorrect for CUDA applications
+     * compiled with nvcc version older than 9.0 for devices with
+     * compute capability 6.0 and 6.1.
+     * Profiling session will continue and CUPTI will notify it using this error code.
+     * User is advised to recompile the application code with nvcc version 9.0 or later.
+     * Ignore this warning if code is already compiled with the recommended nvcc version.
+     */
+    CUPTI_ERROR_CUDA_COMPILER_NOT_COMPATIBLE             = 34,
+    /**
+     * User doesn't have sufficient privileges which are required to
+     * start the profiling session.
+     * One possible reason for this may be that the NVIDIA driver or your system
+     * administrator may have restricted access to the NVIDIA GPU performance counters.
+     * To learn how to resolve this issue and find more information, please visit
+     * https://developer.nvidia.com/CUPTI_ERROR_INSUFFICIENT_PRIVILEGES
+     */
+    CUPTI_ERROR_INSUFFICIENT_PRIVILEGES                  = 35,
+    /**
+     * Legacy CUPTI Profiling API i.e. event API from the header cupti_events.h and
+     * metric API from the header cupti_metrics.h are not compatible with the
+     * Profiling API in the header cupti_profiler_target.h and Perfworks metrics API
+     * in the headers nvperf_host.h and nvperf_target.h.
+     */
+    CUPTI_ERROR_OLD_PROFILER_API_INITIALIZED             = 36,
+    /**
+     * Missing definition of the OpenACC API routine in the linked OpenACC library.
+     *
+     * One possible reason is that OpenACC library is linked statically in the
+     * user application, which might not have the definition of all the OpenACC
+     * API routines needed for the OpenACC profiling, as compiler might ignore
+     * definitions for the functions not used in the application. This issue
+     * can be mitigated by linking the OpenACC library dynamically.
+     */
+    CUPTI_ERROR_OPENACC_UNDEFINED_ROUTINE                = 37,
+    /**
+     * Legacy CUPTI Profiling API i.e. event API from the header cupti_events.h and
+     * metric API from the header cupti_metrics.h are not supported on devices with
+     * compute capability 7.5 and higher (i.e. Turing and later GPU architectures).
+     * These APIs will be deprecated in a future CUDA release. These are replaced by
+     * Profiling API in the header cupti_profiler_target.h and Perfworks metrics API
+     * in the headers nvperf_host.h and nvperf_target.h.
+     */
+    CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED            = 38,
+    /**
+     * CUPTI doesn't allow multiple callback subscribers. Only a single subscriber
+     * can be registered at a time.
+     * Same error code is used when application is launched using NVIDIA tools
+     * like nvprof, Visual Profiler, Nsight Systems, Nsight Compute, cuda-gdb and
+     * cuda-memcheck.
+     */
+    CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED       = 39,
+    /**
+     * Profiling on virtualized GPU is not allowed by hypervisor.
+     */
+    CUPTI_ERROR_VIRTUALIZED_DEVICE_INSUFFICIENT_PRIVILEGES = 40,
+    /**
+     * Profiling and tracing are not allowed when confidential computing mode
+     * is enabled.
+     */
+    CUPTI_ERROR_CONFIDENTIAL_COMPUTING_NOT_SUPPORTED = 41,
+    /**
+     * CUPTI does not support NVIDIA Crypto Mining Processors (CMP).
+     * For more information, please visit https://developer.nvidia.com/ERR_NVCMPGPU
+     */
+    CUPTI_ERROR_CMP_DEVICE_NOT_SUPPORTED = 42,
+    /**
+     * An unknown internal error has occurred.
+     */
+    CUPTI_ERROR_UNKNOWN                                  = 999,
+    CUPTI_ERROR_FORCE_INT                                = 0x7fffffff
+} CUptiResult;
+/**
+ * \brief Get the descriptive string for a CUptiResult.
+ *
+ * Return the descriptive string for a CUptiResult in \p *str.
+ * \note \b Thread-safety: this function is thread safe.
+ * NOTE(review): ownership and lifetime of the string stored in \p *str
+ * are not stated in this header -- confirm before freeing or caching it.
+ *
+ * \param result The result code whose descriptive string is requested
+ * \param str Output parameter; on success \p *str is set to the
+ * descriptive string
+ *
+ * \retval CUPTI_SUCCESS on success
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p str is NULL or \p
+ * result is not a valid CUptiResult
+ */
+CUptiResult CUPTIAPI cuptiGetResultString(CUptiResult result, const char **str);
+/** @} */ /* END CUPTI_RESULT_API */
+#if defined(__GNUC__) && defined(CUPTI_LIB)
+    #pragma GCC visibility pop
+#endif
+#if defined(__cplusplus)
+}
+#endif
+#endif /*_CUPTI_RESULT_H_*/

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_runtime_cbid.h ADDED Viewed

	@@ -0,0 +1,447 @@

+// *************************************************************************
+//      Definitions of indices for API functions, unique across entire API
+// *************************************************************************
+// This file is generated.  Any changes you make will be lost during the next clean build.
+// CUDA public interface, for type definitions and cu* function prototypes
+typedef enum CUpti_runtime_api_trace_cbid_enum {
+    CUPTI_RUNTIME_TRACE_CBID_INVALID                                                       = 0,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDriverGetVersion_v3020                                    = 1,
+    CUPTI_RUNTIME_TRACE_CBID_cudaRuntimeGetVersion_v3020                                   = 2,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceCount_v3020                                      = 3,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceProperties_v3020                                 = 4,
+    CUPTI_RUNTIME_TRACE_CBID_cudaChooseDevice_v3020                                        = 5,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetChannelDesc_v3020                                      = 6,
+    CUPTI_RUNTIME_TRACE_CBID_cudaCreateChannelDesc_v3020                                   = 7,
+    CUPTI_RUNTIME_TRACE_CBID_cudaConfigureCall_v3020                                       = 8,
+    CUPTI_RUNTIME_TRACE_CBID_cudaSetupArgument_v3020                                       = 9,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetLastError_v3020                                        = 10,
+    CUPTI_RUNTIME_TRACE_CBID_cudaPeekAtLastError_v3020                                     = 11,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetErrorString_v3020                                      = 12,
+    CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_v3020                                              = 13,
+    CUPTI_RUNTIME_TRACE_CBID_cudaFuncSetCacheConfig_v3020                                  = 14,
+    CUPTI_RUNTIME_TRACE_CBID_cudaFuncGetAttributes_v3020                                   = 15,
+    CUPTI_RUNTIME_TRACE_CBID_cudaSetDevice_v3020                                           = 16,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetDevice_v3020                                           = 17,
+    CUPTI_RUNTIME_TRACE_CBID_cudaSetValidDevices_v3020                                     = 18,
+    CUPTI_RUNTIME_TRACE_CBID_cudaSetDeviceFlags_v3020                                      = 19,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMalloc_v3020                                              = 20,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMallocPitch_v3020                                         = 21,
+    CUPTI_RUNTIME_TRACE_CBID_cudaFree_v3020                                                = 22,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMallocArray_v3020                                         = 23,
+    CUPTI_RUNTIME_TRACE_CBID_cudaFreeArray_v3020                                           = 24,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMallocHost_v3020                                          = 25,
+    CUPTI_RUNTIME_TRACE_CBID_cudaFreeHost_v3020                                            = 26,
+    CUPTI_RUNTIME_TRACE_CBID_cudaHostAlloc_v3020                                           = 27,
+    CUPTI_RUNTIME_TRACE_CBID_cudaHostGetDevicePointer_v3020                                = 28,
+    CUPTI_RUNTIME_TRACE_CBID_cudaHostGetFlags_v3020                                        = 29,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemGetInfo_v3020                                          = 30,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020                                              = 31,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2D_v3020                                            = 32,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArray_v3020                                       = 33,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArray_v3020                                     = 34,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArray_v3020                                     = 35,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArray_v3020                                   = 36,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyArrayToArray_v3020                                  = 37,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DArrayToArray_v3020                                = 38,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbol_v3020                                      = 39,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbol_v3020                                    = 40,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyAsync_v3020                                         = 41,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArrayAsync_v3020                                  = 42,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArrayAsync_v3020                                = 43,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DAsync_v3020                                       = 44,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArrayAsync_v3020                                = 45,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArrayAsync_v3020                              = 46,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbolAsync_v3020                                 = 47,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbolAsync_v3020                               = 48,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemset_v3020                                              = 49,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemset2D_v3020                                            = 50,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemsetAsync_v3020                                         = 51,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemset2DAsync_v3020                                       = 52,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetSymbolAddress_v3020                                    = 53,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetSymbolSize_v3020                                       = 54,
+    CUPTI_RUNTIME_TRACE_CBID_cudaBindTexture_v3020                                         = 55,
+    CUPTI_RUNTIME_TRACE_CBID_cudaBindTexture2D_v3020                                       = 56,
+    CUPTI_RUNTIME_TRACE_CBID_cudaBindTextureToArray_v3020                                  = 57,
+    CUPTI_RUNTIME_TRACE_CBID_cudaUnbindTexture_v3020                                       = 58,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureAlignmentOffset_v3020                           = 59,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureReference_v3020                                 = 60,
+    CUPTI_RUNTIME_TRACE_CBID_cudaBindSurfaceToArray_v3020                                  = 61,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetSurfaceReference_v3020                                 = 62,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGLSetGLDevice_v3020                                       = 63,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGLRegisterBufferObject_v3020                              = 64,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGLMapBufferObject_v3020                                   = 65,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGLUnmapBufferObject_v3020                                 = 66,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGLUnregisterBufferObject_v3020                            = 67,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGLSetBufferObjectMapFlags_v3020                           = 68,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGLMapBufferObjectAsync_v3020                              = 69,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGLUnmapBufferObjectAsync_v3020                            = 70,
+    CUPTI_RUNTIME_TRACE_CBID_cudaWGLGetDevice_v3020                                        = 71,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsGLRegisterImage_v3020                             = 72,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsGLRegisterBuffer_v3020                            = 73,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsUnregisterResource_v3020                          = 74,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceSetMapFlags_v3020                         = 75,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsMapResources_v3020                                = 76,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsUnmapResources_v3020                              = 77,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceGetMappedPointer_v3020                    = 78,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsSubResourceGetMappedArray_v3020                   = 79,
+    CUPTI_RUNTIME_TRACE_CBID_cudaVDPAUGetDevice_v3020                                      = 80,
+    CUPTI_RUNTIME_TRACE_CBID_cudaVDPAUSetVDPAUDevice_v3020                                 = 81,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsVDPAURegisterVideoSurface_v3020                   = 82,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsVDPAURegisterOutputSurface_v3020                  = 83,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D11GetDevice_v3020                                      = 84,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D11GetDevices_v3020                                     = 85,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D11SetDirect3DDevice_v3020                              = 86,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsD3D11RegisterResource_v3020                       = 87,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D10GetDevice_v3020                                      = 88,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D10GetDevices_v3020                                     = 89,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D10SetDirect3DDevice_v3020                              = 90,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsD3D10RegisterResource_v3020                       = 91,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D10RegisterResource_v3020                               = 92,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D10UnregisterResource_v3020                             = 93,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D10MapResources_v3020                                   = 94,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D10UnmapResources_v3020                                 = 95,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceSetMapFlags_v3020                            = 96,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetSurfaceDimensions_v3020                   = 97,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedArray_v3020                         = 98,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedPointer_v3020                       = 99,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedSize_v3020                          = 100,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedPitch_v3020                         = 101,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9GetDevice_v3020                                       = 102,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9GetDevices_v3020                                      = 103,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9SetDirect3DDevice_v3020                               = 104,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9GetDirect3DDevice_v3020                               = 105,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsD3D9RegisterResource_v3020                        = 106,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9RegisterResource_v3020                                = 107,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnregisterResource_v3020                              = 108,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9MapResources_v3020                                    = 109,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnmapResources_v3020                                  = 110,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceSetMapFlags_v3020                             = 111,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetSurfaceDimensions_v3020                    = 112,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedArray_v3020                          = 113,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedPointer_v3020                        = 114,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedSize_v3020                           = 115,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedPitch_v3020                          = 116,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9Begin_v3020                                           = 117,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9End_v3020                                             = 118,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9RegisterVertexBuffer_v3020                            = 119,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnregisterVertexBuffer_v3020                          = 120,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9MapVertexBuffer_v3020                                 = 121,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnmapVertexBuffer_v3020                               = 122,
+    CUPTI_RUNTIME_TRACE_CBID_cudaThreadExit_v3020                                          = 123,
+    CUPTI_RUNTIME_TRACE_CBID_cudaSetDoubleForDevice_v3020                                  = 124,
+    CUPTI_RUNTIME_TRACE_CBID_cudaSetDoubleForHost_v3020                                    = 125,
+    CUPTI_RUNTIME_TRACE_CBID_cudaThreadSynchronize_v3020                                   = 126,
+    CUPTI_RUNTIME_TRACE_CBID_cudaThreadGetLimit_v3020                                      = 127,
+    CUPTI_RUNTIME_TRACE_CBID_cudaThreadSetLimit_v3020                                      = 128,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamCreate_v3020                                        = 129,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamDestroy_v3020                                       = 130,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamSynchronize_v3020                                   = 131,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamQuery_v3020                                         = 132,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEventCreate_v3020                                         = 133,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEventCreateWithFlags_v3020                                = 134,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEventRecord_v3020                                         = 135,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEventDestroy_v3020                                        = 136,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEventSynchronize_v3020                                    = 137,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEventQuery_v3020                                          = 138,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEventElapsedTime_v3020                                    = 139,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMalloc3D_v3020                                            = 140,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMalloc3DArray_v3020                                       = 141,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemset3D_v3020                                            = 142,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemset3DAsync_v3020                                       = 143,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3D_v3020                                            = 144,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DAsync_v3020                                       = 145,
+    CUPTI_RUNTIME_TRACE_CBID_cudaThreadSetCacheConfig_v3020                                = 146,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamWaitEvent_v3020                                     = 147,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D11GetDirect3DDevice_v3020                              = 148,
+    CUPTI_RUNTIME_TRACE_CBID_cudaD3D10GetDirect3DDevice_v3020                              = 149,
+    CUPTI_RUNTIME_TRACE_CBID_cudaThreadGetCacheConfig_v3020                                = 150,
+    CUPTI_RUNTIME_TRACE_CBID_cudaPointerGetAttributes_v4000                                = 151,
+    CUPTI_RUNTIME_TRACE_CBID_cudaHostRegister_v4000                                        = 152,
+    CUPTI_RUNTIME_TRACE_CBID_cudaHostUnregister_v4000                                      = 153,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceCanAccessPeer_v4000                                 = 154,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceEnablePeerAccess_v4000                              = 155,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceDisablePeerAccess_v4000                             = 156,
+    CUPTI_RUNTIME_TRACE_CBID_cudaPeerRegister_v4000                                        = 157,
+    CUPTI_RUNTIME_TRACE_CBID_cudaPeerUnregister_v4000                                      = 158,
+    CUPTI_RUNTIME_TRACE_CBID_cudaPeerGetDevicePointer_v4000                                = 159,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyPeer_v4000                                          = 160,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyPeerAsync_v4000                                     = 161,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeer_v4000                                        = 162,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeerAsync_v4000                                   = 163,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceReset_v3020                                         = 164,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSynchronize_v3020                                   = 165,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetLimit_v3020                                      = 166,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetLimit_v3020                                      = 167,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetCacheConfig_v3020                                = 168,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetCacheConfig_v3020                                = 169,
+    CUPTI_RUNTIME_TRACE_CBID_cudaProfilerInitialize_v4000                                  = 170,
+    CUPTI_RUNTIME_TRACE_CBID_cudaProfilerStart_v4000                                       = 171,
+    CUPTI_RUNTIME_TRACE_CBID_cudaProfilerStop_v4000                                        = 172,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetByPCIBusId_v4010                                 = 173,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetPCIBusId_v4010                                   = 174,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGLGetDevices_v4010                                        = 175,
+    CUPTI_RUNTIME_TRACE_CBID_cudaIpcGetEventHandle_v4010                                   = 176,
+    CUPTI_RUNTIME_TRACE_CBID_cudaIpcOpenEventHandle_v4010                                  = 177,
+    CUPTI_RUNTIME_TRACE_CBID_cudaIpcGetMemHandle_v4010                                     = 178,
+    CUPTI_RUNTIME_TRACE_CBID_cudaIpcOpenMemHandle_v4010                                    = 179,
+    CUPTI_RUNTIME_TRACE_CBID_cudaIpcCloseMemHandle_v4010                                   = 180,
+    CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetInfo_v4010                                        = 181,
+    CUPTI_RUNTIME_TRACE_CBID_cudaFuncSetSharedMemConfig_v4020                              = 182,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetSharedMemConfig_v4020                            = 183,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetSharedMemConfig_v4020                            = 184,
+    CUPTI_RUNTIME_TRACE_CBID_cudaCreateTextureObject_v5000                                 = 185,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDestroyTextureObject_v5000                                = 186,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectResourceDesc_v5000                        = 187,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectTextureDesc_v5000                         = 188,
+    CUPTI_RUNTIME_TRACE_CBID_cudaCreateSurfaceObject_v5000                                 = 189,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDestroySurfaceObject_v5000                                = 190,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetSurfaceObjectResourceDesc_v5000                        = 191,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMallocMipmappedArray_v5000                                = 192,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetMipmappedArrayLevel_v5000                              = 193,
+    CUPTI_RUNTIME_TRACE_CBID_cudaFreeMipmappedArray_v5000                                  = 194,
+    CUPTI_RUNTIME_TRACE_CBID_cudaBindTextureToMipmappedArray_v5000                         = 195,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceGetMappedMipmappedArray_v5000             = 196,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamAddCallback_v5000                                   = 197,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamCreateWithFlags_v5000                               = 198,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectResourceViewDesc_v5000                    = 199,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetAttribute_v5000                                  = 200,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamDestroy_v5050                                       = 201,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamCreateWithPriority_v5050                            = 202,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetPriority_v5050                                   = 203,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetFlags_v5050                                      = 204,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetStreamPriorityRange_v5050                        = 205,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMallocManaged_v6000                                       = 206,
+    CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6000           = 207,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamAttachMemAsync_v6000                                = 208,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetErrorName_v6050                                        = 209,
+    CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050           = 210,
+    CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000                                        = 211,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceFlags_v7000                                      = 212,
+    CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_ptsz_v7000                                         = 213,
+    CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_ptsz_v7000                                   = 214,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_ptds_v7000                                         = 215,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2D_ptds_v7000                                       = 216,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArray_ptds_v7000                                  = 217,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArray_ptds_v7000                                = 218,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArray_ptds_v7000                                = 219,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArray_ptds_v7000                              = 220,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyArrayToArray_ptds_v7000                             = 221,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DArrayToArray_ptds_v7000                           = 222,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbol_ptds_v7000                                 = 223,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbol_ptds_v7000                               = 224,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyAsync_ptsz_v7000                                    = 225,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArrayAsync_ptsz_v7000                             = 226,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArrayAsync_ptsz_v7000                           = 227,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DAsync_ptsz_v7000                                  = 228,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArrayAsync_ptsz_v7000                           = 229,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArrayAsync_ptsz_v7000                         = 230,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbolAsync_ptsz_v7000                            = 231,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbolAsync_ptsz_v7000                          = 232,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemset_ptds_v7000                                         = 233,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemset2D_ptds_v7000                                       = 234,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemsetAsync_ptsz_v7000                                    = 235,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemset2DAsync_ptsz_v7000                                  = 236,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetPriority_ptsz_v7000                              = 237,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetFlags_ptsz_v7000                                 = 238,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamSynchronize_ptsz_v7000                              = 239,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamQuery_ptsz_v7000                                    = 240,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamAttachMemAsync_ptsz_v7000                           = 241,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEventRecord_ptsz_v7000                                    = 242,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemset3D_ptds_v7000                                       = 243,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemset3DAsync_ptsz_v7000                                  = 244,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3D_ptds_v7000                                       = 245,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DAsync_ptsz_v7000                                  = 246,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamWaitEvent_ptsz_v7000                                = 247,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamAddCallback_ptsz_v7000                              = 248,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeer_ptds_v7000                                   = 249,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeerAsync_ptsz_v7000                              = 250,
+    CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_v7000  = 251,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemPrefetchAsync_v8000                                    = 252,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemPrefetchAsync_ptsz_v8000                               = 253,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemAdvise_v8000                                           = 254,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetP2PAttribute_v8000                               = 255,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsEGLRegisterImage_v7000                            = 256,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerConnect_v7000                            = 257,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerDisconnect_v7000                         = 258,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerAcquireFrame_v7000                       = 259,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerReleaseFrame_v7000                       = 260,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerConnect_v7000                            = 261,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerDisconnect_v7000                         = 262,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerPresentFrame_v7000                       = 263,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerReturnFrame_v7000                        = 264,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceGetMappedEglFrame_v7000                   = 265,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemRangeGetAttribute_v8000                                = 266,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemRangeGetAttributes_v8000                               = 267,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerConnectWithFlags_v7000                   = 268,
+    CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernel_v9000                             = 269,
+    CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernel_ptsz_v9000                        = 270,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEventCreateFromEGLSync_v9000                              = 271,
+    CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernelMultiDevice_v9000                  = 272,
+    CUPTI_RUNTIME_TRACE_CBID_cudaFuncSetAttribute_v9000                                    = 273,
+    CUPTI_RUNTIME_TRACE_CBID_cudaImportExternalMemory_v10000                               = 274,
+    CUPTI_RUNTIME_TRACE_CBID_cudaExternalMemoryGetMappedBuffer_v10000                      = 275,
+    CUPTI_RUNTIME_TRACE_CBID_cudaExternalMemoryGetMappedMipmappedArray_v10000              = 276,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDestroyExternalMemory_v10000                              = 277,
+    CUPTI_RUNTIME_TRACE_CBID_cudaImportExternalSemaphore_v10000                            = 278,
+    CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_v10000                      = 279,
+    CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_ptsz_v10000                 = 280,
+    CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_v10000                        = 281,
+    CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_ptsz_v10000                   = 282,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDestroyExternalSemaphore_v10000                           = 283,
+    CUPTI_RUNTIME_TRACE_CBID_cudaLaunchHostFunc_v10000                                     = 284,
+    CUPTI_RUNTIME_TRACE_CBID_cudaLaunchHostFunc_ptsz_v10000                                = 285,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphCreate_v10000                                        = 286,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeGetParams_v10000                           = 287,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeSetParams_v10000                           = 288,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddKernelNode_v10000                                 = 289,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNode_v10000                                 = 290,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeGetParams_v10000                           = 291,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParams_v10000                           = 292,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemsetNode_v10000                                 = 293,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemsetNodeGetParams_v10000                           = 294,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemsetNodeSetParams_v10000                           = 295,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddHostNode_v10000                                   = 296,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphHostNodeGetParams_v10000                             = 297,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddChildGraphNode_v10000                             = 298,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphChildGraphNodeGetGraph_v10000                        = 299,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddEmptyNode_v10000                                  = 300,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphClone_v10000                                         = 301,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeFindInClone_v10000                               = 302,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetType_v10000                                   = 303,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetRootNodes_v10000                                  = 304,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetDependencies_v10000                           = 305,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetDependentNodes_v10000                         = 306,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddDependencies_v10000                               = 307,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphRemoveDependencies_v10000                            = 308,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphDestroyNode_v10000                                   = 309,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiate_v10000                                   = 310,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphLaunch_v10000                                        = 311,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphLaunch_ptsz_v10000                                   = 312,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecDestroy_v10000                                   = 313,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphDestroy_v10000                                       = 314,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamBeginCapture_v10000                                 = 315,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamBeginCapture_ptsz_v10000                            = 316,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamIsCapturing_v10000                                  = 317,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamIsCapturing_ptsz_v10000                             = 318,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamEndCapture_v10000                                   = 319,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamEndCapture_ptsz_v10000                              = 320,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphHostNodeSetParams_v10000                             = 321,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetNodes_v10000                                      = 322,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetEdges_v10000                                      = 323,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v10010                               = 324,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_ptsz_v10010                          = 325,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecKernelNodeSetParams_v10010                       = 326,
+    CUPTI_RUNTIME_TRACE_CBID_cudaThreadExchangeStreamCaptureMode_v10010                    = 327,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetNvSciSyncAttributes_v10020                       = 328,
+    CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyAvailableDynamicSMemPerBlock_v10200              = 329,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetFlags_v10200                                     = 330,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetFlags_ptsz_v10200                                = 331,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParams_v10020                       = 332,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemsetNodeSetParams_v10020                       = 333,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecHostNodeSetParams_v10020                         = 334,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecUpdate_v10020                                    = 335,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetFuncBySymbol_v11000                                    = 336,
+    CUPTI_RUNTIME_TRACE_CBID_cudaCtxResetPersistingL2Cache_v11000                          = 337,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeCopyAttributes_v11000                      = 338,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeGetAttribute_v11000                        = 339,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeSetAttribute_v11000                        = 340,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamCopyAttributes_v11000                               = 341,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamCopyAttributes_ptsz_v11000                          = 342,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetAttribute_v11000                                 = 343,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetAttribute_ptsz_v11000                            = 344,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetAttribute_v11000                                 = 345,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetAttribute_ptsz_v11000                            = 346,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetTexture1DLinearMaxWidth_v11010                   = 347,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphUpload_v10000                                        = 348,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphUpload_ptsz_v10000                                   = 349,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNodeToSymbol_v11010                         = 350,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNodeFromSymbol_v11010                       = 351,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNode1D_v11010                               = 352,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParamsToSymbol_v11010                   = 353,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParamsFromSymbol_v11010                 = 354,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParams1D_v11010                         = 355,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParamsToSymbol_v11010               = 356,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParamsFromSymbol_v11010             = 357,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParams1D_v11010                     = 358,
+    CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetSparseProperties_v11010                           = 359,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMipmappedArrayGetSparseProperties_v11010                  = 360,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecChildGraphNodeSetParams_v11010                   = 361,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddEventRecordNode_v11010                            = 362,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventRecordNodeGetEvent_v11010                       = 363,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventRecordNodeSetEvent_v11010                       = 364,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddEventWaitNode_v11010                              = 365,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventWaitNodeGetEvent_v11010                         = 366,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventWaitNodeSetEvent_v11010                         = 367,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecEventRecordNodeSetEvent_v11010                   = 368,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecEventWaitNodeSetEvent_v11010                     = 369,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEventRecordWithFlags_v11010                               = 370,
+    CUPTI_RUNTIME_TRACE_CBID_cudaEventRecordWithFlags_ptsz_v11010                          = 371,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetDefaultMemPool_v11020                            = 372,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMallocAsync_v11020                                        = 373,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMallocAsync_ptsz_v11020                                   = 374,
+    CUPTI_RUNTIME_TRACE_CBID_cudaFreeAsync_v11020                                          = 375,
+    CUPTI_RUNTIME_TRACE_CBID_cudaFreeAsync_ptsz_v11020                                     = 376,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolTrimTo_v11020                                      = 377,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolSetAttribute_v11020                                = 378,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolGetAttribute_v11020                                = 379,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolSetAccess_v11020                                   = 380,
+    CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetPlane_v11020                                      = 381,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolGetAccess_v11020                                   = 382,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolCreate_v11020                                      = 383,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolDestroy_v11020                                     = 384,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetMemPool_v11020                                   = 385,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetMemPool_v11020                                   = 386,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolExportToShareableHandle_v11020                     = 387,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolImportFromShareableHandle_v11020                   = 388,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolExportPointer_v11020                               = 389,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolImportPointer_v11020                               = 390,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMallocFromPoolAsync_v11020                                = 391,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMallocFromPoolAsync_ptsz_v11020                           = 392,
+    CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_v2_v11020                   = 393,
+    CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_v2_ptsz_v11020              = 394,
+    CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_v2_v11020                     = 395,
+    CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_v2_ptsz_v11020                = 396,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddExternalSemaphoresSignalNode_v11020               = 397,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresSignalNodeGetParams_v11020         = 398,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresSignalNodeSetParams_v11020         = 399,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddExternalSemaphoresWaitNode_v11020                 = 400,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresWaitNodeGetParams_v11020           = 401,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresWaitNodeSetParams_v11020           = 402,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecExternalSemaphoresSignalNodeSetParams_v11020     = 403,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecExternalSemaphoresWaitNodeSetParams_v11020       = 404,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceFlushGPUDirectRDMAWrites_v11030                     = 405,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetDriverEntryPoint_v11030                                = 406,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGetDriverEntryPoint_ptsz_v11030                           = 407,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphDebugDotPrint_v11030                                 = 408,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v2_v11030                            = 409,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v2_ptsz_v11030                       = 410,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamUpdateCaptureDependencies_v11030                    = 411,
+    CUPTI_RUNTIME_TRACE_CBID_cudaStreamUpdateCaptureDependencies_ptsz_v11030               = 412,
+    CUPTI_RUNTIME_TRACE_CBID_cudaUserObjectCreate_v11030                                   = 413,
+    CUPTI_RUNTIME_TRACE_CBID_cudaUserObjectRetain_v11030                                   = 414,
+    CUPTI_RUNTIME_TRACE_CBID_cudaUserObjectRelease_v11030                                  = 415,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphRetainUserObject_v11030                              = 416,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphReleaseUserObject_v11030                             = 417,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiateWithFlags_v11040                          = 418,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemAllocNode_v11040                               = 419,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemAllocNodeGetParams_v11040                         = 420,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemFreeNode_v11040                                = 421,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemFreeNodeGetParams_v11040                          = 422,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGraphMemTrim_v11040                                 = 423,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetGraphMemAttribute_v11040                         = 424,
+    CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetGraphMemAttribute_v11040                         = 425,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeSetEnabled_v11060                                = 426,
+    CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetEnabled_v11060                                = 427,
+    CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetMemoryRequirements_v11060                         = 428,
+    CUPTI_RUNTIME_TRACE_CBID_cudaMipmappedArrayGetMemoryRequirements_v11060                = 429,
+    CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernelExC_v11060                                    = 430,
+    CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernelExC_ptsz_v11060                               = 431,
+    CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxPotentialClusterSize_v11070                   = 432,
+    CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveClusters_v11070                         = 433,
+    CUPTI_RUNTIME_TRACE_CBID_SIZE                                                          = 434,
+    CUPTI_RUNTIME_TRACE_CBID_FORCE_INT                                                     = 0x7fffffff
+} CUpti_runtime_api_trace_cbid;

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_target.h ADDED Viewed

	@@ -0,0 +1,43 @@

+#if !defined(_CUPTI_TARGET_H_)
+#define _CUPTI_TARGET_H_
+/*
+CUPTI profiler target APIs.
+This file contains the CUPTI profiling APIs that query the target device (cuptiDeviceGetChipName).
+*/
+#include <cupti_result.h>
+#include <stddef.h>
+#include <stdint.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if defined(__GNUC__) && defined(CUPTI_LIB)
+    #pragma GCC visibility push(default)
+#endif
+#ifndef CUPTI_PROFILER_STRUCT_SIZE  // guarded: a definition made by an earlier header is left in place
+#define CUPTI_PROFILER_STRUCT_SIZE(type_, lastfield_)                     (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
+#endif  /* CUPTI_PROFILER_STRUCT_SIZE: offset of lastfield_ inside type_ plus the size of lastfield_, i.e. the byte count of type_ through the end of that field */
+typedef struct CUpti_Device_GetChipName_Params  // parameter block passed by pointer to cuptiDeviceGetChipName()
+{
+    size_t structSize;                                      //!< [in] presumably CUpti_Device_GetChipName_Params_STRUCT_SIZE - TODO confirm against the CUPTI documentation
+    void* pPriv;                                            //!< [in] assign to NULL
+    size_t deviceIndex;                                     //!< [in] index of the device to query (valid range is not stated in this header)
+    const char* pChipName;                                  //!< [out] chip name string; ownership and lifetime are not stated in this header
+} CUpti_Device_GetChipName_Params;
+#define CUpti_Device_GetChipName_Params_STRUCT_SIZE                  CUPTI_PROFILER_STRUCT_SIZE(CUpti_Device_GetChipName_Params, pChipName)  // size measured through pChipName, the last declared field
+CUptiResult CUPTIAPI cuptiDeviceGetChipName(CUpti_Device_GetChipName_Params *pParams);  // pParams: fields annotated [in] are inputs, pChipName is annotated [out]; status is returned as a CUptiResult
+#if defined(__GNUC__) && defined(CUPTI_LIB)
+    #pragma GCC visibility pop
+#endif
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+#endif

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_version.h ADDED Viewed

	@@ -0,0 +1,130 @@

+/*
+ * Copyright 2010-2018 NVIDIA Corporation.  All rights reserved.
+ *
+ * NOTICE TO LICENSEE:
+ *
+ * This source code and/or documentation ("Licensed Deliverables") are
+ * subject to NVIDIA intellectual property rights under U.S. and
+ * international Copyright laws.
+ *
+ * These Licensed Deliverables contained herein is PROPRIETARY and
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
+ * conditions of a form of NVIDIA software license agreement by and
+ * between NVIDIA and Licensee ("License Agreement") or electronically
+ * accepted by Licensee.  Notwithstanding any terms or conditions to
+ * the contrary in the License Agreement, reproduction or disclosure
+ * of the Licensed Deliverables to any third party without the express
+ * written consent of NVIDIA is prohibited.
+ *
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THESE LICENSED DELIVERABLES.
+ *
+ * U.S. Government End Users.  These Licensed Deliverables are a
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+ * 1995), consisting of "commercial computer software" and "commercial
+ * computer software documentation" as such terms are used in 48
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
+ * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+ * U.S. Government End Users acquire the Licensed Deliverables with
+ * only those rights set forth herein.
+ *
+ * Any use of the Licensed Deliverables in individual and commercial
+ * software must include, in the user documentation and internal
+ * comments to the code, the above Disclaimer and U.S. Government End
+ * Users Notice.
+ */
+#if !defined(_CUPTI_VERSION_H_)
+#define _CUPTI_VERSION_H_
+#include <cuda_stdint.h>
+#include <cupti_result.h>
+#ifndef CUPTIAPI  // define the calling-convention macro only if nothing defined it earlier
+#ifdef _WIN32
+#define CUPTIAPI __stdcall  // _WIN32 builds: declarations tagged CUPTIAPI use __stdcall
+#else
+#define CUPTIAPI  // all other builds: expands to nothing
+#endif
+#endif
+#if defined(__cplusplus)
+extern "C" {
+#endif
+#if defined(__GNUC__) && defined(CUPTI_LIB)
+    #pragma GCC visibility push(default)
+#endif
+/**
+ * \defgroup CUPTI_VERSION_API CUPTI Version
+ * Function and macro to determine the CUPTI version.
+ * @{
+ */
+/**
+ * \brief The API version for this implementation of CUPTI.
+ *
+ * The API version for this implementation of CUPTI. This define along
+ * with \ref cuptiGetVersion can be used to dynamically detect if the
+ * version of CUPTI compiled against matches the version of the loaded
+ * CUPTI library.
+ *
+ * API version numbers and the releases they are listed against:
+ *
+ * v1 : CUDAToolsSDK 4.0
+ * v2 : CUDAToolsSDK 4.1
+ * v3 : CUDA Toolkit 5.0
+ * v4 : CUDA Toolkit 5.5
+ * v5 : CUDA Toolkit 6.0
+ * v6 : CUDA Toolkit 6.5
+ * v7 : CUDA Toolkit 6.5 (with sm_52 support)
+ * v8 : CUDA Toolkit 7.0
+ * v9 : CUDA Toolkit 8.0
+ * v10 : CUDA Toolkit 9.0
+ * v11 : CUDA Toolkit 9.1
+ * v12 : CUDA Toolkit 10.0, 10.1 and 10.2
+ * v13 : CUDA Toolkit 11.0
+ * v14 : CUDA Toolkit 11.1
+ * v15 : CUDA Toolkit 11.2, 11.3 and 11.4
+ * v16 : CUDA Toolkit 11.5
+ * v17 : CUDA Toolkit 11.6
+ * v18 : CUDA Toolkit 11.8
+ */
+#define CUPTI_API_VERSION 18
+/**
+ * \brief Get the CUPTI API version.
+ *
+ * Return the API version in \p *version. Comparing the returned value
+ * with \ref CUPTI_API_VERSION shows whether the loaded CUPTI library
+ * matches the version of CUPTI this code was compiled against.
+ *
+ * \param[out] version Receives the API version; must not be NULL
+ *
+ * \retval CUPTI_SUCCESS on success
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p version is NULL
+ * \sa CUPTI_API_VERSION
+ */
+CUptiResult CUPTIAPI cuptiGetVersion(uint32_t *version);
+/** @} */ /* END CUPTI_VERSION_API */
+#if defined(__GNUC__) && defined(CUPTI_LIB)
+    #pragma GCC visibility pop
+#endif
+#if defined(__cplusplus)
+}
+#endif
+#endif /*_CUPTI_VERSION_H_*/

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_meta.h ADDED Viewed

	@@ -0,0 +1,2941 @@

+// This file is generated.  Any changes you make will be lost during the next clean build.
+// No dependent includes
+// CUDA public interface, for type definitions and cu* function prototypes
+#include "cuda.h"
+// *************************************************************************
+//      Definitions of structs to hold parameters for each function
+// *************************************************************************
+typedef struct cuGetErrorString_params_st {  // parameter record for cuGetErrorString; fields presumably follow the function's argument order - confirm against cuda.h
+    CUresult error;
+    const char **pStr;
+} cuGetErrorString_params;
+typedef struct cuGetErrorName_params_st {  // parameter record for cuGetErrorName; same field layout as cuGetErrorString_params
+    CUresult error;
+    const char **pStr;
+} cuGetErrorName_params;
+typedef struct cuInit_params_st {  // parameter record for cuInit
+    unsigned int Flags;
+} cuInit_params;
+typedef struct cuDriverGetVersion_params_st {  // parameter record for cuDriverGetVersion
+    int *driverVersion;
+} cuDriverGetVersion_params;
+typedef struct cuDeviceGet_params_st {  // parameter record for cuDeviceGet; fields presumably follow the function's argument order - confirm against cuda.h
+    CUdevice *device;
+    int ordinal;
+} cuDeviceGet_params;
+typedef struct cuDeviceGetCount_params_st {  // parameter record for cuDeviceGetCount
+    int *count;
+} cuDeviceGetCount_params;
+typedef struct cuDeviceGetName_params_st {  // parameter record for cuDeviceGetName
+    char *name;
+    int len;
+    CUdevice dev;
+} cuDeviceGetName_params;
+typedef struct cuDeviceGetUuid_params_st {  // parameter record for cuDeviceGetUuid
+    CUuuid *uuid;
+    CUdevice dev;
+} cuDeviceGetUuid_params;
+typedef struct cuDeviceGetUuid_v2_params_st {  // parameter record for cuDeviceGetUuid_v2; same field layout as cuDeviceGetUuid_params
+    CUuuid *uuid;
+    CUdevice dev;
+} cuDeviceGetUuid_v2_params;
+typedef struct cuDeviceGetLuid_params_st {  // parameter record for cuDeviceGetLuid
+    char *luid;
+    unsigned int *deviceNodeMask;
+    CUdevice dev;
+} cuDeviceGetLuid_params;
+typedef struct cuDeviceTotalMem_v2_params_st {  // parameter record for cuDeviceTotalMem_v2
+    size_t *bytes;
+    CUdevice dev;
+} cuDeviceTotalMem_v2_params;
+typedef struct cuDeviceGetTexture1DLinearMaxWidth_params_st {  // parameter record for cuDeviceGetTexture1DLinearMaxWidth
+    size_t *maxWidthInElements;
+    CUarray_format format;
+    unsigned numChannels;
+    CUdevice dev;
+} cuDeviceGetTexture1DLinearMaxWidth_params;
+typedef struct cuDeviceGetAttribute_params_st {  // parameter record for cuDeviceGetAttribute
+    int *pi;
+    CUdevice_attribute attrib;
+    CUdevice dev;
+} cuDeviceGetAttribute_params;
+typedef struct cuDeviceGetNvSciSyncAttributes_params_st {  // parameter record for cuDeviceGetNvSciSyncAttributes; fields presumably follow the function's argument order - confirm against cuda.h
+    void *nvSciSyncAttrList;
+    CUdevice dev;
+    int flags;
+} cuDeviceGetNvSciSyncAttributes_params;
+typedef struct cuDeviceSetMemPool_params_st {  // parameter record for cuDeviceSetMemPool; the pool is held by value here
+    CUdevice dev;
+    CUmemoryPool pool;
+} cuDeviceSetMemPool_params;
+typedef struct cuDeviceGetMemPool_params_st {  // parameter record for cuDeviceGetMemPool; the pool is held through a pointer here
+    CUmemoryPool *pool;
+    CUdevice dev;
+} cuDeviceGetMemPool_params;
+typedef struct cuDeviceGetDefaultMemPool_params_st {  // parameter record for cuDeviceGetDefaultMemPool
+    CUmemoryPool *pool_out;
+    CUdevice dev;
+} cuDeviceGetDefaultMemPool_params;
+typedef struct cuFlushGPUDirectRDMAWrites_params_st {  // parameter record for cuFlushGPUDirectRDMAWrites
+    CUflushGPUDirectRDMAWritesTarget target;
+    CUflushGPUDirectRDMAWritesScope scope;
+} cuFlushGPUDirectRDMAWrites_params;
+typedef struct cuDeviceGetProperties_params_st {  // parameter record for cuDeviceGetProperties
+    CUdevprop *prop;
+    CUdevice dev;
+} cuDeviceGetProperties_params;
+typedef struct cuDeviceComputeCapability_params_st {  // parameter record for cuDeviceComputeCapability
+    int *major;
+    int *minor;
+    CUdevice dev;
+} cuDeviceComputeCapability_params;
+typedef struct cuDevicePrimaryCtxRetain_params_st { /* First of a run of primary-context records keyed by a CUdevice member. NOTE(review): members presumably mirror the like-named driver entry point's arguments, in order -- confirm against cuda.h. */
+    CUcontext *pctx;
+    CUdevice dev;
+} cuDevicePrimaryCtxRetain_params;
+typedef struct cuDevicePrimaryCtxRelease_v2_params_st { /* Holds only the device handle. */
+    CUdevice dev;
+} cuDevicePrimaryCtxRelease_v2_params;
+typedef struct cuDevicePrimaryCtxSetFlags_v2_params_st {
+    CUdevice dev;
+    unsigned int flags;
+} cuDevicePrimaryCtxSetFlags_v2_params;
+typedef struct cuDevicePrimaryCtxGetState_params_st {
+    CUdevice dev;
+    unsigned int *flags;
+    int *active;
+} cuDevicePrimaryCtxGetState_params;
+typedef struct cuDevicePrimaryCtxReset_v2_params_st { /* Holds only the device handle. */
+    CUdevice dev;
+} cuDevicePrimaryCtxReset_v2_params;
+typedef struct cuDeviceGetExecAffinitySupport_params_st {
+    int *pi;
+    CUexecAffinityType type;
+    CUdevice dev;
+} cuDeviceGetExecAffinitySupport_params;
+typedef struct cuCtxCreate_v2_params_st { /* First of a run of cuCtx* records. NOTE(review): members presumably mirror the like-named driver entry point's arguments, in order -- confirm against cuda.h. */
+    CUcontext *pctx;
+    unsigned int flags;
+    CUdevice dev;
+} cuCtxCreate_v2_params;
+typedef struct cuCtxCreate_v3_params_st { /* Adds paramsArray/numParams to the member list of cuCtxCreate_v2_params. */
+    CUcontext *pctx;
+    CUexecAffinityParam *paramsArray;
+    int numParams; /* NOTE(review): presumably the element count of paramsArray -- confirm */
+    unsigned int flags;
+    CUdevice dev;
+} cuCtxCreate_v3_params;
+typedef struct cuCtxDestroy_v2_params_st {
+    CUcontext ctx;
+} cuCtxDestroy_v2_params;
+typedef struct cuCtxPushCurrent_v2_params_st {
+    CUcontext ctx;
+} cuCtxPushCurrent_v2_params;
+typedef struct cuCtxPopCurrent_v2_params_st {
+    CUcontext *pctx;
+} cuCtxPopCurrent_v2_params;
+typedef struct cuCtxSetCurrent_params_st {
+    CUcontext ctx;
+} cuCtxSetCurrent_params;
+typedef struct cuCtxGetCurrent_params_st {
+    CUcontext *pctx;
+} cuCtxGetCurrent_params;
+typedef struct cuCtxGetDevice_params_st {
+    CUdevice *device;
+} cuCtxGetDevice_params;
+typedef struct cuCtxGetFlags_params_st {
+    unsigned int *flags;
+} cuCtxGetFlags_params;
+typedef struct cuCtxSetLimit_params_st { /* Set form stores the value itself; the Get form below stores a pointer. */
+    CUlimit limit;
+    size_t value;
+} cuCtxSetLimit_params;
+typedef struct cuCtxGetLimit_params_st {
+    size_t *pvalue;
+    CUlimit limit;
+} cuCtxGetLimit_params;
+typedef struct cuCtxGetCacheConfig_params_st {
+    CUfunc_cache *pconfig;
+} cuCtxGetCacheConfig_params;
+typedef struct cuCtxSetCacheConfig_params_st {
+    CUfunc_cache config;
+} cuCtxSetCacheConfig_params;
+typedef struct cuCtxGetSharedMemConfig_params_st {
+    CUsharedconfig *pConfig;
+} cuCtxGetSharedMemConfig_params;
+typedef struct cuCtxSetSharedMemConfig_params_st {
+    CUsharedconfig config;
+} cuCtxSetSharedMemConfig_params;
+typedef struct cuCtxGetApiVersion_params_st {
+    CUcontext ctx;
+    unsigned int *version;
+} cuCtxGetApiVersion_params;
+typedef struct cuCtxGetStreamPriorityRange_params_st {
+    int *leastPriority;
+    int *greatestPriority;
+} cuCtxGetStreamPriorityRange_params;
+typedef struct cuCtxGetExecAffinity_params_st {
+    CUexecAffinityParam *pExecAffinity;
+    CUexecAffinityType type;
+} cuCtxGetExecAffinity_params;
+typedef struct cuCtxAttach_params_st {
+    CUcontext *pctx;
+    unsigned int flags;
+} cuCtxAttach_params;
+typedef struct cuCtxDetach_params_st {
+    CUcontext ctx;
+} cuCtxDetach_params;
+typedef struct cuModuleLoad_params_st { /* First of a run of cuModule* records. NOTE(review): members presumably mirror the like-named driver entry point's arguments, in order -- confirm against cuda.h. */
+    CUmodule *module;
+    const char *fname;
+} cuModuleLoad_params;
+typedef struct cuModuleLoadData_params_st {
+    CUmodule *module;
+    const void *image;
+} cuModuleLoadData_params;
+typedef struct cuModuleLoadDataEx_params_st { /* cuModuleLoadData_params members plus an option count and two option pointers. */
+    CUmodule *module;
+    const void *image;
+    unsigned int numOptions; /* NOTE(review): presumably the length of both options and optionValues -- confirm */
+    CUjit_option *options;
+    void **optionValues;
+} cuModuleLoadDataEx_params;
+typedef struct cuModuleLoadFatBinary_params_st {
+    CUmodule *module;
+    const void *fatCubin;
+} cuModuleLoadFatBinary_params;
+typedef struct cuModuleUnload_params_st {
+    CUmodule hmod;
+} cuModuleUnload_params;
+typedef struct cuModuleGetFunction_params_st { /* The three lookup records below share the (pointer, hmod, name) member pattern. */
+    CUfunction *hfunc;
+    CUmodule hmod;
+    const char *name;
+} cuModuleGetFunction_params;
+typedef struct cuModuleGetGlobal_v2_params_st {
+    CUdeviceptr *dptr;
+    size_t *bytes;
+    CUmodule hmod;
+    const char *name;
+} cuModuleGetGlobal_v2_params;
+typedef struct cuModuleGetTexRef_params_st {
+    CUtexref *pTexRef;
+    CUmodule hmod;
+    const char *name;
+} cuModuleGetTexRef_params;
+typedef struct cuModuleGetSurfRef_params_st {
+    CUsurfref *pSurfRef;
+    CUmodule hmod;
+    const char *name;
+} cuModuleGetSurfRef_params;
+typedef struct cuLinkCreate_v2_params_st { /* First of a run of cuLink* records. NOTE(review): members presumably mirror the like-named driver entry point's arguments, in order -- confirm against cuda.h. */
+    unsigned int numOptions;
+    CUjit_option *options;
+    void **optionValues;
+    CUlinkState *stateOut;
+} cuLinkCreate_v2_params;
+typedef struct cuLinkAddData_v2_params_st { /* Carries an in-memory input (data, size, name) followed by the numOptions/options/optionValues triple. */
+    CUlinkState state;
+    CUjitInputType type;
+    void *data;
+    size_t size;
+    const char *name;
+    unsigned int numOptions;
+    CUjit_option *options;
+    void **optionValues;
+} cuLinkAddData_v2_params;
+typedef struct cuLinkAddFile_v2_params_st { /* Like cuLinkAddData_v2_params but with a path member in place of data/size/name. */
+    CUlinkState state;
+    CUjitInputType type;
+    const char *path;
+    unsigned int numOptions;
+    CUjit_option *options;
+    void **optionValues;
+} cuLinkAddFile_v2_params;
+typedef struct cuLinkComplete_params_st {
+    CUlinkState state;
+    void **cubinOut;
+    size_t *sizeOut;
+} cuLinkComplete_params;
+typedef struct cuLinkDestroy_params_st {
+    CUlinkState state;
+} cuLinkDestroy_params;
+typedef struct cuMemGetInfo_v2_params_st { /* First of a run of memory-allocation records. NOTE(review): members presumably mirror the like-named driver entry point's arguments, in order -- confirm against cuda.h. */
+    size_t *free;
+    size_t *total;
+} cuMemGetInfo_v2_params;
+typedef struct cuMemAlloc_v2_params_st {
+    CUdeviceptr *dptr;
+    size_t bytesize;
+} cuMemAlloc_v2_params;
+typedef struct cuMemAllocPitch_v2_params_st {
+    CUdeviceptr *dptr;
+    size_t *pPitch;
+    size_t WidthInBytes;
+    size_t Height;
+    unsigned int ElementSizeBytes;
+} cuMemAllocPitch_v2_params;
+typedef struct cuMemFree_v2_params_st {
+    CUdeviceptr dptr;
+} cuMemFree_v2_params;
+typedef struct cuMemGetAddressRange_v2_params_st {
+    CUdeviceptr *pbase;
+    size_t *psize;
+    CUdeviceptr dptr;
+} cuMemGetAddressRange_v2_params;
+typedef struct cuMemAllocHost_v2_params_st { /* Host-side records in this run use void* / void** members rather than CUdeviceptr. */
+    void **pp;
+    size_t bytesize;
+} cuMemAllocHost_v2_params;
+typedef struct cuMemFreeHost_params_st {
+    void *p;
+} cuMemFreeHost_params;
+typedef struct cuMemHostAlloc_params_st { /* cuMemAllocHost_v2_params members plus a Flags member. */
+    void **pp;
+    size_t bytesize;
+    unsigned int Flags;
+} cuMemHostAlloc_params;
+typedef struct cuMemHostGetDevicePointer_v2_params_st {
+    CUdeviceptr *pdptr;
+    void *p;
+    unsigned int Flags;
+} cuMemHostGetDevicePointer_v2_params;
+typedef struct cuMemHostGetFlags_params_st {
+    unsigned int *pFlags;
+    void *p;
+} cuMemHostGetFlags_params;
+typedef struct cuMemAllocManaged_params_st {
+    CUdeviceptr *dptr;
+    size_t bytesize;
+    unsigned int flags;
+} cuMemAllocManaged_params;
+typedef struct cuDeviceGetByPCIBusId_params_st { /* First of a run covering PCI bus id lookup, cuIpc* handles and host-memory registration. NOTE(review): members presumably mirror the like-named driver entry point's arguments, in order -- confirm against cuda.h. */
+    CUdevice *dev;
+    const char *pciBusId;
+} cuDeviceGetByPCIBusId_params;
+typedef struct cuDeviceGetPCIBusId_params_st {
+    char *pciBusId; /* non-const here, const in cuDeviceGetByPCIBusId_params */
+    int len;
+    CUdevice dev;
+} cuDeviceGetPCIBusId_params;
+typedef struct cuIpcGetEventHandle_params_st {
+    CUipcEventHandle *pHandle;
+    CUevent event;
+} cuIpcGetEventHandle_params;
+typedef struct cuIpcOpenEventHandle_params_st {
+    CUevent *phEvent;
+    CUipcEventHandle handle; /* stored by value, not by pointer */
+} cuIpcOpenEventHandle_params;
+typedef struct cuIpcGetMemHandle_params_st {
+    CUipcMemHandle *pHandle;
+    CUdeviceptr dptr;
+} cuIpcGetMemHandle_params;
+typedef struct cuIpcOpenMemHandle_v2_params_st {
+    CUdeviceptr *pdptr;
+    CUipcMemHandle handle; /* stored by value, not by pointer */
+    unsigned int Flags;
+} cuIpcOpenMemHandle_v2_params;
+typedef struct cuIpcCloseMemHandle_params_st {
+    CUdeviceptr dptr;
+} cuIpcCloseMemHandle_params;
+typedef struct cuMemHostRegister_v2_params_st {
+    void *p;
+    size_t bytesize;
+    unsigned int Flags;
+} cuMemHostRegister_v2_params;
+typedef struct cuMemHostUnregister_params_st {
+    void *p;
+} cuMemHostUnregister_params;
+typedef struct cuMemcpy_ptds_params_st { /* First of a run of cuMemcpy* records, all tagged _ptds and none with a CUstream member. NOTE(review): _ptds presumably marks the per-thread-default-stream variant of the entry point -- confirm against the CUDA documentation. */
+    CUdeviceptr dst;
+    CUdeviceptr src;
+    size_t ByteCount;
+} cuMemcpy_ptds_params;
+typedef struct cuMemcpyPeer_ptds_params_st {
+    CUdeviceptr dstDevice;
+    CUcontext dstContext;
+    CUdeviceptr srcDevice;
+    CUcontext srcContext;
+    size_t ByteCount;
+} cuMemcpyPeer_ptds_params;
+typedef struct cuMemcpyHtoD_v2_ptds_params_st { /* NOTE(review): the H/D/A letters in these names presumably stand for host/device/array, matching the void* / CUdeviceptr / CUarray member types -- confirm. */
+    CUdeviceptr dstDevice;
+    const void *srcHost;
+    size_t ByteCount;
+} cuMemcpyHtoD_v2_ptds_params;
+typedef struct cuMemcpyDtoH_v2_ptds_params_st {
+    void *dstHost;
+    CUdeviceptr srcDevice;
+    size_t ByteCount;
+} cuMemcpyDtoH_v2_ptds_params;
+typedef struct cuMemcpyDtoD_v2_ptds_params_st {
+    CUdeviceptr dstDevice;
+    CUdeviceptr srcDevice;
+    size_t ByteCount;
+} cuMemcpyDtoD_v2_ptds_params;
+typedef struct cuMemcpyDtoA_v2_ptds_params_st {
+    CUarray dstArray;
+    size_t dstOffset;
+    CUdeviceptr srcDevice;
+    size_t ByteCount;
+} cuMemcpyDtoA_v2_ptds_params;
+typedef struct cuMemcpyAtoD_v2_ptds_params_st {
+    CUdeviceptr dstDevice;
+    CUarray srcArray;
+    size_t srcOffset;
+    size_t ByteCount;
+} cuMemcpyAtoD_v2_ptds_params;
+typedef struct cuMemcpyHtoA_v2_ptds_params_st {
+    CUarray dstArray;
+    size_t dstOffset;
+    const void *srcHost;
+    size_t ByteCount;
+} cuMemcpyHtoA_v2_ptds_params;
+typedef struct cuMemcpyAtoH_v2_ptds_params_st {
+    void *dstHost;
+    CUarray srcArray;
+    size_t srcOffset;
+    size_t ByteCount;
+} cuMemcpyAtoH_v2_ptds_params;
+typedef struct cuMemcpyAtoA_v2_ptds_params_st {
+    CUarray dstArray;
+    size_t dstOffset;
+    CUarray srcArray;
+    size_t srcOffset;
+    size_t ByteCount;
+} cuMemcpyAtoA_v2_ptds_params;
+typedef struct cuMemcpy2D_v2_ptds_params_st { /* The 2D/3D records hold a single pointer to a const descriptor struct. */
+    const CUDA_MEMCPY2D *pCopy;
+} cuMemcpy2D_v2_ptds_params;
+typedef struct cuMemcpy2DUnaligned_v2_ptds_params_st {
+    const CUDA_MEMCPY2D *pCopy;
+} cuMemcpy2DUnaligned_v2_ptds_params;
+typedef struct cuMemcpy3D_v2_ptds_params_st {
+    const CUDA_MEMCPY3D *pCopy;
+} cuMemcpy3D_v2_ptds_params;
+typedef struct cuMemcpy3DPeer_ptds_params_st {
+    const CUDA_MEMCPY3D_PEER *pCopy;
+} cuMemcpy3DPeer_ptds_params;
+typedef struct cuMemcpyAsync_ptsz_params_st { /* First of a run of cuMemcpy*Async records, all tagged _ptsz and all ending in a CUstream hStream member. NOTE(review): _ptsz presumably marks the per-thread-default-stream variant of a stream-taking entry point -- confirm against the CUDA documentation. */
+    CUdeviceptr dst;
+    CUdeviceptr src;
+    size_t ByteCount;
+    CUstream hStream;
+} cuMemcpyAsync_ptsz_params;
+typedef struct cuMemcpyPeerAsync_ptsz_params_st {
+    CUdeviceptr dstDevice;
+    CUcontext dstContext;
+    CUdeviceptr srcDevice;
+    CUcontext srcContext;
+    size_t ByteCount;
+    CUstream hStream;
+} cuMemcpyPeerAsync_ptsz_params;
+typedef struct cuMemcpyHtoDAsync_v2_ptsz_params_st {
+    CUdeviceptr dstDevice;
+    const void *srcHost;
+    size_t ByteCount;
+    CUstream hStream;
+} cuMemcpyHtoDAsync_v2_ptsz_params;
+typedef struct cuMemcpyDtoHAsync_v2_ptsz_params_st {
+    void *dstHost;
+    CUdeviceptr srcDevice;
+    size_t ByteCount;
+    CUstream hStream;
+} cuMemcpyDtoHAsync_v2_ptsz_params;
+typedef struct cuMemcpyDtoDAsync_v2_ptsz_params_st {
+    CUdeviceptr dstDevice;
+    CUdeviceptr srcDevice;
+    size_t ByteCount;
+    CUstream hStream;
+} cuMemcpyDtoDAsync_v2_ptsz_params;
+typedef struct cuMemcpyHtoAAsync_v2_ptsz_params_st {
+    CUarray dstArray;
+    size_t dstOffset;
+    const void *srcHost;
+    size_t ByteCount;
+    CUstream hStream;
+} cuMemcpyHtoAAsync_v2_ptsz_params;
+typedef struct cuMemcpyAtoHAsync_v2_ptsz_params_st {
+    void *dstHost;
+    CUarray srcArray;
+    size_t srcOffset;
+    size_t ByteCount;
+    CUstream hStream;
+} cuMemcpyAtoHAsync_v2_ptsz_params;
+typedef struct cuMemcpy2DAsync_v2_ptsz_params_st { /* Descriptor-pointer records: a const descriptor pointer plus the stream. */
+    const CUDA_MEMCPY2D *pCopy;
+    CUstream hStream;
+} cuMemcpy2DAsync_v2_ptsz_params;
+typedef struct cuMemcpy3DAsync_v2_ptsz_params_st {
+    const CUDA_MEMCPY3D *pCopy;
+    CUstream hStream;
+} cuMemcpy3DAsync_v2_ptsz_params;
+typedef struct cuMemcpy3DPeerAsync_ptsz_params_st {
+    const CUDA_MEMCPY3D_PEER *pCopy;
+    CUstream hStream;
+} cuMemcpy3DPeerAsync_ptsz_params;
+typedef struct cuMemsetD8_v2_ptds_params_st { /* First of a run of cuMemset* records tagged _ptds. The D8/D16/D32 forms differ only in the value member's type. NOTE(review): _ptds presumably marks the per-thread-default-stream variant -- confirm. */
+    CUdeviceptr dstDevice;
+    unsigned char uc; /* value member: unsigned char in the D8 records */
+    size_t N;
+} cuMemsetD8_v2_ptds_params;
+typedef struct cuMemsetD16_v2_ptds_params_st {
+    CUdeviceptr dstDevice;
+    unsigned short us; /* value member: unsigned short in the D16 records */
+    size_t N;
+} cuMemsetD16_v2_ptds_params;
+typedef struct cuMemsetD32_v2_ptds_params_st {
+    CUdeviceptr dstDevice;
+    unsigned int ui; /* value member: unsigned int in the D32 records */
+    size_t N;
+} cuMemsetD32_v2_ptds_params;
+typedef struct cuMemsetD2D8_v2_ptds_params_st { /* The D2D* forms replace N with dstPitch plus Width/Height. */
+    CUdeviceptr dstDevice;
+    size_t dstPitch;
+    unsigned char uc;
+    size_t Width;
+    size_t Height;
+} cuMemsetD2D8_v2_ptds_params;
+typedef struct cuMemsetD2D16_v2_ptds_params_st {
+    CUdeviceptr dstDevice;
+    size_t dstPitch;
+    unsigned short us;
+    size_t Width;
+    size_t Height;
+} cuMemsetD2D16_v2_ptds_params;
+typedef struct cuMemsetD2D32_v2_ptds_params_st {
+    CUdeviceptr dstDevice;
+    size_t dstPitch;
+    unsigned int ui;
+    size_t Width;
+    size_t Height;
+} cuMemsetD2D32_v2_ptds_params;
+typedef struct cuMemsetD8Async_ptsz_params_st { /* First of a run of cuMemset*Async records tagged _ptsz. Each ends in a CUstream hStream member after a destination, a value and an extent. NOTE(review): _ptsz presumably marks the per-thread-default-stream variant -- confirm. */
+    CUdeviceptr dstDevice;
+    unsigned char uc;
+    size_t N;
+    CUstream hStream;
+} cuMemsetD8Async_ptsz_params;
+typedef struct cuMemsetD16Async_ptsz_params_st {
+    CUdeviceptr dstDevice;
+    unsigned short us;
+    size_t N;
+    CUstream hStream;
+} cuMemsetD16Async_ptsz_params;
+typedef struct cuMemsetD32Async_ptsz_params_st {
+    CUdeviceptr dstDevice;
+    unsigned int ui;
+    size_t N;
+    CUstream hStream;
+} cuMemsetD32Async_ptsz_params;
+typedef struct cuMemsetD2D8Async_ptsz_params_st { /* The D2D* forms carry dstPitch plus Width/Height instead of N. */
+    CUdeviceptr dstDevice;
+    size_t dstPitch;
+    unsigned char uc;
+    size_t Width;
+    size_t Height;
+    CUstream hStream;
+} cuMemsetD2D8Async_ptsz_params;
+typedef struct cuMemsetD2D16Async_ptsz_params_st {
+    CUdeviceptr dstDevice;
+    size_t dstPitch;
+    unsigned short us;
+    size_t Width;
+    size_t Height;
+    CUstream hStream;
+} cuMemsetD2D16Async_ptsz_params;
+typedef struct cuMemsetD2D32Async_ptsz_params_st {
+    CUdeviceptr dstDevice;
+    size_t dstPitch;
+    unsigned int ui;
+    size_t Width;
+    size_t Height;
+    CUstream hStream;
+} cuMemsetD2D32Async_ptsz_params;
+typedef struct cuArrayCreate_v2_params_st { /* First of a run of CUarray / CUmipmappedArray records. NOTE(review): members presumably mirror the like-named driver entry point's arguments, in order -- confirm against cuda.h. */
+    CUarray *pHandle;
+    const CUDA_ARRAY_DESCRIPTOR *pAllocateArray;
+} cuArrayCreate_v2_params;
+typedef struct cuArrayGetDescriptor_v2_params_st {
+    CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor; /* non-const descriptor pointer, unlike the const one in the Create record */
+    CUarray hArray;
+} cuArrayGetDescriptor_v2_params;
+typedef struct cuArrayGetSparseProperties_params_st {
+    CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties;
+    CUarray array;
+} cuArrayGetSparseProperties_params;
+typedef struct cuMipmappedArrayGetSparseProperties_params_st {
+    CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties;
+    CUmipmappedArray mipmap;
+} cuMipmappedArrayGetSparseProperties_params;
+typedef struct cuArrayGetMemoryRequirements_params_st {
+    CUDA_ARRAY_MEMORY_REQUIREMENTS *memoryRequirements;
+    CUarray array;
+    CUdevice device;
+} cuArrayGetMemoryRequirements_params;
+typedef struct cuMipmappedArrayGetMemoryRequirements_params_st {
+    CUDA_ARRAY_MEMORY_REQUIREMENTS *memoryRequirements;
+    CUmipmappedArray mipmap;
+    CUdevice device;
+} cuMipmappedArrayGetMemoryRequirements_params;
+typedef struct cuArrayGetPlane_params_st {
+    CUarray *pPlaneArray;
+    CUarray hArray;
+    unsigned int planeIdx;
+} cuArrayGetPlane_params;
+typedef struct cuArrayDestroy_params_st {
+    CUarray hArray;
+} cuArrayDestroy_params;
+typedef struct cuArray3DCreate_v2_params_st {
+    CUarray *pHandle;
+    const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray;
+} cuArray3DCreate_v2_params;
+typedef struct cuArray3DGetDescriptor_v2_params_st {
+    CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor;
+    CUarray hArray;
+} cuArray3DGetDescriptor_v2_params;
+typedef struct cuMipmappedArrayCreate_params_st {
+    CUmipmappedArray *pHandle;
+    const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc; /* same descriptor type as cuArray3DCreate_v2_params */
+    unsigned int numMipmapLevels;
+} cuMipmappedArrayCreate_params;
+typedef struct cuMipmappedArrayGetLevel_params_st {
+    CUarray *pLevelArray;
+    CUmipmappedArray hMipmappedArray;
+    unsigned int level;
+} cuMipmappedArrayGetLevel_params;
+typedef struct cuMipmappedArrayDestroy_params_st {
+    CUmipmappedArray hMipmappedArray;
+} cuMipmappedArrayDestroy_params;
+typedef struct cuMemAddressReserve_params_st { /* First of a run of address-reservation / mapping records built around CUmemGenericAllocationHandle. flags members in this run are unsigned long long. NOTE(review): members presumably mirror the like-named driver entry point's arguments, in order -- confirm against cuda.h. */
+    CUdeviceptr *ptr;
+    size_t size;
+    size_t alignment;
+    CUdeviceptr addr;
+    unsigned long long flags;
+} cuMemAddressReserve_params;
+typedef struct cuMemAddressFree_params_st {
+    CUdeviceptr ptr;
+    size_t size;
+} cuMemAddressFree_params;
+typedef struct cuMemCreate_params_st {
+    CUmemGenericAllocationHandle *handle;
+    size_t size;
+    const CUmemAllocationProp *prop;
+    unsigned long long flags;
+} cuMemCreate_params;
+typedef struct cuMemRelease_params_st {
+    CUmemGenericAllocationHandle handle;
+} cuMemRelease_params;
+typedef struct cuMemMap_params_st {
+    CUdeviceptr ptr;
+    size_t size;
+    size_t offset;
+    CUmemGenericAllocationHandle handle;
+    unsigned long long flags;
+} cuMemMap_params;
+typedef struct cuMemMapArrayAsync_ptsz_params_st { /* Only record in this run with a CUstream member and a _ptsz tag. */
+    CUarrayMapInfo *mapInfoList;
+    unsigned int count; /* NOTE(review): presumably the number of entries in mapInfoList -- confirm */
+    CUstream hStream;
+} cuMemMapArrayAsync_ptsz_params;
+typedef struct cuMemUnmap_params_st {
+    CUdeviceptr ptr;
+    size_t size;
+} cuMemUnmap_params;
+typedef struct cuMemSetAccess_params_st {
+    CUdeviceptr ptr;
+    size_t size;
+    const CUmemAccessDesc *desc;
+    size_t count; /* NOTE(review): presumably the number of entries in desc -- confirm */
+} cuMemSetAccess_params;
+typedef struct cuMemGetAccess_params_st {
+    unsigned long long *flags;
+    const CUmemLocation *location;
+    CUdeviceptr ptr;
+} cuMemGetAccess_params;
+typedef struct cuMemExportToShareableHandle_params_st {
+    void *shareableHandle;
+    CUmemGenericAllocationHandle handle;
+    CUmemAllocationHandleType handleType;
+    unsigned long long flags;
+} cuMemExportToShareableHandle_params;
+typedef struct cuMemImportFromShareableHandle_params_st {
+    CUmemGenericAllocationHandle *handle;
+    void *osHandle;
+    CUmemAllocationHandleType shHandleType;
+} cuMemImportFromShareableHandle_params;
+typedef struct cuMemGetAllocationGranularity_params_st {
+    size_t *granularity;
+    const CUmemAllocationProp *prop;
+    CUmemAllocationGranularity_flags option;
+} cuMemGetAllocationGranularity_params;
+typedef struct cuMemGetAllocationPropertiesFromHandle_params_st {
+    CUmemAllocationProp *prop;
+    CUmemGenericAllocationHandle handle;
+} cuMemGetAllocationPropertiesFromHandle_params;
+typedef struct cuMemRetainAllocationHandle_params_st {
+    CUmemGenericAllocationHandle *handle;
+    void *addr;
+} cuMemRetainAllocationHandle_params;
+typedef struct cuMemFreeAsync_ptsz_params_st { /* First of a run of async-allocation and CUmemoryPool records. The stream-taking ones carry the _ptsz tag. NOTE(review): _ptsz presumably marks the per-thread-default-stream variant; members presumably mirror the like-named entry point's arguments -- confirm against cuda.h. */
+    CUdeviceptr dptr;
+    CUstream hStream;
+} cuMemFreeAsync_ptsz_params;
+typedef struct cuMemAllocAsync_ptsz_params_st {
+    CUdeviceptr *dptr;
+    size_t bytesize;
+    CUstream hStream;
+} cuMemAllocAsync_ptsz_params;
+typedef struct cuMemPoolTrimTo_params_st {
+    CUmemoryPool pool;
+    size_t minBytesToKeep;
+} cuMemPoolTrimTo_params;
+typedef struct cuMemPoolSetAttribute_params_st { /* Set and Get attribute records have identical member lists; value is an untyped pointer in both. */
+    CUmemoryPool pool;
+    CUmemPool_attribute attr;
+    void *value;
+} cuMemPoolSetAttribute_params;
+typedef struct cuMemPoolGetAttribute_params_st {
+    CUmemoryPool pool;
+    CUmemPool_attribute attr;
+    void *value;
+} cuMemPoolGetAttribute_params;
+typedef struct cuMemPoolSetAccess_params_st {
+    CUmemoryPool pool;
+    const CUmemAccessDesc *map;
+    size_t count; /* NOTE(review): presumably the number of entries in map -- confirm */
+} cuMemPoolSetAccess_params;
+typedef struct cuMemPoolGetAccess_params_st {
+    CUmemAccess_flags *flags;
+    CUmemoryPool memPool; /* named memPool here; other records in this run name the pool member `pool` */
+    CUmemLocation *location;
+} cuMemPoolGetAccess_params;
+typedef struct cuMemPoolCreate_params_st {
+    CUmemoryPool *pool;
+    const CUmemPoolProps *poolProps;
+} cuMemPoolCreate_params;
+typedef struct cuMemPoolDestroy_params_st {
+    CUmemoryPool pool;
+} cuMemPoolDestroy_params;
+typedef struct cuMemAllocFromPoolAsync_ptsz_params_st {
+    CUdeviceptr *dptr;
+    size_t bytesize;
+    CUmemoryPool pool;
+    CUstream hStream;
+} cuMemAllocFromPoolAsync_ptsz_params;
+typedef struct cuMemPoolExportToShareableHandle_params_st {
+    void *handle_out;
+    CUmemoryPool pool;
+    CUmemAllocationHandleType handleType;
+    unsigned long long flags;
+} cuMemPoolExportToShareableHandle_params;
+typedef struct cuMemPoolImportFromShareableHandle_params_st {
+    CUmemoryPool *pool_out;
+    void *handle;
+    CUmemAllocationHandleType handleType;
+    unsigned long long flags;
+} cuMemPoolImportFromShareableHandle_params;
+typedef struct cuMemPoolExportPointer_params_st {
+    CUmemPoolPtrExportData *shareData_out;
+    CUdeviceptr ptr;
+} cuMemPoolExportPointer_params;
+typedef struct cuMemPoolImportPointer_params_st {
+    CUdeviceptr *ptr_out;
+    CUmemoryPool pool;
+    CUmemPoolPtrExportData *shareData;
+} cuMemPoolImportPointer_params;
+typedef struct cuPointerGetAttribute_params_st { /* First of a run of pointer-attribute, prefetch/advise and range-attribute records. NOTE(review): members presumably mirror the like-named driver entry point's arguments, in order -- confirm against cuda.h. */
+    void *data;
+    CUpointer_attribute attribute;
+    CUdeviceptr ptr;
+} cuPointerGetAttribute_params;
+typedef struct cuMemPrefetchAsync_ptsz_params_st { /* NOTE(review): _ptsz presumably marks the per-thread-default-stream variant -- confirm. */
+    CUdeviceptr devPtr;
+    size_t count;
+    CUdevice dstDevice;
+    CUstream hStream;
+} cuMemPrefetchAsync_ptsz_params;
+typedef struct cuMemAdvise_params_st {
+    CUdeviceptr devPtr;
+    size_t count;
+    CUmem_advise advice;
+    CUdevice device;
+} cuMemAdvise_params;
+typedef struct cuMemRangeGetAttribute_params_st { /* Single-attribute form: one data pointer, one size, one attribute. */
+    void *data;
+    size_t dataSize;
+    CUmem_range_attribute attribute;
+    CUdeviceptr devPtr;
+    size_t count;
+} cuMemRangeGetAttribute_params;
+typedef struct cuMemRangeGetAttributes_params_st { /* Plural form: data, dataSizes and attributes are pointers, plus numAttributes. */
+    void **data;
+    size_t *dataSizes;
+    CUmem_range_attribute *attributes;
+    size_t numAttributes; /* NOTE(review): presumably the length of the three arrays above -- confirm */
+    CUdeviceptr devPtr;
+    size_t count;
+} cuMemRangeGetAttributes_params;
+typedef struct cuPointerSetAttribute_params_st {
+    const void *value; /* const here; the Get record's data member is non-const */
+    CUpointer_attribute attribute;
+    CUdeviceptr ptr;
+} cuPointerSetAttribute_params;
+typedef struct cuPointerGetAttributes_params_st {
+    unsigned int numAttributes;
+    CUpointer_attribute *attributes;
+    void **data;
+    CUdeviceptr ptr;
+} cuPointerGetAttributes_params;
+typedef struct cuStreamCreate_params_st { /* First of a run of cuStream* records. The Create, Destroy and cuThreadExchangeStreamCaptureMode records are unsuffixed; the rest carry _ptsz. NOTE(review): _ptsz presumably marks the per-thread-default-stream variant; members presumably mirror the like-named entry point's arguments -- confirm against cuda.h. */
+    CUstream *phStream;
+    unsigned int Flags;
+} cuStreamCreate_params;
+typedef struct cuStreamCreateWithPriority_params_st {
+    CUstream *phStream;
+    unsigned int flags; /* lower-case here, capitalised `Flags` in cuStreamCreate_params */
+    int priority;
+} cuStreamCreateWithPriority_params;
+typedef struct cuStreamGetPriority_ptsz_params_st {
+    CUstream hStream;
+    int *priority;
+} cuStreamGetPriority_ptsz_params;
+typedef struct cuStreamGetFlags_ptsz_params_st {
+    CUstream hStream;
+    unsigned int *flags;
+} cuStreamGetFlags_ptsz_params;
+typedef struct cuStreamGetCtx_ptsz_params_st {
+    CUstream hStream;
+    CUcontext *pctx;
+} cuStreamGetCtx_ptsz_params;
+typedef struct cuStreamWaitEvent_ptsz_params_st {
+    CUstream hStream;
+    CUevent hEvent;
+    unsigned int Flags;
+} cuStreamWaitEvent_ptsz_params;
+typedef struct cuStreamAddCallback_ptsz_params_st {
+    CUstream hStream;
+    CUstreamCallback callback;
+    void *userData;
+    unsigned int flags;
+} cuStreamAddCallback_ptsz_params;
+typedef struct cuStreamBeginCapture_v2_ptsz_params_st {
+    CUstream hStream;
+    CUstreamCaptureMode mode;
+} cuStreamBeginCapture_v2_ptsz_params;
+typedef struct cuThreadExchangeStreamCaptureMode_params_st { /* Has no stream member; holds only a pointer to a capture mode. */
+    CUstreamCaptureMode *mode;
+} cuThreadExchangeStreamCaptureMode_params;
+typedef struct cuStreamEndCapture_ptsz_params_st {
+    CUstream hStream;
+    CUgraph *phGraph;
+} cuStreamEndCapture_ptsz_params;
+typedef struct cuStreamIsCapturing_ptsz_params_st {
+    CUstream hStream;
+    CUstreamCaptureStatus *captureStatus;
+} cuStreamIsCapturing_ptsz_params;
+typedef struct cuStreamGetCaptureInfo_ptsz_params_st {
+    CUstream hStream;
+    CUstreamCaptureStatus *captureStatus_out;
+    cuuint64_t *id_out;
+} cuStreamGetCaptureInfo_ptsz_params;
+typedef struct cuStreamGetCaptureInfo_v2_ptsz_params_st { /* Extends the non-v2 record with graph_out, dependencies_out and numDependencies_out. */
+    CUstream hStream;
+    CUstreamCaptureStatus *captureStatus_out;
+    cuuint64_t *id_out;
+    CUgraph *graph_out;
+    const CUgraphNode **dependencies_out;
+    size_t *numDependencies_out;
+} cuStreamGetCaptureInfo_v2_ptsz_params;
+typedef struct cuStreamUpdateCaptureDependencies_ptsz_params_st {
+    CUstream hStream;
+    CUgraphNode *dependencies;
+    size_t numDependencies;
+    unsigned int flags;
+} cuStreamUpdateCaptureDependencies_ptsz_params;
+typedef struct cuStreamAttachMemAsync_ptsz_params_st {
+    CUstream hStream;
+    CUdeviceptr dptr;
+    size_t length;
+    unsigned int flags;
+} cuStreamAttachMemAsync_ptsz_params;
+typedef struct cuStreamQuery_ptsz_params_st {
+    CUstream hStream;
+} cuStreamQuery_ptsz_params;
+typedef struct cuStreamSynchronize_ptsz_params_st {
+    CUstream hStream;
+} cuStreamSynchronize_ptsz_params;
+typedef struct cuStreamDestroy_v2_params_st {
+    CUstream hStream;
+} cuStreamDestroy_v2_params;
+typedef struct cuStreamCopyAttributes_ptsz_params_st {
+    CUstream dst;
+    CUstream src;
+} cuStreamCopyAttributes_ptsz_params;
+typedef struct cuStreamGetAttribute_ptsz_params_st {
+    CUstream hStream;
+    CUstreamAttrID attr;
+    CUstreamAttrValue *value_out;
+} cuStreamGetAttribute_ptsz_params;
+typedef struct cuStreamSetAttribute_ptsz_params_st {
+    CUstream hStream;
+    CUstreamAttrID attr;
+    const CUstreamAttrValue *value; /* const pointer for Set, non-const value_out for Get */
+} cuStreamSetAttribute_ptsz_params;
+typedef struct cuEventCreate_params_st { /* First of a run of cuEvent* records. Only the two Record forms have a CUstream member and a _ptsz tag. NOTE(review): members presumably mirror the like-named driver entry point's arguments, in order -- confirm against cuda.h. */
+    CUevent *phEvent;
+    unsigned int Flags;
+} cuEventCreate_params;
+typedef struct cuEventRecord_ptsz_params_st {
+    CUevent hEvent;
+    CUstream hStream;
+} cuEventRecord_ptsz_params;
+typedef struct cuEventRecordWithFlags_ptsz_params_st { /* cuEventRecord_ptsz_params members plus a flags member. */
+    CUevent hEvent;
+    CUstream hStream;
+    unsigned int flags;
+} cuEventRecordWithFlags_ptsz_params;
+typedef struct cuEventQuery_params_st {
+    CUevent hEvent;
+} cuEventQuery_params;
+typedef struct cuEventSynchronize_params_st {
+    CUevent hEvent;
+} cuEventSynchronize_params;
+typedef struct cuEventDestroy_v2_params_st {
+    CUevent hEvent;
+} cuEventDestroy_v2_params;
+typedef struct cuEventElapsedTime_params_st {
+    float *pMilliseconds; /* NOTE(review): unit taken from the member name only -- confirm against cuda.h */
+    CUevent hStart;
+    CUevent hEnd;
+} cuEventElapsedTime_params;
+typedef struct cuImportExternalMemory_params_st { /* First of a run of CUexternalMemory / CUexternalSemaphore records. Descriptor arguments are pointers to const structs. NOTE(review): members presumably mirror the like-named driver entry point's arguments, in order -- confirm against cuda.h. */
+    CUexternalMemory *extMem_out;
+    const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc;
+} cuImportExternalMemory_params;
+typedef struct cuExternalMemoryGetMappedBuffer_params_st {
+    CUdeviceptr *devPtr;
+    CUexternalMemory extMem;
+    const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc;
+} cuExternalMemoryGetMappedBuffer_params;
+typedef struct cuExternalMemoryGetMappedMipmappedArray_params_st {
+    CUmipmappedArray *mipmap;
+    CUexternalMemory extMem;
+    const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc;
+} cuExternalMemoryGetMappedMipmappedArray_params;
+typedef struct cuDestroyExternalMemory_params_st {
+    CUexternalMemory extMem;
+} cuDestroyExternalMemory_params;
+typedef struct cuImportExternalSemaphore_params_st {
+    CUexternalSemaphore *extSem_out;
+    const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc;
+} cuImportExternalSemaphore_params;
+typedef struct cuSignalExternalSemaphoresAsync_ptsz_params_st { /* Signal and Wait records differ only in the paramsArray element type. */
+    const CUexternalSemaphore *extSemArray;
+    const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray;
+    unsigned int numExtSems; /* NOTE(review): presumably the length of both arrays above -- confirm */
+    CUstream stream;
+} cuSignalExternalSemaphoresAsync_ptsz_params;
+typedef struct cuWaitExternalSemaphoresAsync_ptsz_params_st {
+    const CUexternalSemaphore *extSemArray;
+    const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray;
+    unsigned int numExtSems;
+    CUstream stream;
+} cuWaitExternalSemaphoresAsync_ptsz_params;
+typedef struct cuDestroyExternalSemaphore_params_st {
+    CUexternalSemaphore extSem;
+} cuDestroyExternalSemaphore_params;
+typedef struct cuStreamWaitValue32_ptsz_params_st { /* First of a run of stream wait/write-value records. The 32 and 64 forms differ only in the value member's type (cuuint32_t vs cuuint64_t). NOTE(review): _ptsz presumably marks the per-thread-default-stream variant -- confirm. */
+    CUstream stream;
+    CUdeviceptr addr;
+    cuuint32_t value;
+    unsigned int flags;
+} cuStreamWaitValue32_ptsz_params;
+typedef struct cuStreamWaitValue64_ptsz_params_st {
+    CUstream stream;
+    CUdeviceptr addr;
+    cuuint64_t value;
+    unsigned int flags;
+} cuStreamWaitValue64_ptsz_params;
+typedef struct cuStreamWriteValue32_ptsz_params_st {
+    CUstream stream;
+    CUdeviceptr addr;
+    cuuint32_t value;
+    unsigned int flags;
+} cuStreamWriteValue32_ptsz_params;
+typedef struct cuStreamWriteValue64_ptsz_params_st {
+    CUstream stream;
+    CUdeviceptr addr;
+    cuuint64_t value;
+    unsigned int flags;
+} cuStreamWriteValue64_ptsz_params;
+typedef struct cuStreamBatchMemOp_ptsz_params_st {
+    CUstream stream;
+    unsigned int count; /* NOTE(review): presumably the number of entries in paramArray -- confirm */
+    CUstreamBatchMemOpParams *paramArray;
+    unsigned int flags;
+} cuStreamBatchMemOp_ptsz_params;
+typedef struct cuFuncGetAttribute_params_st { /* First of a run of CUfunction attribute/configuration records. NOTE(review): members presumably mirror the like-named driver entry point's arguments, in order -- confirm against cuda.h. */
+    int *pi;
+    CUfunction_attribute attrib;
+    CUfunction hfunc;
+} cuFuncGetAttribute_params;
+typedef struct cuFuncSetAttribute_params_st { /* Member order differs from the Get record: hfunc comes first and the value is stored directly. */
+    CUfunction hfunc;
+    CUfunction_attribute attrib;
+    int value;
+} cuFuncSetAttribute_params;
+typedef struct cuFuncSetCacheConfig_params_st {
+    CUfunction hfunc;
+    CUfunc_cache config;
+} cuFuncSetCacheConfig_params;
+typedef struct cuFuncSetSharedMemConfig_params_st {
+    CUfunction hfunc;
+    CUsharedconfig config;
+} cuFuncSetSharedMemConfig_params;
+typedef struct cuFuncGetModule_params_st {
+    CUmodule *hmod;
+    CUfunction hfunc;
+} cuFuncGetModule_params;
+typedef struct cuLaunchKernel_ptsz_params_st { /* First of a run of launch records. Grid and block extents are stored as six separate unsigned int members. NOTE(review): _ptsz presumably marks the per-thread-default-stream variant; members presumably mirror the like-named entry point's arguments -- confirm against cuda.h. */
+    CUfunction f;
+    unsigned int gridDimX;
+    unsigned int gridDimY;
+    unsigned int gridDimZ;
+    unsigned int blockDimX;
+    unsigned int blockDimY;
+    unsigned int blockDimZ;
+    unsigned int sharedMemBytes;
+    CUstream hStream;
+    void **kernelParams;
+    void **extra;
+} cuLaunchKernel_ptsz_params;
+typedef struct cuLaunchKernelEx_ptsz_params_st { /* Takes a const CUlaunchConfig pointer in place of the individual dimension/stream members. */
+    const CUlaunchConfig *config;
+    CUfunction f;
+    void **kernelParams;
+    void **extra;
+} cuLaunchKernelEx_ptsz_params;
+typedef struct cuLaunchCooperativeKernel_ptsz_params_st { /* Same members as cuLaunchKernel_ptsz_params except there is no `extra` member. */
+    CUfunction f;
+    unsigned int gridDimX;
+    unsigned int gridDimY;
+    unsigned int gridDimZ;
+    unsigned int blockDimX;
+    unsigned int blockDimY;
+    unsigned int blockDimZ;
+    unsigned int sharedMemBytes;
+    CUstream hStream;
+    void **kernelParams;
+} cuLaunchCooperativeKernel_ptsz_params;
+typedef struct cuLaunchCooperativeKernelMultiDevice_params_st {
+    CUDA_LAUNCH_PARAMS *launchParamsList;
+    unsigned int numDevices; /* NOTE(review): presumably the number of entries in launchParamsList -- confirm */
+    unsigned int flags;
+} cuLaunchCooperativeKernelMultiDevice_params;
+typedef struct cuLaunchHostFunc_ptsz_params_st {
+    CUstream hStream;
+    CUhostFn fn;
+    void *userData;
+} cuLaunchHostFunc_ptsz_params;
+typedef struct cuFuncSetBlockShape_params_st {
+    CUfunction hfunc;
+    int x;
+    int y;
+    int z;
+} cuFuncSetBlockShape_params;
+typedef struct cuFuncSetSharedSize_params_st {
+    CUfunction hfunc;
+    unsigned int bytes;
+} cuFuncSetSharedSize_params;
+typedef struct cuParamSetSize_params_st {
+    CUfunction hfunc;
+    unsigned int numbytes;
+} cuParamSetSize_params;
+typedef struct cuParamSeti_params_st {
+    CUfunction hfunc;
+    int offset;
+    unsigned int value;
+} cuParamSeti_params;
+typedef struct cuParamSetf_params_st {
+    CUfunction hfunc;
+    int offset;
+    float value;
+} cuParamSetf_params;
+typedef struct cuParamSetv_params_st {
+    CUfunction hfunc;
+    int offset;
+    void *ptr;
+    unsigned int numbytes;
+} cuParamSetv_params;
+typedef struct cuLaunch_params_st {
+    CUfunction f;
+} cuLaunch_params;
+typedef struct cuLaunchGrid_params_st {
+    CUfunction f;
+    int grid_width;
+    int grid_height;
+} cuLaunchGrid_params;
+typedef struct cuLaunchGridAsync_params_st { /* cuLaunchGridAsync args, presumed from name: same fields as cuLaunchGrid_params plus a trailing stream handle */
+    CUfunction f;
+    int grid_width;
+    int grid_height;
+    CUstream hStream;
+} cuLaunchGridAsync_params;
+typedef struct cuParamSetTexRef_params_st { /* cuParamSetTexRef args, presumed from name: function handle, int texunit, texture-reference handle */
+    CUfunction hfunc;
+    int texunit;
+    CUtexref hTexRef;
+} cuParamSetTexRef_params;
+typedef struct cuGraphCreate_params_st { /* cuGraphCreate args, presumed from name: pointer to a CUgraph and unsigned flags */
+    CUgraph *phGraph;
+    unsigned int flags;
+} cuGraphCreate_params;
+typedef struct cuGraphAddKernelNode_params_st { /* cuGraphAddKernelNode args, presumed from name: node pointer, graph, const dependency pointer with its count, const kernel-node params pointer */
+    CUgraphNode *phGraphNode;
+    CUgraph hGraph;
+    const CUgraphNode *dependencies;
+    size_t numDependencies;
+    const CUDA_KERNEL_NODE_PARAMS *nodeParams;
+} cuGraphAddKernelNode_params;
+typedef struct cuGraphKernelNodeGetParams_params_st { /* cuGraphKernelNodeGetParams args, presumed from name: node handle and a non-const CUDA_KERNEL_NODE_PARAMS pointer */
+    CUgraphNode hNode;
+    CUDA_KERNEL_NODE_PARAMS *nodeParams;
+} cuGraphKernelNodeGetParams_params;
+typedef struct cuGraphKernelNodeSetParams_params_st { /* cuGraphKernelNodeSetParams args, presumed from name: node handle and a const CUDA_KERNEL_NODE_PARAMS pointer */
+    CUgraphNode hNode;
+    const CUDA_KERNEL_NODE_PARAMS *nodeParams;
+} cuGraphKernelNodeSetParams_params;
+typedef struct cuGraphAddMemcpyNode_params_st { /* cuGraphAddMemcpyNode args, presumed from name: node pointer, graph, const dependency pointer with count, const CUDA_MEMCPY3D pointer, context */
+    CUgraphNode *phGraphNode;
+    CUgraph hGraph;
+    const CUgraphNode *dependencies;
+    size_t numDependencies;
+    const CUDA_MEMCPY3D *copyParams;
+    CUcontext ctx;
+} cuGraphAddMemcpyNode_params;
+typedef struct cuGraphMemcpyNodeGetParams_params_st { /* cuGraphMemcpyNodeGetParams args, presumed from name: node handle and a non-const CUDA_MEMCPY3D pointer */
+    CUgraphNode hNode;
+    CUDA_MEMCPY3D *nodeParams;
+} cuGraphMemcpyNodeGetParams_params;
+typedef struct cuGraphMemcpyNodeSetParams_params_st { /* cuGraphMemcpyNodeSetParams args, presumed from name: node handle and a const CUDA_MEMCPY3D pointer */
+    CUgraphNode hNode;
+    const CUDA_MEMCPY3D *nodeParams;
+} cuGraphMemcpyNodeSetParams_params;
+typedef struct cuGraphAddMemsetNode_params_st { /* cuGraphAddMemsetNode args, presumed from name: node pointer, graph, const dependency pointer with count, const memset-node params pointer, context */
+    CUgraphNode *phGraphNode;
+    CUgraph hGraph;
+    const CUgraphNode *dependencies;
+    size_t numDependencies;
+    const CUDA_MEMSET_NODE_PARAMS *memsetParams;
+    CUcontext ctx;
+} cuGraphAddMemsetNode_params;
+typedef struct cuGraphMemsetNodeGetParams_params_st { /* cuGraphMemsetNodeGetParams args, presumed from name: node handle and a non-const CUDA_MEMSET_NODE_PARAMS pointer */
+    CUgraphNode hNode;
+    CUDA_MEMSET_NODE_PARAMS *nodeParams;
+} cuGraphMemsetNodeGetParams_params;
+typedef struct cuGraphMemsetNodeSetParams_params_st { /* cuGraphMemsetNodeSetParams args, presumed from name: node handle and a const CUDA_MEMSET_NODE_PARAMS pointer */
+    CUgraphNode hNode;
+    const CUDA_MEMSET_NODE_PARAMS *nodeParams;
+} cuGraphMemsetNodeSetParams_params;
+typedef struct cuGraphAddHostNode_params_st { /* cuGraphAddHostNode args, presumed from name: node pointer, graph, const dependency pointer with count, const host-node params pointer */
+    CUgraphNode *phGraphNode;
+    CUgraph hGraph;
+    const CUgraphNode *dependencies;
+    size_t numDependencies;
+    const CUDA_HOST_NODE_PARAMS *nodeParams;
+} cuGraphAddHostNode_params;
+typedef struct cuGraphHostNodeGetParams_params_st { /* cuGraphHostNodeGetParams args, presumed from name: node handle and a non-const CUDA_HOST_NODE_PARAMS pointer */
+    CUgraphNode hNode;
+    CUDA_HOST_NODE_PARAMS *nodeParams;
+} cuGraphHostNodeGetParams_params;
+typedef struct cuGraphHostNodeSetParams_params_st { /* cuGraphHostNodeSetParams args, presumed from name: node handle and a const CUDA_HOST_NODE_PARAMS pointer */
+    CUgraphNode hNode;
+    const CUDA_HOST_NODE_PARAMS *nodeParams;
+} cuGraphHostNodeSetParams_params;
+typedef struct cuGraphAddChildGraphNode_params_st { /* cuGraphAddChildGraphNode args, presumed from name: node pointer, graph, const dependency pointer with count, and a second CUgraph (childGraph) by value */
+    CUgraphNode *phGraphNode;
+    CUgraph hGraph;
+    const CUgraphNode *dependencies;
+    size_t numDependencies;
+    CUgraph childGraph;
+} cuGraphAddChildGraphNode_params;
+typedef struct cuGraphChildGraphNodeGetGraph_params_st { /* cuGraphChildGraphNodeGetGraph args, presumed from name: node handle and a pointer to a CUgraph */
+    CUgraphNode hNode;
+    CUgraph *phGraph;
+} cuGraphChildGraphNodeGetGraph_params;
+typedef struct cuGraphAddEmptyNode_params_st { /* cuGraphAddEmptyNode args, presumed from name: node pointer, graph, const dependency pointer with count; no payload field */
+    CUgraphNode *phGraphNode;
+    CUgraph hGraph;
+    const CUgraphNode *dependencies;
+    size_t numDependencies;
+} cuGraphAddEmptyNode_params;
+typedef struct cuGraphAddEventRecordNode_params_st { /* cuGraphAddEventRecordNode args, presumed from name: node pointer, graph, const dependency pointer with count, event handle */
+    CUgraphNode *phGraphNode;
+    CUgraph hGraph;
+    const CUgraphNode *dependencies;
+    size_t numDependencies;
+    CUevent event;
+} cuGraphAddEventRecordNode_params;
+typedef struct cuGraphEventRecordNodeGetEvent_params_st { /* cuGraphEventRecordNodeGetEvent args, presumed from name: node handle and a pointer to a CUevent */
+    CUgraphNode hNode;
+    CUevent *event_out;
+} cuGraphEventRecordNodeGetEvent_params;
+typedef struct cuGraphEventRecordNodeSetEvent_params_st { /* cuGraphEventRecordNodeSetEvent args, presumed from name: node handle and an event handle by value */
+    CUgraphNode hNode;
+    CUevent event;
+} cuGraphEventRecordNodeSetEvent_params;
+typedef struct cuGraphAddEventWaitNode_params_st { /* cuGraphAddEventWaitNode args, presumed from name: node pointer, graph, const dependency pointer with count, event handle */
+    CUgraphNode *phGraphNode;
+    CUgraph hGraph;
+    const CUgraphNode *dependencies;
+    size_t numDependencies;
+    CUevent event;
+} cuGraphAddEventWaitNode_params;
+typedef struct cuGraphEventWaitNodeGetEvent_params_st { /* cuGraphEventWaitNodeGetEvent args, presumed from name: node handle and a pointer to a CUevent */
+    CUgraphNode hNode;
+    CUevent *event_out;
+} cuGraphEventWaitNodeGetEvent_params;
+typedef struct cuGraphEventWaitNodeSetEvent_params_st { /* cuGraphEventWaitNodeSetEvent args, presumed from name: node handle and an event handle by value */
+    CUgraphNode hNode;
+    CUevent event;
+} cuGraphEventWaitNodeSetEvent_params;
+typedef struct cuGraphAddExternalSemaphoresSignalNode_params_st { /* cuGraphAddExternalSemaphoresSignalNode args, presumed from name: node pointer, graph, const dependency pointer with count, const signal-node params pointer */
+    CUgraphNode *phGraphNode;
+    CUgraph hGraph;
+    const CUgraphNode *dependencies;
+    size_t numDependencies;
+    const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams;
+} cuGraphAddExternalSemaphoresSignalNode_params;
+typedef struct cuGraphExternalSemaphoresSignalNodeGetParams_params_st { /* cuGraphExternalSemaphoresSignalNodeGetParams args, presumed from name: node handle and a non-const signal-node params pointer */
+    CUgraphNode hNode;
+    CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *params_out;
+} cuGraphExternalSemaphoresSignalNodeGetParams_params;
+typedef struct cuGraphExternalSemaphoresSignalNodeSetParams_params_st { /* cuGraphExternalSemaphoresSignalNodeSetParams args, presumed from name: node handle and a const signal-node params pointer */
+    CUgraphNode hNode;
+    const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams;
+} cuGraphExternalSemaphoresSignalNodeSetParams_params;
+typedef struct cuGraphAddExternalSemaphoresWaitNode_params_st { /* cuGraphAddExternalSemaphoresWaitNode args, presumed from name: node pointer, graph, const dependency pointer with count, const wait-node params pointer */
+    CUgraphNode *phGraphNode;
+    CUgraph hGraph;
+    const CUgraphNode *dependencies;
+    size_t numDependencies;
+    const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams;
+} cuGraphAddExternalSemaphoresWaitNode_params;
+typedef struct cuGraphExternalSemaphoresWaitNodeGetParams_params_st { /* cuGraphExternalSemaphoresWaitNodeGetParams args, presumed from name: node handle and a non-const wait-node params pointer */
+    CUgraphNode hNode;
+    CUDA_EXT_SEM_WAIT_NODE_PARAMS *params_out;
+} cuGraphExternalSemaphoresWaitNodeGetParams_params;
+typedef struct cuGraphExternalSemaphoresWaitNodeSetParams_params_st { /* cuGraphExternalSemaphoresWaitNodeSetParams args, presumed from name: node handle and a const wait-node params pointer */
+    CUgraphNode hNode;
+    const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams;
+} cuGraphExternalSemaphoresWaitNodeSetParams_params;
+typedef struct cuGraphAddMemAllocNode_params_st { /* cuGraphAddMemAllocNode args, presumed from name: node pointer, graph, const dependency pointer with count, mem-alloc params pointer */
+    CUgraphNode *phGraphNode;
+    CUgraph hGraph;
+    const CUgraphNode *dependencies;
+    size_t numDependencies;
+    CUDA_MEM_ALLOC_NODE_PARAMS *nodeParams; /* not const-qualified, unlike the nodeParams of the other cuGraphAdd*Node records here */
+} cuGraphAddMemAllocNode_params;
+typedef struct cuGraphMemAllocNodeGetParams_params_st { /* cuGraphMemAllocNodeGetParams args, presumed from name: node handle and a CUDA_MEM_ALLOC_NODE_PARAMS pointer */
+    CUgraphNode hNode;
+    CUDA_MEM_ALLOC_NODE_PARAMS *params_out;
+} cuGraphMemAllocNodeGetParams_params;
+typedef struct cuGraphAddMemFreeNode_params_st { /* cuGraphAddMemFreeNode args, presumed from name: node pointer, graph, const dependency pointer with count, device pointer by value */
+    CUgraphNode *phGraphNode;
+    CUgraph hGraph;
+    const CUgraphNode *dependencies;
+    size_t numDependencies;
+    CUdeviceptr dptr;
+} cuGraphAddMemFreeNode_params;
+typedef struct cuGraphMemFreeNodeGetParams_params_st { /* cuGraphMemFreeNodeGetParams args, presumed from name: node handle and a pointer to a CUdeviceptr */
+    CUgraphNode hNode;
+    CUdeviceptr *dptr_out;
+} cuGraphMemFreeNodeGetParams_params;
+typedef struct cuDeviceGraphMemTrim_params_st { /* cuDeviceGraphMemTrim args, presumed from name: a single CUdevice */
+    CUdevice device;
+} cuDeviceGraphMemTrim_params;
+typedef struct cuDeviceGetGraphMemAttribute_params_st { /* cuDeviceGetGraphMemAttribute args, presumed from name: device, attribute selector, untyped value pointer */
+    CUdevice device;
+    CUgraphMem_attribute attr;
+    void *value;
+} cuDeviceGetGraphMemAttribute_params;
+typedef struct cuDeviceSetGraphMemAttribute_params_st { /* cuDeviceSetGraphMemAttribute args, presumed from name: same field layout as the Get record (device, attribute, void* value) */
+    CUdevice device;
+    CUgraphMem_attribute attr;
+    void *value;
+} cuDeviceSetGraphMemAttribute_params;
+typedef struct cuGraphClone_params_st { /* cuGraphClone args, presumed from name: pointer to a CUgraph and the original graph by value */
+    CUgraph *phGraphClone;
+    CUgraph originalGraph;
+} cuGraphClone_params;
+typedef struct cuGraphNodeFindInClone_params_st { /* cuGraphNodeFindInClone args, presumed from name: node pointer, original node handle, cloned graph handle */
+    CUgraphNode *phNode;
+    CUgraphNode hOriginalNode;
+    CUgraph hClonedGraph;
+} cuGraphNodeFindInClone_params;
+typedef struct cuGraphNodeGetType_params_st { /* cuGraphNodeGetType args, presumed from name: node handle and a pointer to a CUgraphNodeType */
+    CUgraphNode hNode;
+    CUgraphNodeType *type;
+} cuGraphNodeGetType_params;
+typedef struct cuGraphGetNodes_params_st { /* cuGraphGetNodes args, presumed from name: graph, node pointer, pointer to a size_t count */
+    CUgraph hGraph;
+    CUgraphNode *nodes;
+    size_t *numNodes;
+} cuGraphGetNodes_params;
+typedef struct cuGraphGetRootNodes_params_st { /* cuGraphGetRootNodes args, presumed from name: graph, node pointer, pointer to a size_t count */
+    CUgraph hGraph;
+    CUgraphNode *rootNodes;
+    size_t *numRootNodes;
+} cuGraphGetRootNodes_params;
+typedef struct cuGraphGetEdges_params_st { /* cuGraphGetEdges args, presumed from name: graph, two non-const node pointers (from/to), pointer to a size_t count */
+    CUgraph hGraph;
+    CUgraphNode *from;
+    CUgraphNode *to;
+    size_t *numEdges;
+} cuGraphGetEdges_params;
+typedef struct cuGraphNodeGetDependencies_params_st { /* cuGraphNodeGetDependencies args, presumed from name: node handle, node pointer, pointer to a size_t count */
+    CUgraphNode hNode;
+    CUgraphNode *dependencies;
+    size_t *numDependencies;
+} cuGraphNodeGetDependencies_params;
+typedef struct cuGraphNodeGetDependentNodes_params_st { /* cuGraphNodeGetDependentNodes args, presumed from name: node handle, node pointer, pointer to a size_t count */
+    CUgraphNode hNode;
+    CUgraphNode *dependentNodes;
+    size_t *numDependentNodes;
+} cuGraphNodeGetDependentNodes_params;
+typedef struct cuGraphAddDependencies_params_st { /* cuGraphAddDependencies args, presumed from name: graph, two const node pointers (from/to), size_t count by value */
+    CUgraph hGraph;
+    const CUgraphNode *from;
+    const CUgraphNode *to;
+    size_t numDependencies;
+} cuGraphAddDependencies_params;
+typedef struct cuGraphRemoveDependencies_params_st { /* cuGraphRemoveDependencies args, presumed from name: same field layout as cuGraphAddDependencies_params */
+    CUgraph hGraph;
+    const CUgraphNode *from;
+    const CUgraphNode *to;
+    size_t numDependencies;
+} cuGraphRemoveDependencies_params;
+typedef struct cuGraphDestroyNode_params_st { /* cuGraphDestroyNode args, presumed from name: a single node handle */
+    CUgraphNode hNode;
+} cuGraphDestroyNode_params;
+typedef struct cuGraphInstantiate_v2_params_st { /* cuGraphInstantiate_v2 args, presumed from name: exec pointer, graph, error-node pointer, char buffer with its size_t size */
+    CUgraphExec *phGraphExec;
+    CUgraph hGraph;
+    CUgraphNode *phErrorNode;
+    char *logBuffer;
+    size_t bufferSize;
+} cuGraphInstantiate_v2_params;
+typedef struct cuGraphInstantiateWithFlags_params_st { /* cuGraphInstantiateWithFlags args, presumed from name: exec pointer, graph, unsigned long long flags */
+    CUgraphExec *phGraphExec;
+    CUgraph hGraph;
+    unsigned long long flags;
+} cuGraphInstantiateWithFlags_params;
+typedef struct cuGraphExecKernelNodeSetParams_params_st { /* cuGraphExecKernelNodeSetParams args, presumed from name: exec handle, node handle, const kernel-node params pointer */
+    CUgraphExec hGraphExec;
+    CUgraphNode hNode;
+    const CUDA_KERNEL_NODE_PARAMS *nodeParams;
+} cuGraphExecKernelNodeSetParams_params;
+typedef struct cuGraphExecMemcpyNodeSetParams_params_st { /* cuGraphExecMemcpyNodeSetParams args, presumed from name: exec handle, node handle, const CUDA_MEMCPY3D pointer, context */
+    CUgraphExec hGraphExec;
+    CUgraphNode hNode;
+    const CUDA_MEMCPY3D *copyParams;
+    CUcontext ctx;
+} cuGraphExecMemcpyNodeSetParams_params;
+typedef struct cuGraphExecMemsetNodeSetParams_params_st { /* cuGraphExecMemsetNodeSetParams args, presumed from name: exec handle, node handle, const memset-node params pointer, context */
+    CUgraphExec hGraphExec;
+    CUgraphNode hNode;
+    const CUDA_MEMSET_NODE_PARAMS *memsetParams;
+    CUcontext ctx;
+} cuGraphExecMemsetNodeSetParams_params;
+typedef struct cuGraphExecHostNodeSetParams_params_st { /* cuGraphExecHostNodeSetParams args, presumed from name: exec handle, node handle, const host-node params pointer */
+    CUgraphExec hGraphExec;
+    CUgraphNode hNode;
+    const CUDA_HOST_NODE_PARAMS *nodeParams;
+} cuGraphExecHostNodeSetParams_params;
+typedef struct cuGraphExecChildGraphNodeSetParams_params_st { /* cuGraphExecChildGraphNodeSetParams args, presumed from name: exec handle, node handle, child graph handle */
+    CUgraphExec hGraphExec;
+    CUgraphNode hNode;
+    CUgraph childGraph;
+} cuGraphExecChildGraphNodeSetParams_params;
+typedef struct cuGraphExecEventRecordNodeSetEvent_params_st { /* cuGraphExecEventRecordNodeSetEvent args, presumed from name: exec handle, node handle, event handle */
+    CUgraphExec hGraphExec;
+    CUgraphNode hNode;
+    CUevent event;
+} cuGraphExecEventRecordNodeSetEvent_params;
+typedef struct cuGraphExecEventWaitNodeSetEvent_params_st { /* cuGraphExecEventWaitNodeSetEvent args, presumed from name: exec handle, node handle, event handle */
+    CUgraphExec hGraphExec;
+    CUgraphNode hNode;
+    CUevent event;
+} cuGraphExecEventWaitNodeSetEvent_params;
+typedef struct cuGraphExecExternalSemaphoresSignalNodeSetParams_params_st { /* cuGraphExecExternalSemaphoresSignalNodeSetParams args, presumed from name: exec handle, node handle, const signal-node params pointer */
+    CUgraphExec hGraphExec;
+    CUgraphNode hNode;
+    const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams;
+} cuGraphExecExternalSemaphoresSignalNodeSetParams_params;
+typedef struct cuGraphExecExternalSemaphoresWaitNodeSetParams_params_st { /* cuGraphExecExternalSemaphoresWaitNodeSetParams args, presumed from name: exec handle, node handle, const wait-node params pointer */
+    CUgraphExec hGraphExec;
+    CUgraphNode hNode;
+    const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams;
+} cuGraphExecExternalSemaphoresWaitNodeSetParams_params;
+typedef struct cuGraphNodeSetEnabled_params_st { /* cuGraphNodeSetEnabled args, presumed from name: exec handle, node handle, unsigned isEnabled by value */
+    CUgraphExec hGraphExec;
+    CUgraphNode hNode;
+    unsigned int isEnabled;
+} cuGraphNodeSetEnabled_params;
+typedef struct cuGraphNodeGetEnabled_params_st { /* cuGraphNodeGetEnabled args, presumed from name: exec handle, node handle, pointer to unsigned isEnabled */
+    CUgraphExec hGraphExec;
+    CUgraphNode hNode;
+    unsigned int *isEnabled;
+} cuGraphNodeGetEnabled_params;
+typedef struct cuGraphUpload_ptsz_params_st { /* cuGraphUpload (_ptsz) args, presumed from name: exec handle and stream */
+    CUgraphExec hGraphExec;
+    CUstream hStream;
+} cuGraphUpload_ptsz_params;
+typedef struct cuGraphLaunch_ptsz_params_st { /* cuGraphLaunch (_ptsz) args, presumed from name: exec handle and stream */
+    CUgraphExec hGraphExec;
+    CUstream hStream;
+} cuGraphLaunch_ptsz_params;
+typedef struct cuGraphExecDestroy_params_st { /* cuGraphExecDestroy args, presumed from name: a single exec handle */
+    CUgraphExec hGraphExec;
+} cuGraphExecDestroy_params;
+typedef struct cuGraphDestroy_params_st { /* cuGraphDestroy args, presumed from name: a single graph handle */
+    CUgraph hGraph;
+} cuGraphDestroy_params;
+typedef struct cuGraphExecUpdate_params_st { /* cuGraphExecUpdate args, presumed from name: exec handle, graph, node pointer, update-result pointer */
+    CUgraphExec hGraphExec;
+    CUgraph hGraph;
+    CUgraphNode *hErrorNode_out;
+    CUgraphExecUpdateResult *updateResult_out;
+} cuGraphExecUpdate_params;
+typedef struct cuGraphKernelNodeCopyAttributes_params_st { /* cuGraphKernelNodeCopyAttributes args, presumed from name: two node handles, dst then src */
+    CUgraphNode dst;
+    CUgraphNode src;
+} cuGraphKernelNodeCopyAttributes_params;
+typedef struct cuGraphKernelNodeGetAttribute_params_st { /* cuGraphKernelNodeGetAttribute args, presumed from name: node handle, attribute id, non-const attribute-value pointer */
+    CUgraphNode hNode;
+    CUkernelNodeAttrID attr;
+    CUkernelNodeAttrValue *value_out;
+} cuGraphKernelNodeGetAttribute_params;
+typedef struct cuGraphKernelNodeSetAttribute_params_st { /* cuGraphKernelNodeSetAttribute args, presumed from name: node handle, attribute id, const attribute-value pointer */
+    CUgraphNode hNode;
+    CUkernelNodeAttrID attr;
+    const CUkernelNodeAttrValue *value;
+} cuGraphKernelNodeSetAttribute_params;
+typedef struct cuGraphDebugDotPrint_params_st { /* cuGraphDebugDotPrint args, presumed from name: graph, const char* path, unsigned flags */
+    CUgraph hGraph;
+    const char *path;
+    unsigned int flags;
+} cuGraphDebugDotPrint_params;
+typedef struct cuUserObjectCreate_params_st { /* cuUserObjectCreate args, presumed from name: user-object pointer, untyped pointer, CUhostFn named destroy, initial refcount, flags */
+    CUuserObject *object_out;
+    void *ptr;
+    CUhostFn destroy;
+    unsigned int initialRefcount;
+    unsigned int flags;
+} cuUserObjectCreate_params;
+typedef struct cuUserObjectRetain_params_st { /* cuUserObjectRetain args, presumed from name: user object and an unsigned count */
+    CUuserObject object;
+    unsigned int count;
+} cuUserObjectRetain_params;
+typedef struct cuUserObjectRelease_params_st { /* cuUserObjectRelease args, presumed from name: user object and an unsigned count */
+    CUuserObject object;
+    unsigned int count;
+} cuUserObjectRelease_params;
+typedef struct cuGraphRetainUserObject_params_st { /* cuGraphRetainUserObject args, presumed from name: graph, user object, count, flags */
+    CUgraph graph;
+    CUuserObject object;
+    unsigned int count;
+    unsigned int flags;
+} cuGraphRetainUserObject_params;
+typedef struct cuGraphReleaseUserObject_params_st { /* cuGraphReleaseUserObject args, presumed from name: graph, user object, count; no flags field, unlike the Retain record */
+    CUgraph graph;
+    CUuserObject object;
+    unsigned int count;
+} cuGraphReleaseUserObject_params;
+typedef struct cuOccupancyMaxActiveBlocksPerMultiprocessor_params_st { /* cuOccupancyMaxActiveBlocksPerMultiprocessor args, presumed from name: int pointer, function, int block size, size_t dynamic shared-memory size */
+    int *numBlocks;
+    CUfunction func;
+    int blockSize;
+    size_t dynamicSMemSize;
+} cuOccupancyMaxActiveBlocksPerMultiprocessor_params;
+typedef struct cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_params_st { /* cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags args, presumed from name: the non-flags record's fields plus trailing unsigned flags */
+    int *numBlocks;
+    CUfunction func;
+    int blockSize;
+    size_t dynamicSMemSize;
+    unsigned int flags;
+} cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_params;
+typedef struct cuOccupancyMaxPotentialBlockSize_params_st { /* cuOccupancyMaxPotentialBlockSize args, presumed from name: two int pointers, function, CUoccupancyB2DSize value, size_t shared-memory size, int block-size limit */
+    int *minGridSize;
+    int *blockSize;
+    CUfunction func;
+    CUoccupancyB2DSize blockSizeToDynamicSMemSize;
+    size_t dynamicSMemSize;
+    int blockSizeLimit;
+} cuOccupancyMaxPotentialBlockSize_params;
+typedef struct cuOccupancyMaxPotentialBlockSizeWithFlags_params_st { /* cuOccupancyMaxPotentialBlockSizeWithFlags args, presumed from name: the non-flags record's fields plus trailing unsigned flags */
+    int *minGridSize;
+    int *blockSize;
+    CUfunction func;
+    CUoccupancyB2DSize blockSizeToDynamicSMemSize;
+    size_t dynamicSMemSize;
+    int blockSizeLimit;
+    unsigned int flags;
+} cuOccupancyMaxPotentialBlockSizeWithFlags_params;
+typedef struct cuOccupancyAvailableDynamicSMemPerBlock_params_st { /* cuOccupancyAvailableDynamicSMemPerBlock args, presumed from name: size_t pointer, function, int block count, int block size */
+    size_t *dynamicSmemSize;
+    CUfunction func;
+    int numBlocks;
+    int blockSize;
+} cuOccupancyAvailableDynamicSMemPerBlock_params;
+typedef struct cuOccupancyMaxPotentialClusterSize_params_st { /* cuOccupancyMaxPotentialClusterSize args, presumed from name: int pointer, function, const launch-config pointer */
+    int *clusterSize;
+    CUfunction func;
+    const CUlaunchConfig *config;
+} cuOccupancyMaxPotentialClusterSize_params;
+typedef struct cuOccupancyMaxActiveClusters_params_st { /* cuOccupancyMaxActiveClusters args, presumed from name: int pointer, function, const launch-config pointer */
+    int *numClusters;
+    CUfunction func;
+    const CUlaunchConfig *config;
+} cuOccupancyMaxActiveClusters_params;
+typedef struct cuTexRefSetArray_params_st { /* cuTexRefSetArray args, presumed from name: texref handle, array handle, unsigned Flags */
+    CUtexref hTexRef;
+    CUarray hArray;
+    unsigned int Flags;
+} cuTexRefSetArray_params;
+typedef struct cuTexRefSetMipmappedArray_params_st { /* cuTexRefSetMipmappedArray args, presumed from name: texref handle, mipmapped-array handle, unsigned Flags */
+    CUtexref hTexRef;
+    CUmipmappedArray hMipmappedArray;
+    unsigned int Flags;
+} cuTexRefSetMipmappedArray_params;
+typedef struct cuTexRefSetAddress_v2_params_st { /* cuTexRefSetAddress_v2 args, presumed from name: size_t pointer, texref, device pointer, size_t byte count */
+    size_t *ByteOffset;
+    CUtexref hTexRef;
+    CUdeviceptr dptr;
+    size_t bytes;
+} cuTexRefSetAddress_v2_params;
+typedef struct cuTexRefSetAddress2D_v3_params_st { /* cuTexRefSetAddress2D_v3 args, presumed from name: texref, const array-descriptor pointer, device pointer, size_t Pitch */
+    CUtexref hTexRef;
+    const CUDA_ARRAY_DESCRIPTOR *desc;
+    CUdeviceptr dptr;
+    size_t Pitch;
+} cuTexRefSetAddress2D_v3_params;
+typedef struct cuTexRefSetFormat_params_st { /* cuTexRefSetFormat args, presumed from name: texref, array format, int NumPackedComponents */
+    CUtexref hTexRef;
+    CUarray_format fmt;
+    int NumPackedComponents;
+} cuTexRefSetFormat_params;
+typedef struct cuTexRefSetAddressMode_params_st { /* cuTexRefSetAddressMode args, presumed from name: texref, int dim, address mode */
+    CUtexref hTexRef;
+    int dim;
+    CUaddress_mode am;
+} cuTexRefSetAddressMode_params;
+typedef struct cuTexRefSetFilterMode_params_st { /* cuTexRefSetFilterMode args, presumed from name: texref and a CUfilter_mode value */
+    CUtexref hTexRef;
+    CUfilter_mode fm;
+} cuTexRefSetFilterMode_params;
+typedef struct cuTexRefSetMipmapFilterMode_params_st { /* cuTexRefSetMipmapFilterMode args, presumed from name: texref and a CUfilter_mode value */
+    CUtexref hTexRef;
+    CUfilter_mode fm;
+} cuTexRefSetMipmapFilterMode_params;
+typedef struct cuTexRefSetMipmapLevelBias_params_st { /* cuTexRefSetMipmapLevelBias args, presumed from name: texref and a float bias */
+    CUtexref hTexRef;
+    float bias;
+} cuTexRefSetMipmapLevelBias_params;
+typedef struct cuTexRefSetMipmapLevelClamp_params_st { /* cuTexRefSetMipmapLevelClamp args, presumed from name: texref plus float min and max clamp values */
+    CUtexref hTexRef;
+    float minMipmapLevelClamp;
+    float maxMipmapLevelClamp;
+} cuTexRefSetMipmapLevelClamp_params;
+typedef struct cuTexRefSetMaxAnisotropy_params_st { /* cuTexRefSetMaxAnisotropy args, presumed from name: texref and unsigned maxAniso */
+    CUtexref hTexRef;
+    unsigned int maxAniso;
+} cuTexRefSetMaxAnisotropy_params;
+typedef struct cuTexRefSetBorderColor_params_st { /* cuTexRefSetBorderColor args, presumed from name: texref and a non-const float pointer */
+    CUtexref hTexRef;
+    float *pBorderColor;
+} cuTexRefSetBorderColor_params;
+typedef struct cuTexRefSetFlags_params_st { /* cuTexRefSetFlags args, presumed from name: texref and unsigned Flags */
+    CUtexref hTexRef;
+    unsigned int Flags;
+} cuTexRefSetFlags_params;
+typedef struct cuTexRefGetAddress_v2_params_st { /* cuTexRefGetAddress_v2 args, presumed from name: CUdeviceptr pointer and texref */
+    CUdeviceptr *pdptr;
+    CUtexref hTexRef;
+} cuTexRefGetAddress_v2_params;
+typedef struct cuTexRefGetArray_params_st { /* cuTexRefGetArray args, presumed from name: CUarray pointer and texref */
+    CUarray *phArray;
+    CUtexref hTexRef;
+} cuTexRefGetArray_params;
+typedef struct cuTexRefGetMipmappedArray_params_st { /* cuTexRefGetMipmappedArray args, presumed from name: CUmipmappedArray pointer and texref */
+    CUmipmappedArray *phMipmappedArray;
+    CUtexref hTexRef;
+} cuTexRefGetMipmappedArray_params;
+typedef struct cuTexRefGetAddressMode_params_st { /* cuTexRefGetAddressMode args, presumed from name: CUaddress_mode pointer, texref, int dim */
+    CUaddress_mode *pam;
+    CUtexref hTexRef;
+    int dim;
+} cuTexRefGetAddressMode_params;
+typedef struct cuTexRefGetFilterMode_params_st { /* cuTexRefGetFilterMode args, presumed from name: CUfilter_mode pointer and texref */
+    CUfilter_mode *pfm;
+    CUtexref hTexRef;
+} cuTexRefGetFilterMode_params;
+typedef struct cuTexRefGetFormat_params_st { /* cuTexRefGetFormat args, presumed from name: CUarray_format pointer, int pointer, texref */
+    CUarray_format *pFormat;
+    int *pNumChannels;
+    CUtexref hTexRef;
+} cuTexRefGetFormat_params;
+typedef struct cuTexRefGetMipmapFilterMode_params_st { /* cuTexRefGetMipmapFilterMode args, presumed from name: CUfilter_mode pointer and texref */
+    CUfilter_mode *pfm;
+    CUtexref hTexRef;
+} cuTexRefGetMipmapFilterMode_params;
+typedef struct cuTexRefGetMipmapLevelBias_params_st { /* cuTexRefGetMipmapLevelBias args, presumed from name: float pointer and texref */
+    float *pbias;
+    CUtexref hTexRef;
+} cuTexRefGetMipmapLevelBias_params;
+typedef struct cuTexRefGetMipmapLevelClamp_params_st { /* cuTexRefGetMipmapLevelClamp args, presumed from name: two float pointers (min, max) and texref */
+    float *pminMipmapLevelClamp;
+    float *pmaxMipmapLevelClamp;
+    CUtexref hTexRef;
+} cuTexRefGetMipmapLevelClamp_params;
+typedef struct cuTexRefGetMaxAnisotropy_params_st { /* cuTexRefGetMaxAnisotropy args, presumed from name: int pointer and texref */
+    int *pmaxAniso; /* signed here, whereas cuTexRefSetMaxAnisotropy_params.maxAniso is unsigned int */
+    CUtexref hTexRef;
+} cuTexRefGetMaxAnisotropy_params;
+typedef struct cuTexRefGetBorderColor_params_st { /* cuTexRefGetBorderColor args, presumed from name: float pointer and texref */
+    float *pBorderColor;
+    CUtexref hTexRef;
+} cuTexRefGetBorderColor_params;
+typedef struct cuTexRefGetFlags_params_st { /* cuTexRefGetFlags args, presumed from name: unsigned int pointer and texref */
+    unsigned int *pFlags;
+    CUtexref hTexRef;
+} cuTexRefGetFlags_params;
+typedef struct cuTexRefCreate_params_st { /* cuTexRefCreate args, presumed from name: a single CUtexref pointer */
+    CUtexref *pTexRef;
+} cuTexRefCreate_params;
+typedef struct cuTexRefDestroy_params_st { /* cuTexRefDestroy args, presumed from name: a single texref handle */
+    CUtexref hTexRef;
+} cuTexRefDestroy_params;
+typedef struct cuSurfRefSetArray_params_st { /* cuSurfRefSetArray args, presumed from name: surfref handle, array handle, unsigned Flags */
+    CUsurfref hSurfRef;
+    CUarray hArray;
+    unsigned int Flags;
+} cuSurfRefSetArray_params;
+typedef struct cuSurfRefGetArray_params_st { /* cuSurfRefGetArray args, presumed from name: CUarray pointer and surfref handle */
+    CUarray *phArray;
+    CUsurfref hSurfRef;
+} cuSurfRefGetArray_params;
+typedef struct cuTexObjectCreate_params_st { /* cuTexObjectCreate args, presumed from name: CUtexObject pointer plus three const descriptor pointers (resource, texture, resource view) */
+    CUtexObject *pTexObject;
+    const CUDA_RESOURCE_DESC *pResDesc;
+    const CUDA_TEXTURE_DESC *pTexDesc;
+    const CUDA_RESOURCE_VIEW_DESC *pResViewDesc;
+} cuTexObjectCreate_params;
+typedef struct cuTexObjectDestroy_params_st { /* cuTexObjectDestroy args, presumed from name: a single CUtexObject */
+    CUtexObject texObject;
+} cuTexObjectDestroy_params;
+typedef struct cuTexObjectGetResourceDesc_params_st { /* cuTexObjectGetResourceDesc args, presumed from name: non-const resource-descriptor pointer and CUtexObject */
+    CUDA_RESOURCE_DESC *pResDesc;
+    CUtexObject texObject;
+} cuTexObjectGetResourceDesc_params;
+typedef struct cuTexObjectGetTextureDesc_params_st { /* cuTexObjectGetTextureDesc args, presumed from name: non-const texture-descriptor pointer and CUtexObject */
+    CUDA_TEXTURE_DESC *pTexDesc;
+    CUtexObject texObject;
+} cuTexObjectGetTextureDesc_params;
+typedef struct cuTexObjectGetResourceViewDesc_params_st { /* cuTexObjectGetResourceViewDesc args, presumed from name: non-const resource-view-descriptor pointer and CUtexObject */
+    CUDA_RESOURCE_VIEW_DESC *pResViewDesc;
+    CUtexObject texObject;
+} cuTexObjectGetResourceViewDesc_params;
+typedef struct cuSurfObjectCreate_params_st { /* cuSurfObjectCreate args, presumed from name: CUsurfObject pointer and const resource-descriptor pointer */
+    CUsurfObject *pSurfObject;
+    const CUDA_RESOURCE_DESC *pResDesc;
+} cuSurfObjectCreate_params;
+typedef struct cuSurfObjectDestroy_params_st { /* cuSurfObjectDestroy args, presumed from name: a single CUsurfObject */
+    CUsurfObject surfObject;
+} cuSurfObjectDestroy_params;
+typedef struct cuSurfObjectGetResourceDesc_params_st { /* cuSurfObjectGetResourceDesc args, presumed from name: non-const resource-descriptor pointer and CUsurfObject */
+    CUDA_RESOURCE_DESC *pResDesc;
+    CUsurfObject surfObject;
+} cuSurfObjectGetResourceDesc_params;
+typedef struct cuDeviceCanAccessPeer_params_st { /* cuDeviceCanAccessPeer args, presumed from name: int pointer and two CUdevice values */
+    int *canAccessPeer;
+    CUdevice dev;
+    CUdevice peerDev;
+} cuDeviceCanAccessPeer_params;
+typedef struct cuCtxEnablePeerAccess_params_st { /* cuCtxEnablePeerAccess args, presumed from name: context and unsigned Flags */
+    CUcontext peerContext;
+    unsigned int Flags;
+} cuCtxEnablePeerAccess_params;
+typedef struct cuCtxDisablePeerAccess_params_st { /* cuCtxDisablePeerAccess args, presumed from name: a single context */
+    CUcontext peerContext;
+} cuCtxDisablePeerAccess_params;
+typedef struct cuDeviceGetP2PAttribute_params_st { /* cuDeviceGetP2PAttribute args, presumed from name: int pointer, attribute selector, source and destination CUdevice */
+    int *value;
+    CUdevice_P2PAttribute attrib;
+    CUdevice srcDevice;
+    CUdevice dstDevice;
+} cuDeviceGetP2PAttribute_params;
+typedef struct cuGraphicsUnregisterResource_params_st { /* cuGraphicsUnregisterResource args, presumed from name: a single graphics-resource handle */
+    CUgraphicsResource resource;
+} cuGraphicsUnregisterResource_params;
+typedef struct cuGraphicsSubResourceGetMappedArray_params_st { /* cuGraphicsSubResourceGetMappedArray args, presumed from name: CUarray pointer, resource, unsigned array index and mip level */
+    CUarray *pArray;
+    CUgraphicsResource resource;
+    unsigned int arrayIndex;
+    unsigned int mipLevel;
+} cuGraphicsSubResourceGetMappedArray_params;
+typedef struct cuGraphicsResourceGetMappedMipmappedArray_params_st { /* cuGraphicsResourceGetMappedMipmappedArray args, presumed from name: CUmipmappedArray pointer and resource */
+    CUmipmappedArray *pMipmappedArray;
+    CUgraphicsResource resource;
+} cuGraphicsResourceGetMappedMipmappedArray_params;
+typedef struct cuGraphicsResourceGetMappedPointer_v2_params_st { /* cuGraphicsResourceGetMappedPointer_v2 args, presumed from name: CUdeviceptr pointer, size_t pointer, resource */
+    CUdeviceptr *pDevPtr;
+    size_t *pSize;
+    CUgraphicsResource resource;
+} cuGraphicsResourceGetMappedPointer_v2_params;
+typedef struct cuGraphicsResourceSetMapFlags_v2_params_st { /* cuGraphicsResourceSetMapFlags_v2 args, presumed from name: resource and unsigned flags */
+    CUgraphicsResource resource;
+    unsigned int flags;
+} cuGraphicsResourceSetMapFlags_v2_params;
+typedef struct cuGraphicsMapResources_ptsz_params_st { /* cuGraphicsMapResources (_ptsz) args, presumed from name: unsigned count, resource pointer, stream */
+    unsigned int count;
+    CUgraphicsResource *resources;
+    CUstream hStream;
+} cuGraphicsMapResources_ptsz_params;
+typedef struct cuGraphicsUnmapResources_ptsz_params_st { /* cuGraphicsUnmapResources (_ptsz) args, presumed from name: unsigned count, resource pointer, stream */
+    unsigned int count;
+    CUgraphicsResource *resources;
+    CUstream hStream;
+} cuGraphicsUnmapResources_ptsz_params;
+typedef struct cuGetProcAddress_params_st { /* cuGetProcAddress args, presumed from name: const char* symbol, void** pfn, int cudaVersion, cuuint64_t flags */
+    const char *symbol;
+    void **pfn;
+    int cudaVersion;
+    cuuint64_t flags;
+} cuGetProcAddress_params;
+typedef struct cuModuleGetLoadingMode_params_st { /* cuModuleGetLoadingMode args, presumed from name: a single CUmoduleLoadingMode pointer */
+    CUmoduleLoadingMode *mode;
+} cuModuleGetLoadingMode_params;
+typedef struct cuMemGetHandleForAddressRange_params_st { /* cuMemGetHandleForAddressRange args, presumed from name: untyped handle pointer, device pointer, size_t size, handle type, unsigned long long flags */
+    void *handle;
+    CUdeviceptr dptr;
+    size_t size;
+    CUmemRangeHandleType handleType;
+    unsigned long long flags;
+} cuMemGetHandleForAddressRange_params;
+typedef struct cuGetExportTable_params_st { /* cuGetExportTable args, presumed from name: const void** table pointer and const CUuuid pointer */
+    const void **ppExportTable;
+    const CUuuid *pExportTableId;
+} cuGetExportTable_params;
+typedef struct cuMemHostRegister_params_st { /* cuMemHostRegister args, presumed from name: untyped pointer, size_t bytesize, unsigned Flags */
+    void *p;
+    size_t bytesize;
+    unsigned int Flags;
+} cuMemHostRegister_params;
+typedef struct cuGraphicsResourceSetMapFlags_params_st { /* cuGraphicsResourceSetMapFlags (unsuffixed) args, presumed from name: resource and unsigned flags; same fields as the _v2 record */
+    CUgraphicsResource resource;
+    unsigned int flags;
+} cuGraphicsResourceSetMapFlags_params;
+typedef struct cuLinkCreate_params_st { /* cuLinkCreate args, presumed from name: option count, option pointer, void** option values, CUlinkState pointer */
+    unsigned int numOptions;
+    CUjit_option *options;
+    void **optionValues;
+    CUlinkState *stateOut;
+} cuLinkCreate_params;
+typedef struct cuLinkAddData_params_st { /* cuLinkAddData args, presumed from name: link state, input type, untyped data pointer with size_t size, const char* name, then option count/options/values */
+    CUlinkState state;
+    CUjitInputType type;
+    void *data;
+    size_t size;
+    const char *name;
+    unsigned int numOptions;
+    CUjit_option *options;
+    void **optionValues;
+} cuLinkAddData_params;
+typedef struct cuLinkAddFile_params_st { /* cuLinkAddFile args, presumed from name: link state, input type, const char* path, then option count/options/values */
+    CUlinkState state;
+    CUjitInputType type;
+    const char *path;
+    unsigned int numOptions;
+    CUjit_option *options;
+    void **optionValues;
+} cuLinkAddFile_params;
+typedef struct cuTexRefSetAddress2D_v2_params_st { /* cuTexRefSetAddress2D_v2 args, presumed from name: texref, const array-descriptor pointer, device pointer, size_t Pitch; same fields as the _v3 record */
+    CUtexref hTexRef;
+    const CUDA_ARRAY_DESCRIPTOR *desc;
+    CUdeviceptr dptr;
+    size_t Pitch;
+} cuTexRefSetAddress2D_v2_params;
+typedef struct cuDeviceTotalMem_params_st { /* cuDeviceTotalMem (unsuffixed) args, presumed from name: unsigned int pointer and device */
+    unsigned int *bytes;
+    CUdevice dev;
+} cuDeviceTotalMem_params;
+typedef struct cuCtxCreate_params_st { /* cuCtxCreate (unsuffixed) args, presumed from name: context pointer, unsigned flags, device */
+    CUcontext *pctx;
+    unsigned int flags;
+    CUdevice dev;
+} cuCtxCreate_params;
+typedef struct cuModuleGetGlobal_params_st { /* cuModuleGetGlobal (unsuffixed) args, presumed from name: CUdeviceptr_v1 pointer, unsigned int pointer, module, const char* name */
+    CUdeviceptr_v1 *dptr;
+    unsigned int *bytes;
+    CUmodule hmod;
+    const char *name;
+} cuModuleGetGlobal_params;
+typedef struct cuMemGetInfo_params_st { /* cuMemGetInfo (unsuffixed) args, presumed from name: two unsigned int pointers, free and total */
+    unsigned int *free;
+    unsigned int *total;
+} cuMemGetInfo_params;
+typedef struct cuMemAlloc_params_st { /* cuMemAlloc (unsuffixed) args, presumed from name: CUdeviceptr_v1 pointer and unsigned bytesize */
+    CUdeviceptr_v1 *dptr;
+    unsigned int bytesize;
+} cuMemAlloc_params;
+typedef struct cuMemAllocPitch_params_st { /* cuMemAllocPitch (unsuffixed) args, presumed from name: CUdeviceptr_v1 pointer, unsigned pitch pointer, unsigned width-in-bytes, height, element size */
+    CUdeviceptr_v1 *dptr;
+    unsigned int *pPitch;
+    unsigned int WidthInBytes;
+    unsigned int Height;
+    unsigned int ElementSizeBytes;
+} cuMemAllocPitch_params;
+typedef struct cuMemFree_params_st { /* cuMemFree (unsuffixed) args, presumed from name: a single CUdeviceptr_v1 */
+    CUdeviceptr_v1 dptr;
+} cuMemFree_params;
+typedef struct cuMemGetAddressRange_params_st { /* cuMemGetAddressRange (unsuffixed) args, presumed from name: CUdeviceptr_v1 pointer, unsigned int pointer, CUdeviceptr_v1 by value */
+    CUdeviceptr_v1 *pbase;
+    unsigned int *psize;
+    CUdeviceptr_v1 dptr;
+} cuMemGetAddressRange_params;
+typedef struct cuMemAllocHost_params_st { /* cuMemAllocHost (unsuffixed) args, presumed from name: void** and unsigned bytesize */
+    void **pp;
+    unsigned int bytesize;
+} cuMemAllocHost_params;
+typedef struct cuMemHostGetDevicePointer_params_st { /* cuMemHostGetDevicePointer (unsuffixed) args, presumed from name: CUdeviceptr_v1 pointer, untyped pointer, unsigned Flags */
+    CUdeviceptr_v1 *pdptr;
+    void *p;
+    unsigned int Flags;
+} cuMemHostGetDevicePointer_params;
+typedef struct cuMemcpyHtoD_params_st { /* cuMemcpyHtoD (unsuffixed) args, presumed from name: CUdeviceptr_v1 destination, const void* source, unsigned ByteCount */
+    CUdeviceptr_v1 dstDevice;
+    const void *srcHost;
+    unsigned int ByteCount;
+} cuMemcpyHtoD_params;
+typedef struct cuMemcpyDtoH_params_st { /* cuMemcpyDtoH (unsuffixed) args, presumed from name: void* destination, CUdeviceptr_v1 source, unsigned ByteCount */
+    void *dstHost;
+    CUdeviceptr_v1 srcDevice;
+    unsigned int ByteCount;
+} cuMemcpyDtoH_params;
+typedef struct cuMemcpyDtoD_params_st { /* cuMemcpyDtoD (unsuffixed) args, presumed from name: two CUdeviceptr_v1 values (dst, src) and unsigned ByteCount */
+    CUdeviceptr_v1 dstDevice;
+    CUdeviceptr_v1 srcDevice;
+    unsigned int ByteCount;
+} cuMemcpyDtoD_params;
+typedef struct cuMemcpyDtoA_params_st { /* cuMemcpyDtoA (unsuffixed) args, presumed from name: destination array with unsigned offset, CUdeviceptr_v1 source, unsigned ByteCount */
+    CUarray dstArray;
+    unsigned int dstOffset;
+    CUdeviceptr_v1 srcDevice;
+    unsigned int ByteCount;
+} cuMemcpyDtoA_params;
+typedef struct cuMemcpyAtoD_params_st { /* cuMemcpyAtoD (unsuffixed) args, presumed from name: CUdeviceptr_v1 destination, source array with unsigned offset, unsigned ByteCount */
+    CUdeviceptr_v1 dstDevice;
+    CUarray srcArray;
+    unsigned int srcOffset;
+    unsigned int ByteCount;
+} cuMemcpyAtoD_params;
+typedef struct cuMemcpyHtoA_params_st { /* cuMemcpyHtoA (unsuffixed) args, presumed from name: destination array with unsigned offset, const void* source, unsigned ByteCount */
+    CUarray dstArray;
+    unsigned int dstOffset;
+    const void *srcHost;
+    unsigned int ByteCount;
+} cuMemcpyHtoA_params;
+typedef struct cuMemcpyAtoH_params_st { /* cuMemcpyAtoH (unsuffixed) args, presumed from name: void* destination, source array with unsigned offset, unsigned ByteCount */
+    void *dstHost;
+    CUarray srcArray;
+    unsigned int srcOffset;
+    unsigned int ByteCount;
+} cuMemcpyAtoH_params;
+typedef struct cuMemcpyAtoA_params_st { /* cuMemcpyAtoA (unsuffixed) args, presumed from name: destination array+offset, source array+offset, unsigned ByteCount */
+    CUarray dstArray;
+    unsigned int dstOffset;
+    CUarray srcArray;
+    unsigned int srcOffset;
+    unsigned int ByteCount;
+} cuMemcpyAtoA_params;
+typedef struct cuMemcpyHtoAAsync_params_st { /* cuMemcpyHtoAAsync (unsuffixed) args, presumed from name: cuMemcpyHtoA_params fields plus a trailing stream */
+    CUarray dstArray;
+    unsigned int dstOffset;
+    const void *srcHost;
+    unsigned int ByteCount;
+    CUstream hStream;
+} cuMemcpyHtoAAsync_params;
+typedef struct cuMemcpyAtoHAsync_params_st { /* cuMemcpyAtoHAsync (unsuffixed) args, presumed from name: cuMemcpyAtoH_params fields plus a trailing stream */
+    void *dstHost;
+    CUarray srcArray;
+    unsigned int srcOffset;
+    unsigned int ByteCount;
+    CUstream hStream;
+} cuMemcpyAtoHAsync_params;
+typedef struct cuMemcpy2D_params_st { /* cuMemcpy2D (unsuffixed) args, presumed from name: a single const CUDA_MEMCPY2D_v1 pointer */
+    const CUDA_MEMCPY2D_v1 *pCopy;
+} cuMemcpy2D_params;
+typedef struct cuMemcpy2DUnaligned_params_st { /* cuMemcpy2DUnaligned (unsuffixed) args, presumed from name: a single const CUDA_MEMCPY2D_v1 pointer */
+    const CUDA_MEMCPY2D_v1 *pCopy;
+} cuMemcpy2DUnaligned_params;
+typedef struct cuMemcpy3D_params_st { /* cuMemcpy3D (unsuffixed) args, presumed from name: a single const CUDA_MEMCPY3D_v1 pointer */
+    const CUDA_MEMCPY3D_v1 *pCopy;
+} cuMemcpy3D_params;
+typedef struct cuMemcpyHtoDAsync_params_st { /* cuMemcpyHtoDAsync (unsuffixed) args, presumed from name: cuMemcpyHtoD_params fields plus a trailing stream */
+    CUdeviceptr_v1 dstDevice;
+    const void *srcHost;
+    unsigned int ByteCount;
+    CUstream hStream;
+} cuMemcpyHtoDAsync_params;
+typedef struct cuMemcpyDtoHAsync_params_st { /* cuMemcpyDtoHAsync (unsuffixed) args, presumed from name: cuMemcpyDtoH_params fields plus a trailing stream */
+    void *dstHost;
+    CUdeviceptr_v1 srcDevice;
+    unsigned int ByteCount;
+    CUstream hStream;
+} cuMemcpyDtoHAsync_params;
+typedef struct cuMemcpyDtoDAsync_params_st { /* cuMemcpyDtoDAsync (unsuffixed) args, presumed from name: cuMemcpyDtoD_params fields plus a trailing stream */
+    CUdeviceptr_v1 dstDevice;
+    CUdeviceptr_v1 srcDevice;
+    unsigned int ByteCount;
+    CUstream hStream;
+} cuMemcpyDtoDAsync_params;
+typedef struct cuMemcpy2DAsync_params_st { /* cuMemcpy2DAsync (unsuffixed) args, presumed from name: const CUDA_MEMCPY2D_v1 pointer and stream */
+    const CUDA_MEMCPY2D_v1 *pCopy;
+    CUstream hStream;
+} cuMemcpy2DAsync_params;
+typedef struct cuMemcpy3DAsync_params_st { /* cuMemcpy3DAsync (unsuffixed) args, presumed from name: const CUDA_MEMCPY3D_v1 pointer and stream */
+    const CUDA_MEMCPY3D_v1 *pCopy;
+    CUstream hStream;
+} cuMemcpy3DAsync_params;
+typedef struct cuMemsetD8_params_st { /* cuMemsetD8 (unsuffixed) args, presumed from name: CUdeviceptr_v1 destination, unsigned char value, unsigned N */
+    CUdeviceptr_v1 dstDevice;
+    unsigned char uc;
+    unsigned int N;
+} cuMemsetD8_params;
+typedef struct cuMemsetD16_params_st { /* cuMemsetD16 (unsuffixed) args, presumed from name: CUdeviceptr_v1 destination, unsigned short value, unsigned N */
+    CUdeviceptr_v1 dstDevice;
+    unsigned short us;
+    unsigned int N;
+} cuMemsetD16_params;
+typedef struct cuMemsetD32_params_st { /* cuMemsetD32 (unsuffixed) args, presumed from name: CUdeviceptr_v1 destination, unsigned int value, unsigned N */
+    CUdeviceptr_v1 dstDevice;
+    unsigned int ui;
+    unsigned int N;
+} cuMemsetD32_params;
+typedef struct cuMemsetD2D8_params_st { /* cuMemsetD2D8 (unsuffixed) args, presumed from name: CUdeviceptr_v1 destination, unsigned pitch, unsigned char value, unsigned Width and Height */
+    CUdeviceptr_v1 dstDevice;
+    unsigned int dstPitch;
+    unsigned char uc;
+    unsigned int Width;
+    unsigned int Height;
+} cuMemsetD2D8_params;
+typedef struct cuMemsetD2D16_params_st { /* cuMemsetD2D16 (unsuffixed) args, presumed from name: CUdeviceptr_v1 destination, unsigned pitch, unsigned short value, unsigned Width and Height */
+    CUdeviceptr_v1 dstDevice;
+    unsigned int dstPitch;
+    unsigned short us;
+    unsigned int Width;
+    unsigned int Height;
+} cuMemsetD2D16_params;
+typedef struct cuMemsetD2D32_params_st { /* cuMemsetD2D32 (unsuffixed) args, presumed from name: CUdeviceptr_v1 destination, unsigned pitch, unsigned int value, unsigned Width and Height */
+    CUdeviceptr_v1 dstDevice;
+    unsigned int dstPitch;
+    unsigned int ui;
+    unsigned int Width;
+    unsigned int Height;
+} cuMemsetD2D32_params;
+typedef struct cuArrayCreate_params_st { /* cuArrayCreate (unsuffixed) args, presumed from name: CUarray pointer and const CUDA_ARRAY_DESCRIPTOR_v1 pointer */
+    CUarray *pHandle;
+    const CUDA_ARRAY_DESCRIPTOR_v1 *pAllocateArray;
+} cuArrayCreate_params;
+typedef struct cuArrayGetDescriptor_params_st { /* cuArrayGetDescriptor (unsuffixed) args, presumed from name: non-const CUDA_ARRAY_DESCRIPTOR_v1 pointer and array handle */
+    CUDA_ARRAY_DESCRIPTOR_v1 *pArrayDescriptor;
+    CUarray hArray;
+} cuArrayGetDescriptor_params;
+typedef struct cuArray3DCreate_params_st { /* cuArray3DCreate (unsuffixed) args, presumed from name: CUarray pointer and const CUDA_ARRAY3D_DESCRIPTOR_v1 pointer */
+    CUarray *pHandle;
+    const CUDA_ARRAY3D_DESCRIPTOR_v1 *pAllocateArray;
+} cuArray3DCreate_params;
+typedef struct cuArray3DGetDescriptor_params_st { /* cuArray3DGetDescriptor (unsuffixed) args, presumed from name: non-const CUDA_ARRAY3D_DESCRIPTOR_v1 pointer and array handle */
+    CUDA_ARRAY3D_DESCRIPTOR_v1 *pArrayDescriptor;
+    CUarray hArray;
+} cuArray3DGetDescriptor_params;
+typedef struct cuTexRefSetAddress_params_st { /* cuTexRefSetAddress (unsuffixed) args, presumed from name: unsigned int pointer, texref, CUdeviceptr_v1, unsigned bytes; the _v2 record uses size_t and CUdeviceptr instead */
+    unsigned int *ByteOffset;
+    CUtexref hTexRef;
+    CUdeviceptr_v1 dptr;
+    unsigned int bytes;
+} cuTexRefSetAddress_params;
+typedef struct cuTexRefSetAddress2D_params_st { /* cuTexRefSetAddress2D (unsuffixed) args, presumed from name: texref, const CUDA_ARRAY_DESCRIPTOR_v1 pointer, CUdeviceptr_v1, unsigned Pitch */
+    CUtexref hTexRef;
+    const CUDA_ARRAY_DESCRIPTOR_v1 *desc;
+    CUdeviceptr_v1 dptr;
+    unsigned int Pitch;
+} cuTexRefSetAddress2D_params;
+typedef struct cuTexRefGetAddress_params_st { /* cuTexRefGetAddress (unsuffixed) args, presumed from name: CUdeviceptr_v1 pointer and texref */
+    CUdeviceptr_v1 *pdptr;
+    CUtexref hTexRef;
+} cuTexRefGetAddress_params;
+typedef struct cuGraphicsResourceGetMappedPointer_params_st { /* cuGraphicsResourceGetMappedPointer (unsuffixed) args, presumed from name: CUdeviceptr_v1 pointer, unsigned int pointer, resource */
+    CUdeviceptr_v1 *pDevPtr;
+    unsigned int *pSize;
+    CUgraphicsResource resource;
+} cuGraphicsResourceGetMappedPointer_params;
+typedef struct cuCtxDestroy_params_st { /* cuCtxDestroy (unsuffixed) args, presumed from name: a single context handle */
+    CUcontext ctx;
+} cuCtxDestroy_params;
+typedef struct cuCtxPopCurrent_params_st { /* cuCtxPopCurrent (unsuffixed) args, presumed from name: a single context pointer */
+    CUcontext *pctx;
+} cuCtxPopCurrent_params;
+typedef struct cuCtxPushCurrent_params_st { /* cuCtxPushCurrent (unsuffixed) args, presumed from name: a single context handle */
+    CUcontext ctx;
+} cuCtxPushCurrent_params;
+typedef struct cuStreamDestroy_params_st { /* cuStreamDestroy (unsuffixed) args, presumed from name: a single stream handle */
+    CUstream hStream;
+} cuStreamDestroy_params;
+typedef struct cuEventDestroy_params_st { /* cuEventDestroy (unsuffixed) args, presumed from name: a single event handle */
+    CUevent hEvent;
+} cuEventDestroy_params;
+typedef struct cuDevicePrimaryCtxRelease_params_st { /* cuDevicePrimaryCtxRelease (unsuffixed) args, presumed from name: a single device */
+    CUdevice dev;
+} cuDevicePrimaryCtxRelease_params;
+typedef struct cuDevicePrimaryCtxReset_params_st { /* cuDevicePrimaryCtxReset (unsuffixed) args, presumed from name: a single device */
+    CUdevice dev;
+} cuDevicePrimaryCtxReset_params;
+typedef struct cuDevicePrimaryCtxSetFlags_params_st { /* cuDevicePrimaryCtxSetFlags (unsuffixed) args, presumed from name: device and unsigned flags */
+    CUdevice dev;
+    unsigned int flags;
+} cuDevicePrimaryCtxSetFlags_params;
+typedef struct cuMemcpyHtoD_v2_params_st { /* cuMemcpyHtoD_v2 args, presumed from name: CUdeviceptr destination, const void* source, size_t ByteCount */
+    CUdeviceptr dstDevice;
+    const void *srcHost;
+    size_t ByteCount;
+} cuMemcpyHtoD_v2_params;
+typedef struct cuMemcpyDtoH_v2_params_st { /* cuMemcpyDtoH_v2 args, presumed from name: void* destination, CUdeviceptr source, size_t ByteCount */
+    void *dstHost;
+    CUdeviceptr srcDevice;
+    size_t ByteCount;
+} cuMemcpyDtoH_v2_params;
+typedef struct cuMemcpyDtoD_v2_params_st { /* cuMemcpyDtoD_v2 args, presumed from name: two CUdeviceptr values (dst, src) and size_t ByteCount */
+    CUdeviceptr dstDevice;
+    CUdeviceptr srcDevice;
+    size_t ByteCount;
+} cuMemcpyDtoD_v2_params;
+typedef struct cuMemcpyDtoA_v2_params_st { /* cuMemcpyDtoA_v2 args, presumed from name: destination array with size_t offset, CUdeviceptr source, size_t ByteCount */
+    CUarray dstArray;
+    size_t dstOffset;
+    CUdeviceptr srcDevice;
+    size_t ByteCount;
+} cuMemcpyDtoA_v2_params;
+typedef struct cuMemcpyAtoD_v2_params_st {
+    CUdeviceptr dstDevice;
+    CUarray srcArray;
+    size_t srcOffset;
+    size_t ByteCount;
+} cuMemcpyAtoD_v2_params;
+typedef struct cuMemcpyHtoA_v2_params_st {
+    CUarray dstArray;
+    size_t dstOffset;
+    const void *srcHost;
+    size_t ByteCount;
+} cuMemcpyHtoA_v2_params;
+typedef struct cuMemcpyAtoH_v2_params_st {
+    void *dstHost;
+    CUarray srcArray;
+    size_t srcOffset;
+    size_t ByteCount;
+} cuMemcpyAtoH_v2_params;
+typedef struct cuMemcpyAtoA_v2_params_st {
+    CUarray dstArray;
+    size_t dstOffset;
+    CUarray srcArray;
+    size_t srcOffset;
+    size_t ByteCount;
+} cuMemcpyAtoA_v2_params;
+typedef struct cuMemcpyHtoAAsync_v2_params_st {
+    CUarray dstArray;
+    size_t dstOffset;
+    const void *srcHost;
+    size_t ByteCount;
+    CUstream hStream;
+} cuMemcpyHtoAAsync_v2_params;
+typedef struct cuMemcpyAtoHAsync_v2_params_st {
+    void *dstHost;
+    CUarray srcArray;
+    size_t srcOffset;
+    size_t ByteCount;
+    CUstream hStream;
+} cuMemcpyAtoHAsync_v2_params;
+typedef struct cuMemcpy2D_v2_params_st {
+    const CUDA_MEMCPY2D *pCopy;
+} cuMemcpy2D_v2_params;
+typedef struct cuMemcpy2DUnaligned_v2_params_st {
+    const CUDA_MEMCPY2D *pCopy;
+} cuMemcpy2DUnaligned_v2_params;
+typedef struct cuMemcpy3D_v2_params_st {
+    const CUDA_MEMCPY3D *pCopy;
+} cuMemcpy3D_v2_params;
+typedef struct cuMemcpyHtoDAsync_v2_params_st {
+    CUdeviceptr dstDevice;
+    const void *srcHost;
+    size_t ByteCount;
+    CUstream hStream;
+} cuMemcpyHtoDAsync_v2_params;
+typedef struct cuMemcpyDtoHAsync_v2_params_st {
+    void *dstHost;
+    CUdeviceptr srcDevice;
+    size_t ByteCount;
+    CUstream hStream;
+} cuMemcpyDtoHAsync_v2_params;
+typedef struct cuMemcpyDtoDAsync_v2_params_st {
+    CUdeviceptr dstDevice;
+    CUdeviceptr srcDevice;
+    size_t ByteCount;
+    CUstream hStream;
+} cuMemcpyDtoDAsync_v2_params;
+typedef struct cuMemcpy2DAsync_v2_params_st {
+    const CUDA_MEMCPY2D *pCopy;
+    CUstream hStream;
+} cuMemcpy2DAsync_v2_params;
+typedef struct cuMemcpy3DAsync_v2_params_st {
+    const CUDA_MEMCPY3D *pCopy;
+    CUstream hStream;
+} cuMemcpy3DAsync_v2_params;
+typedef struct cuMemsetD8_v2_params_st {
+    CUdeviceptr dstDevice;
+    unsigned char uc;
+    size_t N;
+} cuMemsetD8_v2_params;
+typedef struct cuMemsetD16_v2_params_st {
+    CUdeviceptr dstDevice;
+    unsigned short us;
+    size_t N;
+} cuMemsetD16_v2_params;
+typedef struct cuMemsetD32_v2_params_st {
+    CUdeviceptr dstDevice;
+    unsigned int ui;
+    size_t N;
+} cuMemsetD32_v2_params;
+typedef struct cuMemsetD2D8_v2_params_st {
+    CUdeviceptr dstDevice;
+    size_t dstPitch;
+    unsigned char uc;
+    size_t Width;
+    size_t Height;
+} cuMemsetD2D8_v2_params;
+typedef struct cuMemsetD2D16_v2_params_st {
+    CUdeviceptr dstDevice;
+    size_t dstPitch;
+    unsigned short us;
+    size_t Width;
+    size_t Height;
+} cuMemsetD2D16_v2_params;
+typedef struct cuMemsetD2D32_v2_params_st {
+    CUdeviceptr dstDevice;
+    size_t dstPitch;
+    unsigned int ui;
+    size_t Width;
+    size_t Height;
+} cuMemsetD2D32_v2_params;
+typedef struct cuMemcpy_params_st {
+    CUdeviceptr dst;
+    CUdeviceptr src;
+    size_t ByteCount;
+} cuMemcpy_params;
+typedef struct cuMemcpyAsync_params_st {
+    CUdeviceptr dst;
+    CUdeviceptr src;
+    size_t ByteCount;
+    CUstream hStream;
+} cuMemcpyAsync_params;
+typedef struct cuMemcpyPeer_params_st {
+    CUdeviceptr dstDevice;
+    CUcontext dstContext;
+    CUdeviceptr srcDevice;
+    CUcontext srcContext;
+    size_t ByteCount;
+} cuMemcpyPeer_params;
+typedef struct cuMemcpyPeerAsync_params_st {
+    CUdeviceptr dstDevice;
+    CUcontext dstContext;
+    CUdeviceptr srcDevice;
+    CUcontext srcContext;
+    size_t ByteCount;
+    CUstream hStream;
+} cuMemcpyPeerAsync_params;
+typedef struct cuMemcpy3DPeer_params_st {
+    const CUDA_MEMCPY3D_PEER *pCopy;
+} cuMemcpy3DPeer_params;
+typedef struct cuMemcpy3DPeerAsync_params_st {
+    const CUDA_MEMCPY3D_PEER *pCopy;
+    CUstream hStream;
+} cuMemcpy3DPeerAsync_params;
+typedef struct cuMemsetD8Async_params_st {
+    CUdeviceptr dstDevice;
+    unsigned char uc;
+    size_t N;
+    CUstream hStream;
+} cuMemsetD8Async_params;
+typedef struct cuMemsetD16Async_params_st {
+    CUdeviceptr dstDevice;
+    unsigned short us;
+    size_t N;
+    CUstream hStream;
+} cuMemsetD16Async_params;
+typedef struct cuMemsetD32Async_params_st {
+    CUdeviceptr dstDevice;
+    unsigned int ui;
+    size_t N;
+    CUstream hStream;
+} cuMemsetD32Async_params;
+typedef struct cuMemsetD2D8Async_params_st {
+    CUdeviceptr dstDevice;
+    size_t dstPitch;
+    unsigned char uc;
+    size_t Width;
+    size_t Height;
+    CUstream hStream;
+} cuMemsetD2D8Async_params;
+typedef struct cuMemsetD2D16Async_params_st {
+    CUdeviceptr dstDevice;
+    size_t dstPitch;
+    unsigned short us;
+    size_t Width;
+    size_t Height;
+    CUstream hStream;
+} cuMemsetD2D16Async_params;
+typedef struct cuMemsetD2D32Async_params_st {
+    CUdeviceptr dstDevice;
+    size_t dstPitch;
+    unsigned int ui;
+    size_t Width;
+    size_t Height;
+    CUstream hStream;
+} cuMemsetD2D32Async_params;
+typedef struct cuStreamGetPriority_params_st {
+    CUstream hStream;
+    int *priority;
+} cuStreamGetPriority_params;
+typedef struct cuStreamGetFlags_params_st {
+    CUstream hStream;
+    unsigned int *flags;
+} cuStreamGetFlags_params;
+typedef struct cuStreamGetCtx_params_st {
+    CUstream hStream;
+    CUcontext *pctx;
+} cuStreamGetCtx_params;
+typedef struct cuStreamWaitEvent_params_st {
+    CUstream hStream;
+    CUevent hEvent;
+    unsigned int Flags;
+} cuStreamWaitEvent_params;
+typedef struct cuStreamAddCallback_params_st {
+    CUstream hStream;
+    CUstreamCallback callback;
+    void *userData;
+    unsigned int flags;
+} cuStreamAddCallback_params;
+typedef struct cuStreamAttachMemAsync_params_st {
+    CUstream hStream;
+    CUdeviceptr dptr;
+    size_t length;
+    unsigned int flags;
+} cuStreamAttachMemAsync_params;
+typedef struct cuStreamQuery_params_st {
+    CUstream hStream;
+} cuStreamQuery_params;
+typedef struct cuStreamSynchronize_params_st {
+    CUstream hStream;
+} cuStreamSynchronize_params;
+typedef struct cuEventRecord_params_st {
+    CUevent hEvent;
+    CUstream hStream;
+} cuEventRecord_params;
+typedef struct cuEventRecordWithFlags_params_st {
+    CUevent hEvent;
+    CUstream hStream;
+    unsigned int flags;
+} cuEventRecordWithFlags_params;
+typedef struct cuLaunchKernel_params_st {
+    CUfunction f;
+    unsigned int gridDimX;
+    unsigned int gridDimY;
+    unsigned int gridDimZ;
+    unsigned int blockDimX;
+    unsigned int blockDimY;
+    unsigned int blockDimZ;
+    unsigned int sharedMemBytes;
+    CUstream hStream;
+    void **kernelParams;
+    void **extra;
+} cuLaunchKernel_params;
+typedef struct cuLaunchKernelEx_params_st {
+    const CUlaunchConfig *config;
+    CUfunction f;
+    void **kernelParams;
+    void **extra;
+} cuLaunchKernelEx_params;
+typedef struct cuLaunchHostFunc_params_st {
+    CUstream hStream;
+    CUhostFn fn;
+    void *userData;
+} cuLaunchHostFunc_params;
+typedef struct cuGraphicsMapResources_params_st {
+    unsigned int count;
+    CUgraphicsResource *resources;
+    CUstream hStream;
+} cuGraphicsMapResources_params;
+typedef struct cuGraphicsUnmapResources_params_st {
+    unsigned int count;
+    CUgraphicsResource *resources;
+    CUstream hStream;
+} cuGraphicsUnmapResources_params;
+typedef struct cuStreamWriteValue32_params_st {
+    CUstream stream;
+    CUdeviceptr addr;
+    cuuint32_t value;
+    unsigned int flags;
+} cuStreamWriteValue32_params;
+typedef struct cuStreamWaitValue32_params_st {
+    CUstream stream;
+    CUdeviceptr addr;
+    cuuint32_t value;
+    unsigned int flags;
+} cuStreamWaitValue32_params;
+typedef struct cuStreamWriteValue64_params_st {
+    CUstream stream;
+    CUdeviceptr addr;
+    cuuint64_t value;
+    unsigned int flags;
+} cuStreamWriteValue64_params;
+typedef struct cuStreamWaitValue64_params_st {
+    CUstream stream;
+    CUdeviceptr addr;
+    cuuint64_t value;
+    unsigned int flags;
+} cuStreamWaitValue64_params;
+typedef struct cuStreamBatchMemOp_params_st {
+    CUstream stream;
+    unsigned int count;
+    CUstreamBatchMemOpParams *paramArray;
+    unsigned int flags;
+} cuStreamBatchMemOp_params;
+typedef struct cuMemPrefetchAsync_params_st {
+    CUdeviceptr devPtr;
+    size_t count;
+    CUdevice dstDevice;
+    CUstream hStream;
+} cuMemPrefetchAsync_params;
+typedef struct cuLaunchCooperativeKernel_params_st {
+    CUfunction f;
+    unsigned int gridDimX;
+    unsigned int gridDimY;
+    unsigned int gridDimZ;
+    unsigned int blockDimX;
+    unsigned int blockDimY;
+    unsigned int blockDimZ;
+    unsigned int sharedMemBytes;
+    CUstream hStream;
+    void **kernelParams;
+} cuLaunchCooperativeKernel_params;
+typedef struct cuSignalExternalSemaphoresAsync_params_st {
+    const CUexternalSemaphore *extSemArray;
+    const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray;
+    unsigned int numExtSems;
+    CUstream stream;
+} cuSignalExternalSemaphoresAsync_params;
+typedef struct cuWaitExternalSemaphoresAsync_params_st {
+    const CUexternalSemaphore *extSemArray;
+    const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray;
+    unsigned int numExtSems;
+    CUstream stream;
+} cuWaitExternalSemaphoresAsync_params;
+typedef struct cuStreamBeginCapture_params_st {
+    CUstream hStream;
+} cuStreamBeginCapture_params;
+typedef struct cuStreamBeginCapture_ptsz_params_st {
+    CUstream hStream;
+} cuStreamBeginCapture_ptsz_params;
+typedef struct cuStreamBeginCapture_v2_params_st {
+    CUstream hStream;
+    CUstreamCaptureMode mode;
+} cuStreamBeginCapture_v2_params;
+typedef struct cuStreamEndCapture_params_st {
+    CUstream hStream;
+    CUgraph *phGraph;
+} cuStreamEndCapture_params;
+typedef struct cuStreamIsCapturing_params_st {
+    CUstream hStream;
+    CUstreamCaptureStatus *captureStatus;
+} cuStreamIsCapturing_params;
+typedef struct cuStreamGetCaptureInfo_params_st {
+    CUstream hStream;
+    CUstreamCaptureStatus *captureStatus_out;
+    cuuint64_t *id_out;
+} cuStreamGetCaptureInfo_params;
+typedef struct cuStreamGetCaptureInfo_v2_params_st {
+    CUstream hStream;
+    CUstreamCaptureStatus *captureStatus_out;
+    cuuint64_t *id_out;
+    CUgraph *graph_out;
+    const CUgraphNode **dependencies_out;
+    size_t *numDependencies_out;
+} cuStreamGetCaptureInfo_v2_params;
+typedef struct cuGraphUpload_params_st {
+    CUgraphExec hGraph;
+    CUstream hStream;
+} cuGraphUpload_params;
+typedef struct cuGraphLaunch_params_st {
+    CUgraphExec hGraph;
+    CUstream hStream;
+} cuGraphLaunch_params;
+typedef struct cuStreamCopyAttributes_params_st {
+    CUstream dstStream;
+    CUstream srcStream;
+} cuStreamCopyAttributes_params;
+typedef struct cuStreamGetAttribute_params_st {
+    CUstream hStream;
+    CUstreamAttrID attr;
+    CUstreamAttrValue *value;
+} cuStreamGetAttribute_params;
+typedef struct cuStreamSetAttribute_params_st {
+    CUstream hStream;
+    CUstreamAttrID attr;
+    const CUstreamAttrValue *param;
+} cuStreamSetAttribute_params;
+typedef struct cuIpcOpenMemHandle_params_st {
+    CUdeviceptr *pdptr;
+    CUipcMemHandle handle;
+    unsigned int Flags;
+} cuIpcOpenMemHandle_params;
+typedef struct cuGraphInstantiate_params_st {
+    CUgraphExec *phGraphExec;
+    CUgraph hGraph;
+    CUgraphNode *phErrorNode;
+    char *logBuffer;
+    size_t bufferSize;
+} cuGraphInstantiate_params;
+typedef struct cuMemMapArrayAsync_params_st {
+    CUarrayMapInfo *mapInfoList;
+    unsigned int count;
+    CUstream hStream;
+} cuMemMapArrayAsync_params;
+typedef struct cuMemFreeAsync_params_st {
+    CUdeviceptr dptr;
+    CUstream hStream;
+} cuMemFreeAsync_params;
+typedef struct cuMemAllocAsync_params_st {
+    CUdeviceptr *dptr;
+    size_t bytesize;
+    CUstream hStream;
+} cuMemAllocAsync_params;
+typedef struct cuMemAllocFromPoolAsync_params_st {
+    CUdeviceptr *dptr;
+    size_t bytesize;
+    CUmemoryPool pool;
+    CUstream hStream;
+} cuMemAllocFromPoolAsync_params;
+typedef struct cuStreamUpdateCaptureDependencies_params_st {
+    CUstream hStream;
+    CUgraphNode *dependencies;
+    size_t numDependencies;
+    unsigned int flags;
+} cuStreamUpdateCaptureDependencies_params;

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_runtime_api_meta.h ADDED Viewed

	@@ -0,0 +1,2139 @@

+// This file is generated.  Any changes you make will be lost during the next clean build.
+// CUDA public interface, for type definitions and api function prototypes
+#include "cuda_runtime_api.h"
+// *************************************************************************
+//      Definitions of structs to hold parameters for each function
+// *************************************************************************
+// Currently used parameter trace structures
+typedef struct cudaDeviceSetLimit_v3020_params_st {
+    enum cudaLimit limit;
+    size_t value;
+} cudaDeviceSetLimit_v3020_params;
+typedef struct cudaDeviceGetLimit_v3020_params_st {
+    size_t *pValue;
+    enum cudaLimit limit;
+} cudaDeviceGetLimit_v3020_params;
+typedef struct cudaDeviceGetTexture1DLinearMaxWidth_v11010_params_st {
+    size_t *maxWidthInElements;
+    const struct cudaChannelFormatDesc *fmtDesc;
+    int device;
+} cudaDeviceGetTexture1DLinearMaxWidth_v11010_params;
+typedef struct cudaDeviceGetCacheConfig_v3020_params_st {
+    enum cudaFuncCache *pCacheConfig;
+} cudaDeviceGetCacheConfig_v3020_params;
+typedef struct cudaDeviceGetStreamPriorityRange_v5050_params_st {
+    int *leastPriority;
+    int *greatestPriority;
+} cudaDeviceGetStreamPriorityRange_v5050_params;
+typedef struct cudaDeviceSetCacheConfig_v3020_params_st {
+    enum cudaFuncCache cacheConfig;
+} cudaDeviceSetCacheConfig_v3020_params;
+typedef struct cudaDeviceGetSharedMemConfig_v4020_params_st {
+    enum cudaSharedMemConfig *pConfig;
+} cudaDeviceGetSharedMemConfig_v4020_params;
+typedef struct cudaDeviceSetSharedMemConfig_v4020_params_st {
+    enum cudaSharedMemConfig config;
+} cudaDeviceSetSharedMemConfig_v4020_params;
+typedef struct cudaDeviceGetByPCIBusId_v4010_params_st {
+    int *device;
+    const char *pciBusId;
+} cudaDeviceGetByPCIBusId_v4010_params;
+typedef struct cudaDeviceGetPCIBusId_v4010_params_st {
+    char *pciBusId;
+    int len;
+    int device;
+} cudaDeviceGetPCIBusId_v4010_params;
+typedef struct cudaIpcGetEventHandle_v4010_params_st {
+    cudaIpcEventHandle_t *handle;
+    cudaEvent_t event;
+} cudaIpcGetEventHandle_v4010_params;
+typedef struct cudaIpcOpenEventHandle_v4010_params_st {
+    cudaEvent_t *event;
+    cudaIpcEventHandle_t handle;
+} cudaIpcOpenEventHandle_v4010_params;
+typedef struct cudaIpcGetMemHandle_v4010_params_st {
+    cudaIpcMemHandle_t *handle;
+    void *devPtr;
+} cudaIpcGetMemHandle_v4010_params;
+typedef struct cudaIpcOpenMemHandle_v4010_params_st {
+    void **devPtr;
+    cudaIpcMemHandle_t handle;
+    unsigned int flags;
+} cudaIpcOpenMemHandle_v4010_params;
+typedef struct cudaIpcCloseMemHandle_v4010_params_st {
+    void *devPtr;
+} cudaIpcCloseMemHandle_v4010_params;
+typedef struct cudaDeviceFlushGPUDirectRDMAWrites_v11030_params_st {
+    enum cudaFlushGPUDirectRDMAWritesTarget target;
+    enum cudaFlushGPUDirectRDMAWritesScope scope;
+} cudaDeviceFlushGPUDirectRDMAWrites_v11030_params;
+typedef struct cudaGetErrorName_v6050_params_st {
+    cudaError_t error;
+} cudaGetErrorName_v6050_params;
+typedef struct cudaGetErrorString_v3020_params_st {
+    cudaError_t error;
+} cudaGetErrorString_v3020_params;
+typedef struct cudaGetDeviceCount_v3020_params_st {
+    int *count;
+} cudaGetDeviceCount_v3020_params;
+typedef struct cudaGetDeviceProperties_v3020_params_st {
+    struct cudaDeviceProp *prop;
+    int device;
+} cudaGetDeviceProperties_v3020_params;
+typedef struct cudaDeviceGetAttribute_v5000_params_st {
+    int *value;
+    enum cudaDeviceAttr attr;
+    int device;
+} cudaDeviceGetAttribute_v5000_params;
+typedef struct cudaDeviceGetDefaultMemPool_v11020_params_st {
+    cudaMemPool_t *memPool;
+    int device;
+} cudaDeviceGetDefaultMemPool_v11020_params;
+typedef struct cudaDeviceSetMemPool_v11020_params_st {
+    int device;
+    cudaMemPool_t memPool;
+} cudaDeviceSetMemPool_v11020_params;
+typedef struct cudaDeviceGetMemPool_v11020_params_st {
+    cudaMemPool_t *memPool;
+    int device;
+} cudaDeviceGetMemPool_v11020_params;
+typedef struct cudaDeviceGetNvSciSyncAttributes_v10020_params_st {
+    void *nvSciSyncAttrList;
+    int device;
+    int flags;
+} cudaDeviceGetNvSciSyncAttributes_v10020_params;
+typedef struct cudaDeviceGetP2PAttribute_v8000_params_st {
+    int *value;
+    enum cudaDeviceP2PAttr attr;
+    int srcDevice;
+    int dstDevice;
+} cudaDeviceGetP2PAttribute_v8000_params;
+typedef struct cudaChooseDevice_v3020_params_st {
+    int *device;
+    const struct cudaDeviceProp *prop;
+} cudaChooseDevice_v3020_params;
+typedef struct cudaSetDevice_v3020_params_st {
+    int device;
+} cudaSetDevice_v3020_params;
+typedef struct cudaGetDevice_v3020_params_st {
+    int *device;
+} cudaGetDevice_v3020_params;
+typedef struct cudaSetValidDevices_v3020_params_st {
+    int *device_arr;
+    int len;
+} cudaSetValidDevices_v3020_params;
+typedef struct cudaSetDeviceFlags_v3020_params_st {
+    unsigned int flags;
+} cudaSetDeviceFlags_v3020_params;
+typedef struct cudaGetDeviceFlags_v7000_params_st {
+    unsigned int *flags;
+} cudaGetDeviceFlags_v7000_params;
+typedef struct cudaStreamCreate_v3020_params_st {
+    cudaStream_t *pStream;
+} cudaStreamCreate_v3020_params;
+typedef struct cudaStreamCreateWithFlags_v5000_params_st {
+    cudaStream_t *pStream;
+    unsigned int flags;
+} cudaStreamCreateWithFlags_v5000_params;
+typedef struct cudaStreamCreateWithPriority_v5050_params_st {
+    cudaStream_t *pStream;
+    unsigned int flags;
+    int priority;
+} cudaStreamCreateWithPriority_v5050_params;
+typedef struct cudaStreamGetPriority_ptsz_v7000_params_st {
+    cudaStream_t hStream;
+    int *priority;
+} cudaStreamGetPriority_ptsz_v7000_params;
+typedef struct cudaStreamGetFlags_ptsz_v7000_params_st {
+    cudaStream_t hStream;
+    unsigned int *flags;
+} cudaStreamGetFlags_ptsz_v7000_params;
+typedef struct cudaStreamCopyAttributes_ptsz_v11000_params_st {
+    cudaStream_t dst;
+    cudaStream_t src;
+} cudaStreamCopyAttributes_ptsz_v11000_params;
+typedef struct cudaStreamGetAttribute_ptsz_v11000_params_st {
+    cudaStream_t hStream;
+    cudaStreamAttrID attr;
+    cudaStreamAttrValue *value_out;
+} cudaStreamGetAttribute_ptsz_v11000_params;
+typedef struct cudaStreamSetAttribute_ptsz_v11000_params_st {
+    cudaStream_t hStream;
+    cudaStreamAttrID attr;
+    const cudaStreamAttrValue *value;
+} cudaStreamSetAttribute_ptsz_v11000_params;
+typedef struct cudaStreamDestroy_v5050_params_st {
+    cudaStream_t stream;
+} cudaStreamDestroy_v5050_params;
+typedef struct cudaStreamWaitEvent_ptsz_v7000_params_st {
+    cudaStream_t stream;
+    cudaEvent_t event;
+    unsigned int flags;
+} cudaStreamWaitEvent_ptsz_v7000_params;
+typedef struct cudaStreamAddCallback_ptsz_v7000_params_st {
+    cudaStream_t stream;
+    cudaStreamCallback_t callback;
+    void *userData;
+    unsigned int flags;
+} cudaStreamAddCallback_ptsz_v7000_params;
+typedef struct cudaStreamSynchronize_ptsz_v7000_params_st {
+    cudaStream_t stream;
+} cudaStreamSynchronize_ptsz_v7000_params;
+typedef struct cudaStreamQuery_ptsz_v7000_params_st {
+    cudaStream_t stream;
+} cudaStreamQuery_ptsz_v7000_params;
+typedef struct cudaStreamAttachMemAsync_ptsz_v7000_params_st {
+    cudaStream_t stream;
+    void *devPtr;
+    size_t length;
+    unsigned int flags;
+} cudaStreamAttachMemAsync_ptsz_v7000_params;
+typedef struct cudaStreamBeginCapture_ptsz_v10000_params_st {
+    cudaStream_t stream;
+    enum cudaStreamCaptureMode mode;
+} cudaStreamBeginCapture_ptsz_v10000_params;
+typedef struct cudaThreadExchangeStreamCaptureMode_v10010_params_st {
+    enum cudaStreamCaptureMode *mode;
+} cudaThreadExchangeStreamCaptureMode_v10010_params;
+typedef struct cudaStreamEndCapture_ptsz_v10000_params_st {
+    cudaStream_t stream;
+    cudaGraph_t *pGraph;
+} cudaStreamEndCapture_ptsz_v10000_params;
+typedef struct cudaStreamIsCapturing_ptsz_v10000_params_st {
+    cudaStream_t stream;
+    enum cudaStreamCaptureStatus *pCaptureStatus;
+} cudaStreamIsCapturing_ptsz_v10000_params;
+typedef struct cudaStreamGetCaptureInfo_ptsz_v10010_params_st {
+    cudaStream_t stream;
+    enum cudaStreamCaptureStatus *pCaptureStatus;
+    unsigned long long *pId;
+} cudaStreamGetCaptureInfo_ptsz_v10010_params;
+typedef struct cudaStreamGetCaptureInfo_v2_ptsz_v11030_params_st {
+    cudaStream_t stream;
+    enum cudaStreamCaptureStatus *captureStatus_out;
+    unsigned long long *id_out;
+    cudaGraph_t *graph_out;
+    const cudaGraphNode_t **dependencies_out;
+    size_t *numDependencies_out;
+} cudaStreamGetCaptureInfo_v2_ptsz_v11030_params;
+typedef struct cudaStreamUpdateCaptureDependencies_v11030_params_st {
+    cudaStream_t stream;
+    cudaGraphNode_t *dependencies;
+    size_t numDependencies;
+    unsigned int flags;
+} cudaStreamUpdateCaptureDependencies_v11030_params;
+typedef struct cudaEventCreate_v3020_params_st {
+    cudaEvent_t *event;
+} cudaEventCreate_v3020_params;
+typedef struct cudaEventCreateWithFlags_v3020_params_st {
+    cudaEvent_t *event;
+    unsigned int flags;
+} cudaEventCreateWithFlags_v3020_params;
+typedef struct cudaEventRecord_ptsz_v7000_params_st {
+    cudaEvent_t event;
+    cudaStream_t stream;
+} cudaEventRecord_ptsz_v7000_params;
+typedef struct cudaEventRecordWithFlags_ptsz_v11010_params_st {
+    cudaEvent_t event;
+    cudaStream_t stream;
+    unsigned int flags;
+} cudaEventRecordWithFlags_ptsz_v11010_params;
+typedef struct cudaEventQuery_v3020_params_st {
+    cudaEvent_t event;
+} cudaEventQuery_v3020_params;
+typedef struct cudaEventSynchronize_v3020_params_st {
+    cudaEvent_t event;
+} cudaEventSynchronize_v3020_params;
+typedef struct cudaEventDestroy_v3020_params_st {
+    cudaEvent_t event;
+} cudaEventDestroy_v3020_params;
+typedef struct cudaEventElapsedTime_v3020_params_st {
+    float *ms;
+    cudaEvent_t start;
+    cudaEvent_t end;
+} cudaEventElapsedTime_v3020_params;
+typedef struct cudaImportExternalMemory_v10000_params_st {
+    cudaExternalMemory_t *extMem_out;
+    const struct cudaExternalMemoryHandleDesc *memHandleDesc;
+} cudaImportExternalMemory_v10000_params;
+typedef struct cudaExternalMemoryGetMappedBuffer_v10000_params_st {
+    void **devPtr;
+    cudaExternalMemory_t extMem;
+    const struct cudaExternalMemoryBufferDesc *bufferDesc;
+} cudaExternalMemoryGetMappedBuffer_v10000_params;
+typedef struct cudaExternalMemoryGetMappedMipmappedArray_v10000_params_st {
+    cudaMipmappedArray_t *mipmap;
+    cudaExternalMemory_t extMem;
+    const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc;
+} cudaExternalMemoryGetMappedMipmappedArray_v10000_params;
+typedef struct cudaDestroyExternalMemory_v10000_params_st {
+    cudaExternalMemory_t extMem;
+} cudaDestroyExternalMemory_v10000_params;
+typedef struct cudaImportExternalSemaphore_v10000_params_st {
+    cudaExternalSemaphore_t *extSem_out;
+    const struct cudaExternalSemaphoreHandleDesc *semHandleDesc;
+} cudaImportExternalSemaphore_v10000_params;
+typedef struct cudaSignalExternalSemaphoresAsync_v2_ptsz_v11020_params_st {
+    const cudaExternalSemaphore_t *extSemArray;
+    const struct cudaExternalSemaphoreSignalParams *paramsArray;
+    unsigned int numExtSems;
+    cudaStream_t stream;
+} cudaSignalExternalSemaphoresAsync_v2_ptsz_v11020_params;
+typedef struct cudaWaitExternalSemaphoresAsync_v2_ptsz_v11020_params_st {
+    const cudaExternalSemaphore_t *extSemArray;
+    const struct cudaExternalSemaphoreWaitParams *paramsArray;
+    unsigned int numExtSems;
+    cudaStream_t stream;
+} cudaWaitExternalSemaphoresAsync_v2_ptsz_v11020_params;
+typedef struct cudaDestroyExternalSemaphore_v10000_params_st {
+    cudaExternalSemaphore_t extSem;
+} cudaDestroyExternalSemaphore_v10000_params;
+typedef struct cudaLaunchKernel_ptsz_v7000_params_st {
+    const void *func;
+    dim3 gridDim;
+    dim3 blockDim;
+    void **args;
+    size_t sharedMem;
+    cudaStream_t stream;
+} cudaLaunchKernel_ptsz_v7000_params;
+typedef struct cudaLaunchKernelExC_ptsz_v11060_params_st {
+    const cudaLaunchConfig_t *config;
+    const void *func;
+    void **args;
+} cudaLaunchKernelExC_ptsz_v11060_params;
+typedef struct cudaLaunchCooperativeKernel_ptsz_v9000_params_st {
+    const void *func;
+    dim3 gridDim;
+    dim3 blockDim;
+    void **args;
+    size_t sharedMem;
+    cudaStream_t stream;
+} cudaLaunchCooperativeKernel_ptsz_v9000_params;
+typedef struct cudaLaunchCooperativeKernelMultiDevice_v9000_params_st {
+    struct cudaLaunchParams *launchParamsList;
+    unsigned int numDevices;
+    unsigned int flags;
+} cudaLaunchCooperativeKernelMultiDevice_v9000_params;
+typedef struct cudaFuncSetCacheConfig_v3020_params_st {
+    const void *func;
+    enum cudaFuncCache cacheConfig;
+} cudaFuncSetCacheConfig_v3020_params;
+typedef struct cudaFuncSetSharedMemConfig_v4020_params_st {
+    const void *func;
+    enum cudaSharedMemConfig config;
+} cudaFuncSetSharedMemConfig_v4020_params;
+typedef struct cudaFuncGetAttributes_v3020_params_st {
+    struct cudaFuncAttributes *attr;
+    const void *func;
+} cudaFuncGetAttributes_v3020_params;
+typedef struct cudaFuncSetAttribute_v9000_params_st {
+    const void *func;
+    enum cudaFuncAttribute attr;
+    int value;
+} cudaFuncSetAttribute_v9000_params;
+typedef struct cudaLaunchHostFunc_ptsz_v10000_params_st {
+    cudaStream_t stream;
+    cudaHostFn_t fn;
+    void *userData;
+} cudaLaunchHostFunc_ptsz_v10000_params;
+typedef struct cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050_params_st {
+    int *numBlocks;
+    const void *func;
+    int blockSize;
+    size_t dynamicSMemSize;
+} cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050_params;
+typedef struct cudaOccupancyAvailableDynamicSMemPerBlock_v10200_params_st {
+    size_t *dynamicSmemSize;
+    const void *func;
+    int numBlocks;
+    int blockSize;
+} cudaOccupancyAvailableDynamicSMemPerBlock_v10200_params;
+typedef struct cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_v7000_params_st {
+    int *numBlocks;
+    const void *func;
+    int blockSize;
+    size_t dynamicSMemSize;
+    unsigned int flags;
+} cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_v7000_params;
+typedef struct cudaOccupancyMaxPotentialClusterSize_v11070_params_st {
+    int *clusterSize;
+    const void *func;
+    const cudaLaunchConfig_t *launchConfig;
+} cudaOccupancyMaxPotentialClusterSize_v11070_params;
+typedef struct cudaOccupancyMaxActiveClusters_v11070_params_st {
+    int *numClusters;
+    const void *func;
+    const cudaLaunchConfig_t *launchConfig;
+} cudaOccupancyMaxActiveClusters_v11070_params;
+typedef struct cudaMallocManaged_v6000_params_st {
+    void **devPtr;
+    size_t size;
+    unsigned int flags;
+} cudaMallocManaged_v6000_params;
+typedef struct cudaMalloc_v3020_params_st {
+    void **devPtr;
+    size_t size;
+} cudaMalloc_v3020_params;
+typedef struct cudaMallocHost_v3020_params_st {
+    void **ptr;
+    size_t size;
+} cudaMallocHost_v3020_params;
+typedef struct cudaMallocPitch_v3020_params_st {
+    void **devPtr;
+    size_t *pitch;
+    size_t width;
+    size_t height;
+} cudaMallocPitch_v3020_params;
+typedef struct cudaMallocArray_v3020_params_st {
+    cudaArray_t *array;
+    const struct cudaChannelFormatDesc *desc;
+    size_t width;
+    size_t height;
+    unsigned int flags;
+} cudaMallocArray_v3020_params;
+typedef struct cudaFree_v3020_params_st {
+    void *devPtr;
+} cudaFree_v3020_params;
+typedef struct cudaFreeHost_v3020_params_st {
+    void *ptr;
+} cudaFreeHost_v3020_params;
+typedef struct cudaFreeArray_v3020_params_st {
+    cudaArray_t array;
+} cudaFreeArray_v3020_params;
+typedef struct cudaFreeMipmappedArray_v5000_params_st {
+    cudaMipmappedArray_t mipmappedArray;
+} cudaFreeMipmappedArray_v5000_params;
+typedef struct cudaHostAlloc_v3020_params_st {
+    void **pHost;
+    size_t size;
+    unsigned int flags;
+} cudaHostAlloc_v3020_params;
+typedef struct cudaHostRegister_v4000_params_st {
+    void *ptr;
+    size_t size;
+    unsigned int flags;
+} cudaHostRegister_v4000_params;
+typedef struct cudaHostUnregister_v4000_params_st {
+    void *ptr;
+} cudaHostUnregister_v4000_params;
+typedef struct cudaHostGetDevicePointer_v3020_params_st {
+    void **pDevice;
+    void *pHost;
+    unsigned int flags;
+} cudaHostGetDevicePointer_v3020_params;
+typedef struct cudaHostGetFlags_v3020_params_st {
+    unsigned int *pFlags;
+    void *pHost;
+} cudaHostGetFlags_v3020_params;
+typedef struct cudaMalloc3D_v3020_params_st {
+    struct cudaPitchedPtr *pitchedDevPtr;
+    struct cudaExtent extent;
+} cudaMalloc3D_v3020_params;
+typedef struct cudaMalloc3DArray_v3020_params_st {
+    cudaArray_t *array;
+    const struct cudaChannelFormatDesc *desc;
+    struct cudaExtent extent;
+    unsigned int flags;
+} cudaMalloc3DArray_v3020_params;
+typedef struct cudaMallocMipmappedArray_v5000_params_st {
+    cudaMipmappedArray_t *mipmappedArray;
+    const struct cudaChannelFormatDesc *desc;
+    struct cudaExtent extent;
+    unsigned int numLevels;
+    unsigned int flags;
+} cudaMallocMipmappedArray_v5000_params;
+typedef struct cudaGetMipmappedArrayLevel_v5000_params_st {
+    cudaArray_t *levelArray;
+    cudaMipmappedArray_const_t mipmappedArray;
+    unsigned int level;
+} cudaGetMipmappedArrayLevel_v5000_params;
+typedef struct cudaMemcpy3D_ptds_v7000_params_st {
+    const struct cudaMemcpy3DParms *p;
+} cudaMemcpy3D_ptds_v7000_params;
+typedef struct cudaMemcpy3DPeer_ptds_v7000_params_st {
+    const struct cudaMemcpy3DPeerParms *p;
+} cudaMemcpy3DPeer_ptds_v7000_params;
+typedef struct cudaMemcpy3DAsync_ptsz_v7000_params_st {
+    const struct cudaMemcpy3DParms *p;
+    cudaStream_t stream;
+} cudaMemcpy3DAsync_ptsz_v7000_params;
+typedef struct cudaMemcpy3DPeerAsync_ptsz_v7000_params_st {
+    const struct cudaMemcpy3DPeerParms *p;
+    cudaStream_t stream;
+} cudaMemcpy3DPeerAsync_ptsz_v7000_params;
+typedef struct cudaMemGetInfo_v3020_params_st {
+    size_t *free;
+    size_t *total;
+} cudaMemGetInfo_v3020_params;
+typedef struct cudaArrayGetInfo_v4010_params_st {
+    struct cudaChannelFormatDesc *desc;
+    struct cudaExtent *extent;
+    unsigned int *flags;
+    cudaArray_t array;
+} cudaArrayGetInfo_v4010_params;
+typedef struct cudaArrayGetPlane_v11020_params_st {
+    cudaArray_t *pPlaneArray;
+    cudaArray_t hArray;
+    unsigned int planeIdx;
+} cudaArrayGetPlane_v11020_params;
+typedef struct cudaArrayGetMemoryRequirements_v11060_params_st {
+    struct cudaArrayMemoryRequirements *memoryRequirements;
+    cudaArray_t array;
+    int device;
+} cudaArrayGetMemoryRequirements_v11060_params;
+typedef struct cudaMipmappedArrayGetMemoryRequirements_v11060_params_st {
+    struct cudaArrayMemoryRequirements *memoryRequirements;
+    cudaMipmappedArray_t mipmap;
+    int device;
+} cudaMipmappedArrayGetMemoryRequirements_v11060_params;
+typedef struct cudaArrayGetSparseProperties_v11010_params_st {
+    struct cudaArraySparseProperties *sparseProperties;
+    cudaArray_t array;
+} cudaArrayGetSparseProperties_v11010_params;
+typedef struct cudaMipmappedArrayGetSparseProperties_v11010_params_st {
+    struct cudaArraySparseProperties *sparseProperties;
+    cudaMipmappedArray_t mipmap;
+} cudaMipmappedArrayGetSparseProperties_v11010_params;
+typedef struct cudaMemcpy_ptds_v7000_params_st { /* cudaMemcpy params (_ptds, v7000): dst and src pointers, a size_t count and a cudaMemcpyKind. Presumably mirrors the same-named API call's argument list -- confirm. */
+    void *dst;
+    const void *src;
+    size_t count;
+    enum cudaMemcpyKind kind;
+} cudaMemcpy_ptds_v7000_params;
+typedef struct cudaMemcpyPeer_v4000_params_st { /* cudaMemcpyPeer params (v4000): dst pointer with int dstDevice, src pointer with int srcDevice, and a size_t count. */
+    void *dst;
+    int dstDevice;
+    const void *src;
+    int srcDevice;
+    size_t count;
+} cudaMemcpyPeer_v4000_params;
+typedef struct cudaMemcpy2D_ptds_v7000_params_st { /* cudaMemcpy2D params (_ptds, v7000): dst/dpitch, src/spitch, width, height (all size_t sizes) and a cudaMemcpyKind. Presumably mirrors the same-named API call's argument list -- confirm. */
+    void *dst;
+    size_t dpitch;
+    const void *src;
+    size_t spitch;
+    size_t width;
+    size_t height;
+    enum cudaMemcpyKind kind;
+} cudaMemcpy2D_ptds_v7000_params;
+typedef struct cudaMemcpy2DToArray_ptds_v7000_params_st { /* cudaMemcpy2DToArray params (_ptds, v7000): cudaArray_t dst with wOffset/hOffset, src pointer with spitch, width, height, kind. */
+    cudaArray_t dst;
+    size_t wOffset;
+    size_t hOffset;
+    const void *src;
+    size_t spitch;
+    size_t width;
+    size_t height;
+    enum cudaMemcpyKind kind;
+} cudaMemcpy2DToArray_ptds_v7000_params;
+typedef struct cudaMemcpy2DFromArray_ptds_v7000_params_st { /* cudaMemcpy2DFromArray params (_ptds, v7000): dst pointer with dpitch, cudaArray_const_t src with wOffset/hOffset, width, height, kind. */
+    void *dst;
+    size_t dpitch;
+    cudaArray_const_t src;
+    size_t wOffset;
+    size_t hOffset;
+    size_t width;
+    size_t height;
+    enum cudaMemcpyKind kind;
+} cudaMemcpy2DFromArray_ptds_v7000_params;
+typedef struct cudaMemcpy2DArrayToArray_ptds_v7000_params_st { /* cudaMemcpy2DArrayToArray params (_ptds, v7000): dst array with its two offsets, src array with its two offsets, width, height, kind. */
+    cudaArray_t dst;
+    size_t wOffsetDst;
+    size_t hOffsetDst;
+    cudaArray_const_t src;
+    size_t wOffsetSrc;
+    size_t hOffsetSrc;
+    size_t width;
+    size_t height;
+    enum cudaMemcpyKind kind;
+} cudaMemcpy2DArrayToArray_ptds_v7000_params;
+typedef struct cudaMemcpyToSymbol_ptds_v7000_params_st { /* cudaMemcpyToSymbol params (_ptds, v7000): const symbol pointer, const src pointer, size_t count and offset, and a cudaMemcpyKind. Presumably mirrors the same-named API call's argument list -- confirm. */
+    const void *symbol;
+    const void *src;
+    size_t count;
+    size_t offset;
+    enum cudaMemcpyKind kind;
+} cudaMemcpyToSymbol_ptds_v7000_params;
+typedef struct cudaMemcpyFromSymbol_ptds_v7000_params_st { /* cudaMemcpyFromSymbol params (_ptds, v7000): dst pointer, const symbol pointer, size_t count and offset, and a cudaMemcpyKind. */
+    void *dst;
+    const void *symbol;
+    size_t count;
+    size_t offset;
+    enum cudaMemcpyKind kind;
+} cudaMemcpyFromSymbol_ptds_v7000_params;
+typedef struct cudaMemcpyAsync_ptsz_v7000_params_st { /* cudaMemcpyAsync params (_ptsz, v7000): dst, src, count, kind, then a cudaStream_t. Presumably mirrors the same-named API call's argument list -- confirm. */
+    void *dst;
+    const void *src;
+    size_t count;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpyAsync_ptsz_v7000_params;
+typedef struct cudaMemcpyPeerAsync_v4000_params_st { /* cudaMemcpyPeerAsync params (v4000): dst/dstDevice, src/srcDevice, count, then a cudaStream_t. */
+    void *dst;
+    int dstDevice;
+    const void *src;
+    int srcDevice;
+    size_t count;
+    cudaStream_t stream;
+} cudaMemcpyPeerAsync_v4000_params;
+typedef struct cudaMemcpy2DAsync_ptsz_v7000_params_st { /* cudaMemcpy2DAsync params (_ptsz, v7000): dst/dpitch, src/spitch, width, height, kind, then a cudaStream_t. Presumably mirrors the same-named API call's argument list -- confirm. */
+    void *dst;
+    size_t dpitch;
+    const void *src;
+    size_t spitch;
+    size_t width;
+    size_t height;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpy2DAsync_ptsz_v7000_params;
+typedef struct cudaMemcpy2DToArrayAsync_ptsz_v7000_params_st { /* cudaMemcpy2DToArrayAsync params (_ptsz, v7000): cudaArray_t dst with two offsets, src/spitch, width, height, kind, then a cudaStream_t. */
+    cudaArray_t dst;
+    size_t wOffset;
+    size_t hOffset;
+    const void *src;
+    size_t spitch;
+    size_t width;
+    size_t height;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpy2DToArrayAsync_ptsz_v7000_params;
+typedef struct cudaMemcpy2DFromArrayAsync_ptsz_v7000_params_st { /* cudaMemcpy2DFromArrayAsync params (_ptsz, v7000): dst/dpitch, cudaArray_const_t src with two offsets, width, height, kind, then a cudaStream_t. */
+    void *dst;
+    size_t dpitch;
+    cudaArray_const_t src;
+    size_t wOffset;
+    size_t hOffset;
+    size_t width;
+    size_t height;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpy2DFromArrayAsync_ptsz_v7000_params;
+typedef struct cudaMemcpyToSymbolAsync_ptsz_v7000_params_st { /* cudaMemcpyToSymbolAsync params (_ptsz, v7000): symbol, src, count, offset, kind, then a cudaStream_t. Presumably mirrors the same-named API call's argument list -- confirm. */
+    const void *symbol;
+    const void *src;
+    size_t count;
+    size_t offset;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpyToSymbolAsync_ptsz_v7000_params;
+typedef struct cudaMemcpyFromSymbolAsync_ptsz_v7000_params_st { /* cudaMemcpyFromSymbolAsync params (_ptsz, v7000): dst, symbol, count, offset, kind, then a cudaStream_t. */
+    void *dst;
+    const void *symbol;
+    size_t count;
+    size_t offset;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpyFromSymbolAsync_ptsz_v7000_params;
+typedef struct cudaMemset_ptds_v7000_params_st { /* cudaMemset params (_ptds, v7000): devPtr, an int value and a size_t count. Presumably mirrors the same-named API call's argument list -- confirm. */
+    void *devPtr;
+    int value;
+    size_t count;
+} cudaMemset_ptds_v7000_params;
+typedef struct cudaMemset2D_ptds_v7000_params_st { /* cudaMemset2D params (_ptds, v7000): devPtr, size_t pitch, int value, size_t width and height. */
+    void *devPtr;
+    size_t pitch;
+    int value;
+    size_t width;
+    size_t height;
+} cudaMemset2D_ptds_v7000_params;
+typedef struct cudaMemset3D_ptds_v7000_params_st { /* cudaMemset3D params (_ptds, v7000): a cudaPitchedPtr and a cudaExtent, both stored by value, around an int value. */
+    struct cudaPitchedPtr pitchedDevPtr;
+    int value;
+    struct cudaExtent extent;
+} cudaMemset3D_ptds_v7000_params;
+typedef struct cudaMemsetAsync_ptsz_v7000_params_st { /* cudaMemsetAsync params (_ptsz, v7000): devPtr, int value, size_t count, then a cudaStream_t. Presumably mirrors the same-named API call's argument list -- confirm. */
+    void *devPtr;
+    int value;
+    size_t count;
+    cudaStream_t stream;
+} cudaMemsetAsync_ptsz_v7000_params;
+typedef struct cudaMemset2DAsync_ptsz_v7000_params_st { /* cudaMemset2DAsync params (_ptsz, v7000): devPtr, pitch, value, width, height, then a cudaStream_t. */
+    void *devPtr;
+    size_t pitch;
+    int value;
+    size_t width;
+    size_t height;
+    cudaStream_t stream;
+} cudaMemset2DAsync_ptsz_v7000_params;
+typedef struct cudaMemset3DAsync_ptsz_v7000_params_st { /* cudaMemset3DAsync params (_ptsz, v7000): by-value cudaPitchedPtr, int value, by-value cudaExtent, then a cudaStream_t. */
+    struct cudaPitchedPtr pitchedDevPtr;
+    int value;
+    struct cudaExtent extent;
+    cudaStream_t stream;
+} cudaMemset3DAsync_ptsz_v7000_params;
+typedef struct cudaGetSymbolAddress_v3020_params_st { /* cudaGetSymbolAddress params (v3020): a void** devPtr and a const symbol pointer. Presumably mirrors the same-named API call's argument list -- confirm. */
+    void **devPtr;
+    const void *symbol;
+} cudaGetSymbolAddress_v3020_params;
+typedef struct cudaGetSymbolSize_v3020_params_st { /* cudaGetSymbolSize params (v3020): a size_t pointer and a const symbol pointer. */
+    size_t *size;
+    const void *symbol;
+} cudaGetSymbolSize_v3020_params;
+typedef struct cudaMemPrefetchAsync_ptsz_v8000_params_st { /* cudaMemPrefetchAsync params (_ptsz, v8000): const devPtr, size_t count, int dstDevice and a cudaStream_t. Presumably mirrors the same-named API call's argument list -- confirm. */
+    const void *devPtr;
+    size_t count;
+    int dstDevice;
+    cudaStream_t stream;
+} cudaMemPrefetchAsync_ptsz_v8000_params;
+typedef struct cudaMemAdvise_v8000_params_st { /* cudaMemAdvise params (v8000): const devPtr, size_t count, a cudaMemoryAdvise value and an int device. */
+    const void *devPtr;
+    size_t count;
+    enum cudaMemoryAdvise advice;
+    int device;
+} cudaMemAdvise_v8000_params;
+typedef struct cudaMemRangeGetAttribute_v8000_params_st { /* cudaMemRangeGetAttribute params (v8000): data buffer with its dataSize, one cudaMemRangeAttribute, then const devPtr and count. */
+    void *data;
+    size_t dataSize;
+    enum cudaMemRangeAttribute attribute;
+    const void *devPtr;
+    size_t count;
+} cudaMemRangeGetAttribute_v8000_params;
+typedef struct cudaMemRangeGetAttributes_v8000_params_st { /* cudaMemRangeGetAttributes params (v8000): pointer forms of data, dataSizes and attributes with a size_t numAttributes, then const devPtr and count. */
+    void **data;
+    size_t *dataSizes;
+    enum cudaMemRangeAttribute *attributes;
+    size_t numAttributes;
+    const void *devPtr;
+    size_t count;
+} cudaMemRangeGetAttributes_v8000_params;
+typedef struct cudaMemcpyToArray_ptds_v7000_params_st { /* cudaMemcpyToArray params (_ptds, v7000): cudaArray_t dst with wOffset/hOffset, const src pointer, size_t count and a cudaMemcpyKind. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaArray_t dst;
+    size_t wOffset;
+    size_t hOffset;
+    const void *src;
+    size_t count;
+    enum cudaMemcpyKind kind;
+} cudaMemcpyToArray_ptds_v7000_params;
+typedef struct cudaMemcpyFromArray_ptds_v7000_params_st { /* cudaMemcpyFromArray params (_ptds, v7000): dst pointer, cudaArray_const_t src with wOffset/hOffset, count and kind. */
+    void *dst;
+    cudaArray_const_t src;
+    size_t wOffset;
+    size_t hOffset;
+    size_t count;
+    enum cudaMemcpyKind kind;
+} cudaMemcpyFromArray_ptds_v7000_params;
+typedef struct cudaMemcpyArrayToArray_ptds_v7000_params_st { /* cudaMemcpyArrayToArray params (_ptds, v7000): dst array with its two offsets, src array with its two offsets, count and kind. */
+    cudaArray_t dst;
+    size_t wOffsetDst;
+    size_t hOffsetDst;
+    cudaArray_const_t src;
+    size_t wOffsetSrc;
+    size_t hOffsetSrc;
+    size_t count;
+    enum cudaMemcpyKind kind;
+} cudaMemcpyArrayToArray_ptds_v7000_params;
+typedef struct cudaMemcpyToArrayAsync_ptsz_v7000_params_st { /* cudaMemcpyToArrayAsync params (_ptsz, v7000): cudaArray_t dst with two offsets, src, count, kind, then a cudaStream_t. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaArray_t dst;
+    size_t wOffset;
+    size_t hOffset;
+    const void *src;
+    size_t count;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpyToArrayAsync_ptsz_v7000_params;
+typedef struct cudaMemcpyFromArrayAsync_ptsz_v7000_params_st { /* cudaMemcpyFromArrayAsync params (_ptsz, v7000): dst, cudaArray_const_t src with two offsets, count, kind, then a cudaStream_t. */
+    void *dst;
+    cudaArray_const_t src;
+    size_t wOffset;
+    size_t hOffset;
+    size_t count;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpyFromArrayAsync_ptsz_v7000_params;
+typedef struct cudaMallocAsync_ptsz_v11020_params_st { /* cudaMallocAsync params (_ptsz, v11020): a void** devPtr, a size_t size and a cudaStream_t named hStream. Presumably mirrors the same-named API call's argument list -- confirm. */
+    void **devPtr;
+    size_t size;
+    cudaStream_t hStream;
+} cudaMallocAsync_ptsz_v11020_params;
+typedef struct cudaFreeAsync_ptsz_v11020_params_st { /* cudaFreeAsync params (_ptsz, v11020): devPtr and a cudaStream_t named hStream. */
+    void *devPtr;
+    cudaStream_t hStream;
+} cudaFreeAsync_ptsz_v11020_params;
+typedef struct cudaMemPoolTrimTo_v11020_params_st { /* cudaMemPoolTrimTo params (v11020): a cudaMemPool_t and a size_t minBytesToKeep. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaMemPool_t memPool;
+    size_t minBytesToKeep;
+} cudaMemPoolTrimTo_v11020_params;
+typedef struct cudaMemPoolSetAttribute_v11020_params_st { /* cudaMemPoolSetAttribute params (v11020): pool handle, a cudaMemPoolAttr selector and an untyped value pointer. */
+    cudaMemPool_t memPool;
+    enum cudaMemPoolAttr attr;
+    void *value;
+} cudaMemPoolSetAttribute_v11020_params;
+typedef struct cudaMemPoolGetAttribute_v11020_params_st { /* cudaMemPoolGetAttribute params (v11020): same three fields as the Set record (pool, attr, void* value). */
+    cudaMemPool_t memPool;
+    enum cudaMemPoolAttr attr;
+    void *value;
+} cudaMemPoolGetAttribute_v11020_params;
+typedef struct cudaMemPoolSetAccess_v11020_params_st { /* cudaMemPoolSetAccess params (v11020): pool handle, pointer to const cudaMemAccessDesc, and a size_t count. */
+    cudaMemPool_t memPool;
+    const struct cudaMemAccessDesc *descList;
+    size_t count;
+} cudaMemPoolSetAccess_v11020_params;
+typedef struct cudaMemPoolGetAccess_v11020_params_st { /* cudaMemPoolGetAccess params (v11020): pointer to cudaMemAccessFlags, pool handle, pointer to a cudaMemLocation. */
+    enum cudaMemAccessFlags *flags;
+    cudaMemPool_t memPool;
+    struct cudaMemLocation *location;
+} cudaMemPoolGetAccess_v11020_params;
+typedef struct cudaMemPoolCreate_v11020_params_st { /* cudaMemPoolCreate params (v11020): pointer to a cudaMemPool_t and pointer to const cudaMemPoolProps. */
+    cudaMemPool_t *memPool;
+    const struct cudaMemPoolProps *poolProps;
+} cudaMemPoolCreate_v11020_params;
+typedef struct cudaMemPoolDestroy_v11020_params_st { /* cudaMemPoolDestroy params (v11020): the cudaMemPool_t handle only. */
+    cudaMemPool_t memPool;
+} cudaMemPoolDestroy_v11020_params;
+typedef struct cudaMallocFromPoolAsync_ptsz_v11020_params_st { /* cudaMallocFromPoolAsync params (_ptsz, v11020): a void** ptr, size_t size, a cudaMemPool_t and a cudaStream_t. Presumably mirrors the same-named API call's argument list -- confirm. */
+    void **ptr;
+    size_t size;
+    cudaMemPool_t memPool;
+    cudaStream_t stream;
+} cudaMallocFromPoolAsync_ptsz_v11020_params;
+typedef struct cudaMemPoolExportToShareableHandle_v11020_params_st { /* cudaMemPoolExportToShareableHandle params (v11020): untyped shareableHandle pointer, pool handle, a cudaMemAllocationHandleType and unsigned flags. */
+    void *shareableHandle;
+    cudaMemPool_t memPool;
+    enum cudaMemAllocationHandleType handleType;
+    unsigned int flags;
+} cudaMemPoolExportToShareableHandle_v11020_params;
+typedef struct cudaMemPoolImportFromShareableHandle_v11020_params_st { /* cudaMemPoolImportFromShareableHandle params (v11020): pointer to a cudaMemPool_t, untyped shareableHandle pointer, handle type and unsigned flags. */
+    cudaMemPool_t *memPool;
+    void *shareableHandle;
+    enum cudaMemAllocationHandleType handleType;
+    unsigned int flags;
+} cudaMemPoolImportFromShareableHandle_v11020_params;
+typedef struct cudaMemPoolExportPointer_v11020_params_st { /* cudaMemPoolExportPointer params (v11020): pointer to a cudaMemPoolPtrExportData and a void* ptr. */
+    struct cudaMemPoolPtrExportData *exportData;
+    void *ptr;
+} cudaMemPoolExportPointer_v11020_params;
+typedef struct cudaMemPoolImportPointer_v11020_params_st { /* cudaMemPoolImportPointer params (v11020): a void** ptr, pool handle, and pointer to a cudaMemPoolPtrExportData. */
+    void **ptr;
+    cudaMemPool_t memPool;
+    struct cudaMemPoolPtrExportData *exportData;
+} cudaMemPoolImportPointer_v11020_params;
+typedef struct cudaPointerGetAttributes_v4000_params_st { /* cudaPointerGetAttributes params (v4000): pointer to a cudaPointerAttributes and a const void* ptr. Presumably mirrors the same-named API call's argument list -- confirm. */
+    struct cudaPointerAttributes *attributes;
+    const void *ptr;
+} cudaPointerGetAttributes_v4000_params;
+typedef struct cudaDeviceCanAccessPeer_v4000_params_st { /* cudaDeviceCanAccessPeer params (v4000): int pointer canAccessPeer plus int device and int peerDevice. */
+    int *canAccessPeer;
+    int device;
+    int peerDevice;
+} cudaDeviceCanAccessPeer_v4000_params;
+typedef struct cudaDeviceEnablePeerAccess_v4000_params_st { /* cudaDeviceEnablePeerAccess params (v4000): int peerDevice and unsigned flags. */
+    int peerDevice;
+    unsigned int flags;
+} cudaDeviceEnablePeerAccess_v4000_params;
+typedef struct cudaDeviceDisablePeerAccess_v4000_params_st { /* cudaDeviceDisablePeerAccess params (v4000): int peerDevice only. */
+    int peerDevice;
+} cudaDeviceDisablePeerAccess_v4000_params;
+typedef struct cudaGraphicsUnregisterResource_v3020_params_st { /* cudaGraphicsUnregisterResource params (v3020): a single cudaGraphicsResource_t. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraphicsResource_t resource;
+} cudaGraphicsUnregisterResource_v3020_params;
+typedef struct cudaGraphicsResourceSetMapFlags_v3020_params_st { /* cudaGraphicsResourceSetMapFlags params (v3020): resource handle and unsigned flags. */
+    cudaGraphicsResource_t resource;
+    unsigned int flags;
+} cudaGraphicsResourceSetMapFlags_v3020_params;
+typedef struct cudaGraphicsMapResources_v3020_params_st { /* cudaGraphicsMapResources params (v3020): int count, pointer to cudaGraphicsResource_t, and a cudaStream_t. */
+    int count;
+    cudaGraphicsResource_t *resources;
+    cudaStream_t stream;
+} cudaGraphicsMapResources_v3020_params;
+typedef struct cudaGraphicsUnmapResources_v3020_params_st { /* cudaGraphicsUnmapResources params (v3020): same three fields as the Map record (count, resources, stream). */
+    int count;
+    cudaGraphicsResource_t *resources;
+    cudaStream_t stream;
+} cudaGraphicsUnmapResources_v3020_params;
+typedef struct cudaGraphicsResourceGetMappedPointer_v3020_params_st { /* cudaGraphicsResourceGetMappedPointer params (v3020): a void** devPtr, a size_t pointer, and the resource handle. */
+    void **devPtr;
+    size_t *size;
+    cudaGraphicsResource_t resource;
+} cudaGraphicsResourceGetMappedPointer_v3020_params;
+typedef struct cudaGraphicsSubResourceGetMappedArray_v3020_params_st { /* cudaGraphicsSubResourceGetMappedArray params (v3020): pointer to a cudaArray_t, resource handle, unsigned arrayIndex and mipLevel. */
+    cudaArray_t *array;
+    cudaGraphicsResource_t resource;
+    unsigned int arrayIndex;
+    unsigned int mipLevel;
+} cudaGraphicsSubResourceGetMappedArray_v3020_params;
+typedef struct cudaGraphicsResourceGetMappedMipmappedArray_v5000_params_st { /* cudaGraphicsResourceGetMappedMipmappedArray params (v5000): pointer to a cudaMipmappedArray_t and the resource handle. */
+    cudaMipmappedArray_t *mipmappedArray;
+    cudaGraphicsResource_t resource;
+} cudaGraphicsResourceGetMappedMipmappedArray_v5000_params;
+typedef struct cudaBindTexture_v3020_params_st { /* cudaBindTexture params (v3020): size_t pointer offset, const textureReference pointer, const devPtr, const channel format descriptor pointer, size_t size. Presumably mirrors the same-named API call's argument list -- confirm. */
+    size_t *offset;
+    const struct textureReference *texref;
+    const void *devPtr;
+    const struct cudaChannelFormatDesc *desc;
+    size_t size;
+} cudaBindTexture_v3020_params;
+typedef struct cudaBindTexture2D_v3020_params_st { /* cudaBindTexture2D params (v3020): offset, texref, devPtr, desc, then size_t width, height and pitch. */
+    size_t *offset;
+    const struct textureReference *texref;
+    const void *devPtr;
+    const struct cudaChannelFormatDesc *desc;
+    size_t width;
+    size_t height;
+    size_t pitch;
+} cudaBindTexture2D_v3020_params;
+typedef struct cudaBindTextureToArray_v3020_params_st { /* cudaBindTextureToArray params (v3020): texref, a cudaArray_const_t, and a const channel format descriptor pointer. */
+    const struct textureReference *texref;
+    cudaArray_const_t array;
+    const struct cudaChannelFormatDesc *desc;
+} cudaBindTextureToArray_v3020_params;
+typedef struct cudaBindTextureToMipmappedArray_v5000_params_st { /* cudaBindTextureToMipmappedArray params (v5000): texref, a cudaMipmappedArray_const_t, and a const channel format descriptor pointer. */
+    const struct textureReference *texref;
+    cudaMipmappedArray_const_t mipmappedArray;
+    const struct cudaChannelFormatDesc *desc;
+} cudaBindTextureToMipmappedArray_v5000_params;
+typedef struct cudaUnbindTexture_v3020_params_st { /* cudaUnbindTexture params (v3020): the const textureReference pointer only. */
+    const struct textureReference *texref;
+} cudaUnbindTexture_v3020_params;
+typedef struct cudaGetTextureAlignmentOffset_v3020_params_st { /* cudaGetTextureAlignmentOffset params (v3020): size_t pointer offset and a const textureReference pointer. */
+    size_t *offset;
+    const struct textureReference *texref;
+} cudaGetTextureAlignmentOffset_v3020_params;
+typedef struct cudaGetTextureReference_v3020_params_st { /* cudaGetTextureReference params (v3020): pointer-to-pointer to const textureReference and a const symbol pointer. */
+    const struct textureReference **texref;
+    const void *symbol;
+} cudaGetTextureReference_v3020_params;
+typedef struct cudaBindSurfaceToArray_v3020_params_st { /* cudaBindSurfaceToArray params (v3020): const surfaceReference pointer, a cudaArray_const_t, and a const channel format descriptor pointer. Presumably mirrors the same-named API call's argument list -- confirm. */
+    const struct surfaceReference *surfref;
+    cudaArray_const_t array;
+    const struct cudaChannelFormatDesc *desc;
+} cudaBindSurfaceToArray_v3020_params;
+typedef struct cudaGetSurfaceReference_v3020_params_st { /* cudaGetSurfaceReference params (v3020): pointer-to-pointer to const surfaceReference and a const symbol pointer. */
+    const struct surfaceReference **surfref;
+    const void *symbol;
+} cudaGetSurfaceReference_v3020_params;
+typedef struct cudaGetChannelDesc_v3020_params_st { /* cudaGetChannelDesc params (v3020): pointer to a cudaChannelFormatDesc and a cudaArray_const_t. Presumably mirrors the same-named API call's argument list -- confirm. */
+    struct cudaChannelFormatDesc *desc;
+    cudaArray_const_t array;
+} cudaGetChannelDesc_v3020_params;
+typedef struct cudaCreateChannelDesc_v3020_params_st { /* cudaCreateChannelDesc params (v3020): four ints x, y, z, w and a cudaChannelFormatKind f. */
+    int x;
+    int y;
+    int z;
+    int w;
+    enum cudaChannelFormatKind f;
+} cudaCreateChannelDesc_v3020_params;
+typedef struct cudaCreateTextureObject_v5000_params_st { /* cudaCreateTextureObject params (v5000): pointer to a cudaTextureObject_t plus const pointers to resource, texture and resource-view descriptors. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaTextureObject_t *pTexObject;
+    const struct cudaResourceDesc *pResDesc;
+    const struct cudaTextureDesc *pTexDesc;
+    const struct cudaResourceViewDesc *pResViewDesc;
+} cudaCreateTextureObject_v5000_params;
+typedef struct cudaDestroyTextureObject_v5000_params_st { /* cudaDestroyTextureObject params (v5000): the cudaTextureObject_t only. */
+    cudaTextureObject_t texObject;
+} cudaDestroyTextureObject_v5000_params;
+typedef struct cudaGetTextureObjectResourceDesc_v5000_params_st { /* cudaGetTextureObjectResourceDesc params (v5000): pointer to a cudaResourceDesc and the texture object. */
+    struct cudaResourceDesc *pResDesc;
+    cudaTextureObject_t texObject;
+} cudaGetTextureObjectResourceDesc_v5000_params;
+typedef struct cudaGetTextureObjectTextureDesc_v5000_params_st { /* cudaGetTextureObjectTextureDesc params (v5000): pointer to a cudaTextureDesc and the texture object. */
+    struct cudaTextureDesc *pTexDesc;
+    cudaTextureObject_t texObject;
+} cudaGetTextureObjectTextureDesc_v5000_params;
+typedef struct cudaGetTextureObjectResourceViewDesc_v5000_params_st { /* cudaGetTextureObjectResourceViewDesc params (v5000): pointer to a cudaResourceViewDesc and the texture object. */
+    struct cudaResourceViewDesc *pResViewDesc;
+    cudaTextureObject_t texObject;
+} cudaGetTextureObjectResourceViewDesc_v5000_params;
+typedef struct cudaCreateSurfaceObject_v5000_params_st { /* cudaCreateSurfaceObject params (v5000): pointer to a cudaSurfaceObject_t and a const cudaResourceDesc pointer. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaSurfaceObject_t *pSurfObject;
+    const struct cudaResourceDesc *pResDesc;
+} cudaCreateSurfaceObject_v5000_params;
+typedef struct cudaDestroySurfaceObject_v5000_params_st { /* cudaDestroySurfaceObject params (v5000): the cudaSurfaceObject_t only. */
+    cudaSurfaceObject_t surfObject;
+} cudaDestroySurfaceObject_v5000_params;
+typedef struct cudaGetSurfaceObjectResourceDesc_v5000_params_st { /* cudaGetSurfaceObjectResourceDesc params (v5000): pointer to a cudaResourceDesc and the surface object. */
+    struct cudaResourceDesc *pResDesc;
+    cudaSurfaceObject_t surfObject;
+} cudaGetSurfaceObjectResourceDesc_v5000_params;
+typedef struct cudaDriverGetVersion_v3020_params_st { /* cudaDriverGetVersion params (v3020): a single int pointer, driverVersion. Presumably mirrors the same-named API call's argument list -- confirm. */
+    int *driverVersion;
+} cudaDriverGetVersion_v3020_params;
+typedef struct cudaRuntimeGetVersion_v3020_params_st { /* cudaRuntimeGetVersion params (v3020): a single int pointer, runtimeVersion. */
+    int *runtimeVersion;
+} cudaRuntimeGetVersion_v3020_params;
+typedef struct cudaGraphCreate_v10000_params_st { /* cudaGraphCreate params (v10000): pointer to a cudaGraph_t and unsigned flags. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraph_t *pGraph;
+    unsigned int flags;
+} cudaGraphCreate_v10000_params;
+typedef struct cudaGraphAddKernelNode_v10000_params_st { /* cudaGraphAddKernelNode params (v10000): node pointer, graph, const dependency-node pointer with size_t numDependencies, and a const cudaKernelNodeParams pointer. */
+    cudaGraphNode_t *pGraphNode;
+    cudaGraph_t graph;
+    const cudaGraphNode_t *pDependencies;
+    size_t numDependencies;
+    const struct cudaKernelNodeParams *pNodeParams;
+} cudaGraphAddKernelNode_v10000_params;
+typedef struct cudaGraphKernelNodeGetParams_v10000_params_st { /* cudaGraphKernelNodeGetParams params (v10000): node handle and a non-const cudaKernelNodeParams pointer. */
+    cudaGraphNode_t node;
+    struct cudaKernelNodeParams *pNodeParams;
+} cudaGraphKernelNodeGetParams_v10000_params;
+typedef struct cudaGraphKernelNodeSetParams_v10000_params_st { /* cudaGraphKernelNodeSetParams params (v10000): node handle and a const cudaKernelNodeParams pointer. */
+    cudaGraphNode_t node;
+    const struct cudaKernelNodeParams *pNodeParams;
+} cudaGraphKernelNodeSetParams_v10000_params;
+typedef struct cudaGraphKernelNodeCopyAttributes_v11000_params_st { /* cudaGraphKernelNodeCopyAttributes params (v11000): two node handles, hSrc and hDst. */
+    cudaGraphNode_t hSrc;
+    cudaGraphNode_t hDst;
+} cudaGraphKernelNodeCopyAttributes_v11000_params;
+typedef struct cudaGraphKernelNodeGetAttribute_v11000_params_st { /* cudaGraphKernelNodeGetAttribute params (v11000): node handle, a cudaKernelNodeAttrID and a non-const cudaKernelNodeAttrValue pointer. */
+    cudaGraphNode_t hNode;
+    cudaKernelNodeAttrID attr;
+    cudaKernelNodeAttrValue *value_out;
+} cudaGraphKernelNodeGetAttribute_v11000_params;
+typedef struct cudaGraphKernelNodeSetAttribute_v11000_params_st { /* cudaGraphKernelNodeSetAttribute params (v11000): node handle, a cudaKernelNodeAttrID and a const cudaKernelNodeAttrValue pointer. */
+    cudaGraphNode_t hNode;
+    cudaKernelNodeAttrID attr;
+    const cudaKernelNodeAttrValue *value;
+} cudaGraphKernelNodeSetAttribute_v11000_params;
+typedef struct cudaGraphAddMemcpyNode_v10000_params_st { /* cudaGraphAddMemcpyNode params (v10000): node pointer, graph, dependency pointer and count, and a const cudaMemcpy3DParms pointer. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraphNode_t *pGraphNode;
+    cudaGraph_t graph;
+    const cudaGraphNode_t *pDependencies;
+    size_t numDependencies;
+    const struct cudaMemcpy3DParms *pCopyParams;
+} cudaGraphAddMemcpyNode_v10000_params;
+typedef struct cudaGraphAddMemcpyNodeToSymbol_v11010_params_st { /* cudaGraphAddMemcpyNodeToSymbol params (v11010): the four node/graph/dependency fields, then symbol, src, count, offset and kind. */
+    cudaGraphNode_t *pGraphNode;
+    cudaGraph_t graph;
+    const cudaGraphNode_t *pDependencies;
+    size_t numDependencies;
+    const void *symbol;
+    const void *src;
+    size_t count;
+    size_t offset;
+    enum cudaMemcpyKind kind;
+} cudaGraphAddMemcpyNodeToSymbol_v11010_params;
+typedef struct cudaGraphAddMemcpyNodeFromSymbol_v11010_params_st { /* cudaGraphAddMemcpyNodeFromSymbol params (v11010): the four node/graph/dependency fields, then dst, symbol, count, offset and kind. */
+    cudaGraphNode_t *pGraphNode;
+    cudaGraph_t graph;
+    const cudaGraphNode_t *pDependencies;
+    size_t numDependencies;
+    void *dst;
+    const void *symbol;
+    size_t count;
+    size_t offset;
+    enum cudaMemcpyKind kind;
+} cudaGraphAddMemcpyNodeFromSymbol_v11010_params;
+typedef struct cudaGraphAddMemcpyNode1D_v11010_params_st { /* cudaGraphAddMemcpyNode1D params (v11010): the four node/graph/dependency fields, then dst, src, count and kind. */
+    cudaGraphNode_t *pGraphNode;
+    cudaGraph_t graph;
+    const cudaGraphNode_t *pDependencies;
+    size_t numDependencies;
+    void *dst;
+    const void *src;
+    size_t count;
+    enum cudaMemcpyKind kind;
+} cudaGraphAddMemcpyNode1D_v11010_params;
+typedef struct cudaGraphMemcpyNodeGetParams_v10000_params_st { /* cudaGraphMemcpyNodeGetParams params (v10000): node handle and a non-const cudaMemcpy3DParms pointer. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraphNode_t node;
+    struct cudaMemcpy3DParms *pNodeParams;
+} cudaGraphMemcpyNodeGetParams_v10000_params;
+typedef struct cudaGraphMemcpyNodeSetParams_v10000_params_st { /* cudaGraphMemcpyNodeSetParams params (v10000): node handle and a const cudaMemcpy3DParms pointer. */
+    cudaGraphNode_t node;
+    const struct cudaMemcpy3DParms *pNodeParams;
+} cudaGraphMemcpyNodeSetParams_v10000_params;
+typedef struct cudaGraphMemcpyNodeSetParamsToSymbol_v11010_params_st { /* cudaGraphMemcpyNodeSetParamsToSymbol params (v11010): node handle, then symbol, src, count, offset and kind. */
+    cudaGraphNode_t node;
+    const void *symbol;
+    const void *src;
+    size_t count;
+    size_t offset;
+    enum cudaMemcpyKind kind;
+} cudaGraphMemcpyNodeSetParamsToSymbol_v11010_params;
+typedef struct cudaGraphMemcpyNodeSetParamsFromSymbol_v11010_params_st { /* cudaGraphMemcpyNodeSetParamsFromSymbol params (v11010): node handle, then dst, symbol, count, offset and kind. */
+    cudaGraphNode_t node;
+    void *dst;
+    const void *symbol;
+    size_t count;
+    size_t offset;
+    enum cudaMemcpyKind kind;
+} cudaGraphMemcpyNodeSetParamsFromSymbol_v11010_params;
+typedef struct cudaGraphMemcpyNodeSetParams1D_v11010_params_st { /* cudaGraphMemcpyNodeSetParams1D params (v11010): node handle, then dst, src, count and kind. */
+    cudaGraphNode_t node;
+    void *dst;
+    const void *src;
+    size_t count;
+    enum cudaMemcpyKind kind;
+} cudaGraphMemcpyNodeSetParams1D_v11010_params;
+typedef struct cudaGraphAddMemsetNode_v10000_params_st { /* cudaGraphAddMemsetNode params (v10000): node pointer, graph, dependency pointer and count, and a const cudaMemsetParams pointer. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraphNode_t *pGraphNode;
+    cudaGraph_t graph;
+    const cudaGraphNode_t *pDependencies;
+    size_t numDependencies;
+    const struct cudaMemsetParams *pMemsetParams;
+} cudaGraphAddMemsetNode_v10000_params;
+typedef struct cudaGraphMemsetNodeGetParams_v10000_params_st { /* cudaGraphMemsetNodeGetParams params (v10000): node handle and a non-const cudaMemsetParams pointer. */
+    cudaGraphNode_t node;
+    struct cudaMemsetParams *pNodeParams;
+} cudaGraphMemsetNodeGetParams_v10000_params;
+typedef struct cudaGraphMemsetNodeSetParams_v10000_params_st { /* cudaGraphMemsetNodeSetParams params (v10000): node handle and a const cudaMemsetParams pointer. */
+    cudaGraphNode_t node;
+    const struct cudaMemsetParams *pNodeParams;
+} cudaGraphMemsetNodeSetParams_v10000_params;
+typedef struct cudaGraphAddHostNode_v10000_params_st { /* cudaGraphAddHostNode params (v10000): node pointer, graph, dependency pointer and count, and a const cudaHostNodeParams pointer. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraphNode_t *pGraphNode;
+    cudaGraph_t graph;
+    const cudaGraphNode_t *pDependencies;
+    size_t numDependencies;
+    const struct cudaHostNodeParams *pNodeParams;
+} cudaGraphAddHostNode_v10000_params;
+typedef struct cudaGraphHostNodeGetParams_v10000_params_st { /* cudaGraphHostNodeGetParams params (v10000): node handle and a non-const cudaHostNodeParams pointer. */
+    cudaGraphNode_t node;
+    struct cudaHostNodeParams *pNodeParams;
+} cudaGraphHostNodeGetParams_v10000_params;
+typedef struct cudaGraphHostNodeSetParams_v10000_params_st { /* cudaGraphHostNodeSetParams params (v10000): node handle and a const cudaHostNodeParams pointer. */
+    cudaGraphNode_t node;
+    const struct cudaHostNodeParams *pNodeParams;
+} cudaGraphHostNodeSetParams_v10000_params;
+typedef struct cudaGraphAddChildGraphNode_v10000_params_st { /* cudaGraphAddChildGraphNode params (v10000): node pointer, graph, dependency pointer and count, and a cudaGraph_t childGraph. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraphNode_t *pGraphNode;
+    cudaGraph_t graph;
+    const cudaGraphNode_t *pDependencies;
+    size_t numDependencies;
+    cudaGraph_t childGraph;
+} cudaGraphAddChildGraphNode_v10000_params;
+typedef struct cudaGraphChildGraphNodeGetGraph_v10000_params_st { /* cudaGraphChildGraphNodeGetGraph params (v10000): node handle and a pointer to a cudaGraph_t. */
+    cudaGraphNode_t node;
+    cudaGraph_t *pGraph;
+} cudaGraphChildGraphNodeGetGraph_v10000_params;
+typedef struct cudaGraphAddEmptyNode_v10000_params_st { /* cudaGraphAddEmptyNode params (v10000): only the node pointer, graph, dependency pointer and count. */
+    cudaGraphNode_t *pGraphNode;
+    cudaGraph_t graph;
+    const cudaGraphNode_t *pDependencies;
+    size_t numDependencies;
+} cudaGraphAddEmptyNode_v10000_params;
+typedef struct cudaGraphAddEventRecordNode_v11010_params_st { /* cudaGraphAddEventRecordNode params (v11010): node pointer, graph, dependency pointer and count, and a cudaEvent_t. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraphNode_t *pGraphNode;
+    cudaGraph_t graph;
+    const cudaGraphNode_t *pDependencies;
+    size_t numDependencies;
+    cudaEvent_t event;
+} cudaGraphAddEventRecordNode_v11010_params;
+typedef struct cudaGraphEventRecordNodeGetEvent_v11010_params_st { /* cudaGraphEventRecordNodeGetEvent params (v11010): node handle and a pointer to a cudaEvent_t. */
+    cudaGraphNode_t node;
+    cudaEvent_t *event_out;
+} cudaGraphEventRecordNodeGetEvent_v11010_params;
+typedef struct cudaGraphEventRecordNodeSetEvent_v11010_params_st { /* cudaGraphEventRecordNodeSetEvent params (v11010): node handle and a cudaEvent_t by value. */
+    cudaGraphNode_t node;
+    cudaEvent_t event;
+} cudaGraphEventRecordNodeSetEvent_v11010_params;
+typedef struct cudaGraphAddEventWaitNode_v11010_params_st { /* cudaGraphAddEventWaitNode params (v11010): same five fields as the AddEventRecordNode record. */
+    cudaGraphNode_t *pGraphNode;
+    cudaGraph_t graph;
+    const cudaGraphNode_t *pDependencies;
+    size_t numDependencies;
+    cudaEvent_t event;
+} cudaGraphAddEventWaitNode_v11010_params;
+typedef struct cudaGraphEventWaitNodeGetEvent_v11010_params_st { /* cudaGraphEventWaitNodeGetEvent params (v11010): node handle and a pointer to a cudaEvent_t. */
+    cudaGraphNode_t node;
+    cudaEvent_t *event_out;
+} cudaGraphEventWaitNodeGetEvent_v11010_params;
+typedef struct cudaGraphEventWaitNodeSetEvent_v11010_params_st { /* cudaGraphEventWaitNodeSetEvent params (v11010): node handle and a cudaEvent_t by value. */
+    cudaGraphNode_t node;
+    cudaEvent_t event;
+} cudaGraphEventWaitNodeSetEvent_v11010_params;
+typedef struct cudaGraphAddExternalSemaphoresSignalNode_v11020_params_st { /* cudaGraphAddExternalSemaphoresSignalNode params (v11020): node pointer, graph, dependency pointer and count, and a const cudaExternalSemaphoreSignalNodeParams pointer. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraphNode_t *pGraphNode;
+    cudaGraph_t graph;
+    const cudaGraphNode_t *pDependencies;
+    size_t numDependencies;
+    const struct cudaExternalSemaphoreSignalNodeParams *nodeParams;
+} cudaGraphAddExternalSemaphoresSignalNode_v11020_params;
+typedef struct cudaGraphExternalSemaphoresSignalNodeGetParams_v11020_params_st { /* Signal-node GetParams record (v11020): node handle and a non-const signal-node params pointer. */
+    cudaGraphNode_t hNode;
+    struct cudaExternalSemaphoreSignalNodeParams *params_out;
+} cudaGraphExternalSemaphoresSignalNodeGetParams_v11020_params;
+typedef struct cudaGraphExternalSemaphoresSignalNodeSetParams_v11020_params_st { /* Signal-node SetParams record (v11020): node handle and a const signal-node params pointer. */
+    cudaGraphNode_t hNode;
+    const struct cudaExternalSemaphoreSignalNodeParams *nodeParams;
+} cudaGraphExternalSemaphoresSignalNodeSetParams_v11020_params;
+typedef struct cudaGraphAddExternalSemaphoresWaitNode_v11020_params_st { /* cudaGraphAddExternalSemaphoresWaitNode params (v11020): node pointer, graph, dependency pointer and count, and a const cudaExternalSemaphoreWaitNodeParams pointer. */
+    cudaGraphNode_t *pGraphNode;
+    cudaGraph_t graph;
+    const cudaGraphNode_t *pDependencies;
+    size_t numDependencies;
+    const struct cudaExternalSemaphoreWaitNodeParams *nodeParams;
+} cudaGraphAddExternalSemaphoresWaitNode_v11020_params;
+typedef struct cudaGraphExternalSemaphoresWaitNodeGetParams_v11020_params_st { /* Wait-node GetParams record (v11020): node handle and a non-const wait-node params pointer. */
+    cudaGraphNode_t hNode;
+    struct cudaExternalSemaphoreWaitNodeParams *params_out;
+} cudaGraphExternalSemaphoresWaitNodeGetParams_v11020_params;
+typedef struct cudaGraphExternalSemaphoresWaitNodeSetParams_v11020_params_st { /* Wait-node SetParams record (v11020): node handle and a const wait-node params pointer. */
+    cudaGraphNode_t hNode;
+    const struct cudaExternalSemaphoreWaitNodeParams *nodeParams;
+} cudaGraphExternalSemaphoresWaitNodeSetParams_v11020_params;
+typedef struct cudaGraphAddMemAllocNode_v11040_params_st { /* cudaGraphAddMemAllocNode params (v11040): node pointer, graph, dependency pointer and count, and a cudaMemAllocNodeParams pointer (non-const here, unlike the other Add* records). Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraphNode_t *pGraphNode;
+    cudaGraph_t graph;
+    const cudaGraphNode_t *pDependencies;
+    size_t numDependencies;
+    struct cudaMemAllocNodeParams *nodeParams;
+} cudaGraphAddMemAllocNode_v11040_params;
+typedef struct cudaGraphMemAllocNodeGetParams_v11040_params_st { /* cudaGraphMemAllocNodeGetParams params (v11040): node handle and a cudaMemAllocNodeParams pointer. */
+    cudaGraphNode_t node;
+    struct cudaMemAllocNodeParams *params_out;
+} cudaGraphMemAllocNodeGetParams_v11040_params;
+typedef struct cudaGraphAddMemFreeNode_v11040_params_st { /* cudaGraphAddMemFreeNode params (v11040): node pointer, graph, dependency pointer and count, and a void* dptr. */
+    cudaGraphNode_t *pGraphNode;
+    cudaGraph_t graph;
+    const cudaGraphNode_t *pDependencies;
+    size_t numDependencies;
+    void *dptr;
+} cudaGraphAddMemFreeNode_v11040_params;
+typedef struct cudaGraphMemFreeNodeGetParams_v11040_params_st { /* cudaGraphMemFreeNodeGetParams params (v11040): node handle and a void* named dptr_out. */
+    cudaGraphNode_t node;
+    void *dptr_out;
+} cudaGraphMemFreeNodeGetParams_v11040_params;
+typedef struct cudaDeviceGraphMemTrim_v11040_params_st { /* cudaDeviceGraphMemTrim params (v11040): int device only. */
+    int device;
+} cudaDeviceGraphMemTrim_v11040_params;
+typedef struct cudaDeviceGetGraphMemAttribute_v11040_params_st { /* cudaDeviceGetGraphMemAttribute params (v11040): int device, a cudaGraphMemAttributeType and an untyped value pointer. */
+    int device;
+    enum cudaGraphMemAttributeType attr;
+    void *value;
+} cudaDeviceGetGraphMemAttribute_v11040_params;
+typedef struct cudaDeviceSetGraphMemAttribute_v11040_params_st { /* cudaDeviceSetGraphMemAttribute params (v11040): same three fields as the Get record. */
+    int device;
+    enum cudaGraphMemAttributeType attr;
+    void *value;
+} cudaDeviceSetGraphMemAttribute_v11040_params;
+typedef struct cudaGraphClone_v10000_params_st { /* cudaGraphClone params (v10000): pointer to a cudaGraph_t and the cudaGraph_t originalGraph. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraph_t *pGraphClone;
+    cudaGraph_t originalGraph;
+} cudaGraphClone_v10000_params;
+typedef struct cudaGraphNodeFindInClone_v10000_params_st { /* cudaGraphNodeFindInClone params (v10000): pointer to a node handle, the originalNode handle, and the clonedGraph handle. */
+    cudaGraphNode_t *pNode;
+    cudaGraphNode_t originalNode;
+    cudaGraph_t clonedGraph;
+} cudaGraphNodeFindInClone_v10000_params;
+typedef struct cudaGraphNodeGetType_v10000_params_st { /* cudaGraphNodeGetType params (v10000): node handle and a pointer to a cudaGraphNodeType. */
+    cudaGraphNode_t node;
+    enum cudaGraphNodeType *pType;
+} cudaGraphNodeGetType_v10000_params;
+typedef struct cudaGraphGetNodes_v10000_params_st { /* cudaGraphGetNodes params (v10000): graph handle, pointer to node handles, and a size_t pointer numNodes. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraph_t graph;
+    cudaGraphNode_t *nodes;
+    size_t *numNodes;
+} cudaGraphGetNodes_v10000_params;
+typedef struct cudaGraphGetRootNodes_v10000_params_st { /* cudaGraphGetRootNodes params (v10000): graph handle, pointer to node handles, and a size_t pointer pNumRootNodes. */
+    cudaGraph_t graph;
+    cudaGraphNode_t *pRootNodes;
+    size_t *pNumRootNodes;
+} cudaGraphGetRootNodes_v10000_params;
+typedef struct cudaGraphGetEdges_v10000_params_st { /* cudaGraphGetEdges params (v10000): graph handle, two node-handle pointers (from, to), and a size_t pointer numEdges. */
+    cudaGraph_t graph;
+    cudaGraphNode_t *from;
+    cudaGraphNode_t *to;
+    size_t *numEdges;
+} cudaGraphGetEdges_v10000_params;
+typedef struct cudaGraphNodeGetDependencies_v10000_params_st { /* cudaGraphNodeGetDependencies params (v10000): node handle, pointer to node handles, and a size_t pointer pNumDependencies. */
+    cudaGraphNode_t node;
+    cudaGraphNode_t *pDependencies;
+    size_t *pNumDependencies;
+} cudaGraphNodeGetDependencies_v10000_params;
+typedef struct cudaGraphNodeGetDependentNodes_v10000_params_st { /* cudaGraphNodeGetDependentNodes params (v10000): node handle, pointer to node handles, and a size_t pointer pNumDependentNodes. */
+    cudaGraphNode_t node;
+    cudaGraphNode_t *pDependentNodes;
+    size_t *pNumDependentNodes;
+} cudaGraphNodeGetDependentNodes_v10000_params;
+typedef struct cudaGraphAddDependencies_v10000_params_st { /* cudaGraphAddDependencies params (v10000): graph handle, const from/to node-handle pointers, and a size_t numDependencies by value. */
+    cudaGraph_t graph;
+    const cudaGraphNode_t *from;
+    const cudaGraphNode_t *to;
+    size_t numDependencies;
+} cudaGraphAddDependencies_v10000_params;
+typedef struct cudaGraphRemoveDependencies_v10000_params_st { /* cudaGraphRemoveDependencies params (v10000): same four fields as the AddDependencies record. */
+    cudaGraph_t graph;
+    const cudaGraphNode_t *from;
+    const cudaGraphNode_t *to;
+    size_t numDependencies;
+} cudaGraphRemoveDependencies_v10000_params;
+typedef struct cudaGraphDestroyNode_v10000_params_st { /* cudaGraphDestroyNode params (v10000): the node handle only. */
+    cudaGraphNode_t node;
+} cudaGraphDestroyNode_v10000_params;
+typedef struct cudaGraphInstantiate_v10000_params_st { /* cudaGraphInstantiate params (v10000): pointer to a cudaGraphExec_t, graph handle, pointer to a node handle (pErrorNode), a char* pLogBuffer and a size_t bufferSize. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraphExec_t *pGraphExec;
+    cudaGraph_t graph;
+    cudaGraphNode_t *pErrorNode;
+    char *pLogBuffer;
+    size_t bufferSize;
+} cudaGraphInstantiate_v10000_params;
+typedef struct cudaGraphInstantiateWithFlags_v11040_params_st { /* cudaGraphInstantiateWithFlags params (v11040): pointer to a cudaGraphExec_t, graph handle, and unsigned long long flags. */
+    cudaGraphExec_t *pGraphExec;
+    cudaGraph_t graph;
+    unsigned long long flags;
+} cudaGraphInstantiateWithFlags_v11040_params;
+typedef struct cudaGraphExecKernelNodeSetParams_v10010_params_st { /* cudaGraphExecKernelNodeSetParams params (v10010): exec-graph handle, node handle, and a const cudaKernelNodeParams pointer. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraphExec_t hGraphExec;
+    cudaGraphNode_t node;
+    const struct cudaKernelNodeParams *pNodeParams;
+} cudaGraphExecKernelNodeSetParams_v10010_params;
+typedef struct cudaGraphExecMemcpyNodeSetParams_v10020_params_st { /* cudaGraphExecMemcpyNodeSetParams params (v10020): exec-graph handle, node handle, and a const cudaMemcpy3DParms pointer. */
+    cudaGraphExec_t hGraphExec;
+    cudaGraphNode_t node;
+    const struct cudaMemcpy3DParms *pNodeParams;
+} cudaGraphExecMemcpyNodeSetParams_v10020_params;
+typedef struct cudaGraphExecMemcpyNodeSetParamsToSymbol_v11010_params_st { /* cudaGraphExecMemcpyNodeSetParamsToSymbol params (v11010): exec-graph handle, node handle, then symbol, src, count, offset and kind. */
+    cudaGraphExec_t hGraphExec;
+    cudaGraphNode_t node;
+    const void *symbol;
+    const void *src;
+    size_t count;
+    size_t offset;
+    enum cudaMemcpyKind kind;
+} cudaGraphExecMemcpyNodeSetParamsToSymbol_v11010_params;
+typedef struct cudaGraphExecMemcpyNodeSetParamsFromSymbol_v11010_params_st { /* cudaGraphExecMemcpyNodeSetParamsFromSymbol params (v11010): exec-graph handle, node handle, then dst, symbol, count, offset and kind. */
+    cudaGraphExec_t hGraphExec;
+    cudaGraphNode_t node;
+    void *dst;
+    const void *symbol;
+    size_t count;
+    size_t offset;
+    enum cudaMemcpyKind kind;
+} cudaGraphExecMemcpyNodeSetParamsFromSymbol_v11010_params;
+typedef struct cudaGraphExecMemcpyNodeSetParams1D_v11010_params_st { /* cudaGraphExecMemcpyNodeSetParams1D params (v11010): exec-graph handle, node handle, then dst, src, count and kind. */
+    cudaGraphExec_t hGraphExec;
+    cudaGraphNode_t node;
+    void *dst;
+    const void *src;
+    size_t count;
+    enum cudaMemcpyKind kind;
+} cudaGraphExecMemcpyNodeSetParams1D_v11010_params;
+typedef struct cudaGraphExecMemsetNodeSetParams_v10020_params_st { /* cudaGraphExecMemsetNodeSetParams params (v10020): exec-graph handle, node handle, and a const cudaMemsetParams pointer. */
+    cudaGraphExec_t hGraphExec;
+    cudaGraphNode_t node;
+    const struct cudaMemsetParams *pNodeParams;
+} cudaGraphExecMemsetNodeSetParams_v10020_params;
+typedef struct cudaGraphExecHostNodeSetParams_v10020_params_st { /* cudaGraphExecHostNodeSetParams params (v10020): exec-graph handle, node handle, and a const cudaHostNodeParams pointer. */
+    cudaGraphExec_t hGraphExec;
+    cudaGraphNode_t node;
+    const struct cudaHostNodeParams *pNodeParams;
+} cudaGraphExecHostNodeSetParams_v10020_params;
+typedef struct cudaGraphExecChildGraphNodeSetParams_v11010_params_st { /* cudaGraphExecChildGraphNodeSetParams params (v11010): exec-graph handle, node handle, and a cudaGraph_t childGraph. */
+    cudaGraphExec_t hGraphExec;
+    cudaGraphNode_t node;
+    cudaGraph_t childGraph;
+} cudaGraphExecChildGraphNodeSetParams_v11010_params;
+typedef struct cudaGraphExecEventRecordNodeSetEvent_v11010_params_st { /* cudaGraphExecEventRecordNodeSetEvent params (v11010): exec-graph handle, node handle hNode, and a cudaEvent_t. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraphExec_t hGraphExec;
+    cudaGraphNode_t hNode;
+    cudaEvent_t event;
+} cudaGraphExecEventRecordNodeSetEvent_v11010_params;
+typedef struct cudaGraphExecEventWaitNodeSetEvent_v11010_params_st { /* cudaGraphExecEventWaitNodeSetEvent params (v11010): same three fields as the record-node variant. */
+    cudaGraphExec_t hGraphExec;
+    cudaGraphNode_t hNode;
+    cudaEvent_t event;
+} cudaGraphExecEventWaitNodeSetEvent_v11010_params;
+typedef struct cudaGraphExecExternalSemaphoresSignalNodeSetParams_v11020_params_st { /* Exec-graph signal-node SetParams record (v11020): exec-graph handle, node handle, and a const cudaExternalSemaphoreSignalNodeParams pointer. */
+    cudaGraphExec_t hGraphExec;
+    cudaGraphNode_t hNode;
+    const struct cudaExternalSemaphoreSignalNodeParams *nodeParams;
+} cudaGraphExecExternalSemaphoresSignalNodeSetParams_v11020_params;
+typedef struct cudaGraphExecExternalSemaphoresWaitNodeSetParams_v11020_params_st { /* Exec-graph wait-node SetParams record (v11020): exec-graph handle, node handle, and a const cudaExternalSemaphoreWaitNodeParams pointer. */
+    cudaGraphExec_t hGraphExec;
+    cudaGraphNode_t hNode;
+    const struct cudaExternalSemaphoreWaitNodeParams *nodeParams;
+} cudaGraphExecExternalSemaphoresWaitNodeSetParams_v11020_params;
+typedef struct cudaGraphNodeSetEnabled_v11060_params_st { /* cudaGraphNodeSetEnabled params (v11060): exec-graph handle, node handle, and an unsigned int isEnabled by value. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraphExec_t hGraphExec;
+    cudaGraphNode_t hNode;
+    unsigned int isEnabled;
+} cudaGraphNodeSetEnabled_v11060_params;
+typedef struct cudaGraphNodeGetEnabled_v11060_params_st { /* cudaGraphNodeGetEnabled params (v11060): exec-graph handle, node handle, and an unsigned int pointer isEnabled. */
+    cudaGraphExec_t hGraphExec;
+    cudaGraphNode_t hNode;
+    unsigned int *isEnabled;
+} cudaGraphNodeGetEnabled_v11060_params;
+typedef struct cudaGraphExecUpdate_v10020_params_st { /* cudaGraphExecUpdate params (v10020): exec-graph handle, graph handle, pointer to a node handle, and pointer to a cudaGraphExecUpdateResult. */
+    cudaGraphExec_t hGraphExec;
+    cudaGraph_t hGraph;
+    cudaGraphNode_t *hErrorNode_out;
+    enum cudaGraphExecUpdateResult *updateResult_out;
+} cudaGraphExecUpdate_v10020_params;
+typedef struct cudaGraphUpload_ptsz_v10000_params_st { /* cudaGraphUpload params (_ptsz, v10000): a cudaGraphExec_t and a cudaStream_t. Presumably mirrors the same-named API call's argument list -- confirm. */
+    cudaGraphExec_t graphExec;
+    cudaStream_t stream;
+} cudaGraphUpload_ptsz_v10000_params;
+typedef struct cudaGraphLaunch_ptsz_v10000_params_st { /* cudaGraphLaunch params (_ptsz, v10000): a cudaGraphExec_t and a cudaStream_t. */
+    cudaGraphExec_t graphExec;
+    cudaStream_t stream;
+} cudaGraphLaunch_ptsz_v10000_params;
+typedef struct cudaGraphExecDestroy_v10000_params_st { /* cudaGraphExecDestroy params (v10000): the cudaGraphExec_t only. */
+    cudaGraphExec_t graphExec;
+} cudaGraphExecDestroy_v10000_params;
+typedef struct cudaGraphDestroy_v10000_params_st { /* cudaGraphDestroy params (v10000): the cudaGraph_t only. */
+    cudaGraph_t graph;
+} cudaGraphDestroy_v10000_params;
+typedef struct cudaGraphDebugDotPrint_v11030_params_st { /* cudaGraphDebugDotPrint params (v11030): graph handle, a const char* path and unsigned flags. */
+    cudaGraph_t graph;
+    const char *path;
+    unsigned int flags;
+} cudaGraphDebugDotPrint_v11030_params;
+typedef struct cudaUserObjectCreate_v11030_params_st {  // Parameter trace record for cudaUserObjectCreate.
+    cudaUserObject_t *object_out;  // NOTE(review): `_out` suffix suggests the created handle is returned here - confirm.
+    void *ptr;
+    cudaHostFn_t destroy;  // Host function pointer type; presumably the destructor callback for `ptr` - confirm.
+    unsigned int initialRefcount;
+    unsigned int flags;
+} cudaUserObjectCreate_v11030_params;
+typedef struct cudaUserObjectRetain_v11030_params_st {  // Parameter trace record for cudaUserObjectRetain.
+    cudaUserObject_t object;
+    unsigned int count;
+} cudaUserObjectRetain_v11030_params;
+typedef struct cudaUserObjectRelease_v11030_params_st {  // Parameter trace record for cudaUserObjectRelease; same field list as the Retain record.
+    cudaUserObject_t object;
+    unsigned int count;
+} cudaUserObjectRelease_v11030_params;
+typedef struct cudaGraphRetainUserObject_v11030_params_st {  // Parameter trace record for cudaGraphRetainUserObject.
+    cudaGraph_t graph;
+    cudaUserObject_t object;
+    unsigned int count;
+    unsigned int flags;  // Present on the graph Retain record only; the graph Release record has no flags field.
+} cudaGraphRetainUserObject_v11030_params;
+typedef struct cudaGraphReleaseUserObject_v11030_params_st {  // Parameter trace record for cudaGraphReleaseUserObject.
+    cudaGraph_t graph;
+    cudaUserObject_t object;
+    unsigned int count;
+} cudaGraphReleaseUserObject_v11030_params;
+typedef struct cudaGetDriverEntryPoint_ptsz_v11030_params_st {  // Parameter trace record for cudaGetDriverEntryPoint_ptsz; field list matches cudaGetDriverEntryPoint_v11030_params.
+    const char *symbol;
+    void **funcPtr;  // Pointer to a void*; presumably receives the resolved entry point - confirm.
+    unsigned long long flags;
+} cudaGetDriverEntryPoint_ptsz_v11030_params;
+typedef struct cudaGetFuncBySymbol_v11000_params_st {  // Parameter trace record for cudaGetFuncBySymbol.
+    cudaFunction_t *functionPtr;
+    const void *symbolPtr;
+} cudaGetFuncBySymbol_v11000_params;
+typedef struct cudaMemcpy_v3020_params_st {  // Parameter trace record for cudaMemcpy.
+    void *dst;
+    const void *src;
+    size_t count;
+    enum cudaMemcpyKind kind;
+} cudaMemcpy_v3020_params;
+typedef struct cudaMemcpyToSymbol_v3020_params_st {  // Parameter trace record for cudaMemcpyToSymbol.
+    const void *symbol;  // The symbol takes the place of `dst` in this record.
+    const void *src;
+    size_t count;
+    size_t offset;
+    enum cudaMemcpyKind kind;
+} cudaMemcpyToSymbol_v3020_params;
+typedef struct cudaMemcpyFromSymbol_v3020_params_st {  // Parameter trace record for cudaMemcpyFromSymbol.
+    void *dst;
+    const void *symbol;  // The symbol takes the place of `src` in this record.
+    size_t count;
+    size_t offset;
+    enum cudaMemcpyKind kind;
+} cudaMemcpyFromSymbol_v3020_params;
+typedef struct cudaMemcpy2D_v3020_params_st {  // Parameter trace record for cudaMemcpy2D.
+    void *dst;
+    size_t dpitch;  // Pitch paired with `dst`.
+    const void *src;
+    size_t spitch;  // Pitch paired with `src`.
+    size_t width;
+    size_t height;
+    enum cudaMemcpyKind kind;
+} cudaMemcpy2D_v3020_params;
+typedef struct cudaMemcpyToArray_v3020_params_st {  // Parameter trace record for cudaMemcpyToArray.
+    cudaArray_t dst;
+    size_t wOffset;  // wOffset/hOffset follow the array argument (`dst`) in this record.
+    size_t hOffset;
+    const void *src;
+    size_t count;
+    enum cudaMemcpyKind kind;
+} cudaMemcpyToArray_v3020_params;
+typedef struct cudaMemcpy2DToArray_v3020_params_st {  // Parameter trace record for cudaMemcpy2DToArray.
+    cudaArray_t dst;
+    size_t wOffset;
+    size_t hOffset;
+    const void *src;
+    size_t spitch;
+    size_t width;
+    size_t height;
+    enum cudaMemcpyKind kind;
+} cudaMemcpy2DToArray_v3020_params;
+typedef struct cudaMemcpyFromArray_v3020_params_st {  // Parameter trace record for cudaMemcpyFromArray.
+    void *dst;
+    cudaArray_const_t src;  // Source array uses the const array handle type.
+    size_t wOffset;  // wOffset/hOffset follow the array argument (`src`) in this record.
+    size_t hOffset;
+    size_t count;
+    enum cudaMemcpyKind kind;
+} cudaMemcpyFromArray_v3020_params;
+typedef struct cudaMemcpy2DFromArray_v3020_params_st {  // Parameter trace record for cudaMemcpy2DFromArray.
+    void *dst;
+    size_t dpitch;
+    cudaArray_const_t src;
+    size_t wOffset;
+    size_t hOffset;
+    size_t width;
+    size_t height;
+    enum cudaMemcpyKind kind;
+} cudaMemcpy2DFromArray_v3020_params;
+typedef struct cudaMemcpyArrayToArray_v3020_params_st {  // Parameter trace record for cudaMemcpyArrayToArray.
+    cudaArray_t dst;
+    size_t wOffsetDst;  // Separate offset pairs are kept for destination and source.
+    size_t hOffsetDst;
+    cudaArray_const_t src;
+    size_t wOffsetSrc;
+    size_t hOffsetSrc;
+    size_t count;
+    enum cudaMemcpyKind kind;
+} cudaMemcpyArrayToArray_v3020_params;
+typedef struct cudaMemcpy2DArrayToArray_v3020_params_st {  // Parameter trace record for cudaMemcpy2DArrayToArray.
+    cudaArray_t dst;
+    size_t wOffsetDst;
+    size_t hOffsetDst;
+    cudaArray_const_t src;
+    size_t wOffsetSrc;
+    size_t hOffsetSrc;
+    size_t width;  // width/height replace the `count` field of the ArrayToArray record.
+    size_t height;
+    enum cudaMemcpyKind kind;
+} cudaMemcpy2DArrayToArray_v3020_params;
+typedef struct cudaMemcpy3D_v3020_params_st {  // Parameter trace record for cudaMemcpy3D: one pointer to a const parameter block.
+    const struct cudaMemcpy3DParms *p;
+} cudaMemcpy3D_v3020_params;
+typedef struct cudaMemcpy3DPeer_v4000_params_st {  // Parameter trace record for cudaMemcpy3DPeer: one pointer to a const parameter block.
+    const struct cudaMemcpy3DPeerParms *p;
+} cudaMemcpy3DPeer_v4000_params;
+typedef struct cudaMemset_v3020_params_st {  // Parameter trace record for cudaMemset.
+    void *devPtr;
+    int value;
+    size_t count;
+} cudaMemset_v3020_params;
+typedef struct cudaMemset2D_v3020_params_st {  // Parameter trace record for cudaMemset2D.
+    void *devPtr;
+    size_t pitch;
+    int value;
+    size_t width;
+    size_t height;
+} cudaMemset2D_v3020_params;
+typedef struct cudaMemset3D_v3020_params_st {  // Parameter trace record for cudaMemset3D.
+    struct cudaPitchedPtr pitchedDevPtr;  // Stored by value (struct member, not a pointer).
+    int value;
+    struct cudaExtent extent;  // Stored by value (struct member, not a pointer).
+} cudaMemset3D_v3020_params;
+typedef struct cudaMemcpyAsync_v3020_params_st {  // Parameter trace record for cudaMemcpyAsync: the cudaMemcpy fields plus a trailing stream.
+    void *dst;
+    const void *src;
+    size_t count;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpyAsync_v3020_params;
+typedef struct cudaMemcpyToSymbolAsync_v3020_params_st {  // Parameter trace record for cudaMemcpyToSymbolAsync: the cudaMemcpyToSymbol fields plus a trailing stream.
+    const void *symbol;
+    const void *src;
+    size_t count;
+    size_t offset;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpyToSymbolAsync_v3020_params;
+typedef struct cudaMemcpyFromSymbolAsync_v3020_params_st {  // Parameter trace record for cudaMemcpyFromSymbolAsync: the cudaMemcpyFromSymbol fields plus a trailing stream.
+    void *dst;
+    const void *symbol;
+    size_t count;
+    size_t offset;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpyFromSymbolAsync_v3020_params;
+typedef struct cudaMemcpy2DAsync_v3020_params_st {  // Parameter trace record for cudaMemcpy2DAsync: the cudaMemcpy2D fields plus a trailing stream.
+    void *dst;
+    size_t dpitch;
+    const void *src;
+    size_t spitch;
+    size_t width;
+    size_t height;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpy2DAsync_v3020_params;
+typedef struct cudaMemcpyToArrayAsync_v3020_params_st {  // Parameter trace record for cudaMemcpyToArrayAsync: the cudaMemcpyToArray fields plus a trailing stream.
+    cudaArray_t dst;
+    size_t wOffset;
+    size_t hOffset;
+    const void *src;
+    size_t count;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpyToArrayAsync_v3020_params;
+typedef struct cudaMemcpy2DToArrayAsync_v3020_params_st {  // Parameter trace record for cudaMemcpy2DToArrayAsync: the cudaMemcpy2DToArray fields plus a trailing stream.
+    cudaArray_t dst;
+    size_t wOffset;
+    size_t hOffset;
+    const void *src;
+    size_t spitch;
+    size_t width;
+    size_t height;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpy2DToArrayAsync_v3020_params;
+typedef struct cudaMemcpyFromArrayAsync_v3020_params_st {  // Parameter trace record for cudaMemcpyFromArrayAsync: the cudaMemcpyFromArray fields plus a trailing stream.
+    void *dst;
+    cudaArray_const_t src;
+    size_t wOffset;
+    size_t hOffset;
+    size_t count;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpyFromArrayAsync_v3020_params;
+typedef struct cudaMemcpy2DFromArrayAsync_v3020_params_st {  // Parameter trace record for cudaMemcpy2DFromArrayAsync: the cudaMemcpy2DFromArray fields plus a trailing stream.
+    void *dst;
+    size_t dpitch;
+    cudaArray_const_t src;
+    size_t wOffset;
+    size_t hOffset;
+    size_t width;
+    size_t height;
+    enum cudaMemcpyKind kind;
+    cudaStream_t stream;
+} cudaMemcpy2DFromArrayAsync_v3020_params;
+typedef struct cudaMemcpy3DAsync_v3020_params_st {  // Parameter trace record for cudaMemcpy3DAsync: const parameter-block pointer plus stream.
+    const struct cudaMemcpy3DParms *p;
+    cudaStream_t stream;
+} cudaMemcpy3DAsync_v3020_params;
+typedef struct cudaMemcpy3DPeerAsync_v4000_params_st {  // Parameter trace record for cudaMemcpy3DPeerAsync: const parameter-block pointer plus stream.
+    const struct cudaMemcpy3DPeerParms *p;
+    cudaStream_t stream;
+} cudaMemcpy3DPeerAsync_v4000_params;
+typedef struct cudaMemsetAsync_v3020_params_st {  // Parameter trace record for cudaMemsetAsync: the cudaMemset fields plus a trailing stream.
+    void *devPtr;
+    int value;
+    size_t count;
+    cudaStream_t stream;
+} cudaMemsetAsync_v3020_params;
+typedef struct cudaMemset2DAsync_v3020_params_st {  // Parameter trace record for cudaMemset2DAsync: the cudaMemset2D fields plus a trailing stream.
+    void *devPtr;
+    size_t pitch;
+    int value;
+    size_t width;
+    size_t height;
+    cudaStream_t stream;
+} cudaMemset2DAsync_v3020_params;
+typedef struct cudaMemset3DAsync_v3020_params_st {  // Parameter trace record for cudaMemset3DAsync: the cudaMemset3D fields plus a trailing stream.
+    struct cudaPitchedPtr pitchedDevPtr;
+    int value;
+    struct cudaExtent extent;
+    cudaStream_t stream;
+} cudaMemset3DAsync_v3020_params;
+typedef struct cudaStreamQuery_v3020_params_st {  // Parameter trace record for cudaStreamQuery (single stream argument).
+    cudaStream_t stream;
+} cudaStreamQuery_v3020_params;
+typedef struct cudaStreamGetFlags_v5050_params_st {  // Parameter trace record for cudaStreamGetFlags.
+    cudaStream_t hStream;  // Stream field is named `hStream` here, `stream` in most neighbouring records.
+    unsigned int *flags;  // Pointer; presumably receives the flags - confirm.
+} cudaStreamGetFlags_v5050_params;
+typedef struct cudaStreamGetPriority_v5050_params_st {  // Parameter trace record for cudaStreamGetPriority.
+    cudaStream_t hStream;
+    int *priority;  // Pointer; presumably receives the priority - confirm.
+} cudaStreamGetPriority_v5050_params;
+typedef struct cudaEventRecord_v3020_params_st {  // Parameter trace record for cudaEventRecord.
+    cudaEvent_t event;
+    cudaStream_t stream;
+} cudaEventRecord_v3020_params;
+typedef struct cudaEventRecordWithFlags_v11010_params_st {  // Parameter trace record for cudaEventRecordWithFlags: the cudaEventRecord fields plus flags.
+    cudaEvent_t event;
+    cudaStream_t stream;
+    unsigned int flags;
+} cudaEventRecordWithFlags_v11010_params;
+typedef struct cudaStreamWaitEvent_v3020_params_st {  // Parameter trace record for cudaStreamWaitEvent.
+    cudaStream_t stream;  // Stream comes first here; the event-record records list the event first.
+    cudaEvent_t event;
+    unsigned int flags;
+} cudaStreamWaitEvent_v3020_params;
+typedef struct cudaStreamAddCallback_v5000_params_st {  // Parameter trace record for cudaStreamAddCallback.
+    cudaStream_t stream;
+    cudaStreamCallback_t callback;
+    void *userData;  // Opaque pointer; presumably forwarded to `callback` - confirm.
+    unsigned int flags;
+} cudaStreamAddCallback_v5000_params;
+typedef struct cudaStreamAttachMemAsync_v6000_params_st {  // Parameter trace record for cudaStreamAttachMemAsync.
+    cudaStream_t stream;
+    void *devPtr;
+    size_t length;
+    unsigned int flags;
+} cudaStreamAttachMemAsync_v6000_params;
+typedef struct cudaStreamSynchronize_v3020_params_st {  // Parameter trace record for cudaStreamSynchronize (single stream argument).
+    cudaStream_t stream;
+} cudaStreamSynchronize_v3020_params;
+typedef struct cudaLaunchKernel_v7000_params_st {  // Parameter trace record for cudaLaunchKernel.
+    const void *func;
+    dim3 gridDim;  // Stored by value.
+    dim3 blockDim;  // Stored by value.
+    void **args;  // Array of void* ; presumably one entry per kernel argument - confirm.
+    size_t sharedMem;
+    cudaStream_t stream;
+} cudaLaunchKernel_v7000_params;
+typedef struct cudaLaunchKernelExC_v11060_params_st {  // Parameter trace record for cudaLaunchKernelExC.
+    const cudaLaunchConfig_t *config;  // Launch settings come as one const config pointer instead of separate grid/block/stream fields.
+    const void *func;
+    void **args;
+} cudaLaunchKernelExC_v11060_params;
+typedef struct cudaLaunchCooperativeKernel_v9000_params_st {  // Parameter trace record for cudaLaunchCooperativeKernel; same field list as the cudaLaunchKernel record.
+    const void *func;
+    dim3 gridDim;
+    dim3 blockDim;
+    void **args;
+    size_t sharedMem;
+    cudaStream_t stream;
+} cudaLaunchCooperativeKernel_v9000_params;
+typedef struct cudaLaunchHostFunc_v10000_params_st {  // Parameter trace record for cudaLaunchHostFunc.
+    cudaStream_t stream;
+    cudaHostFn_t fn;
+    void *userData;  // Opaque pointer; presumably forwarded to `fn` - confirm.
+} cudaLaunchHostFunc_v10000_params;
+typedef struct cudaMemPrefetchAsync_v8000_params_st {  // Parameter trace record for cudaMemPrefetchAsync.
+    const void *devPtr;
+    size_t count;
+    int dstDevice;
+    cudaStream_t stream;
+} cudaMemPrefetchAsync_v8000_params;
+typedef struct cudaSignalExternalSemaphoresAsync_v10000_params_st {  // Parameter trace record for cudaSignalExternalSemaphoresAsync (v10000 entry).
+    const cudaExternalSemaphore_t *extSemArray;
+    const struct cudaExternalSemaphoreSignalParams_v1 *paramsArray;  // Uses the `_v1` parameter struct; the `_v2` record below uses the unsuffixed struct.
+    unsigned int numExtSems;  // Presumably the element count of both arrays - confirm.
+    cudaStream_t stream;
+} cudaSignalExternalSemaphoresAsync_v10000_params;
+typedef struct cudaSignalExternalSemaphoresAsync_ptsz_v10000_params_st {  // Parameter trace record for cudaSignalExternalSemaphoresAsync_ptsz; same field list as the v10000 record.
+    const cudaExternalSemaphore_t *extSemArray;
+    const struct cudaExternalSemaphoreSignalParams_v1 *paramsArray;
+    unsigned int numExtSems;
+    cudaStream_t stream;
+} cudaSignalExternalSemaphoresAsync_ptsz_v10000_params;
+typedef struct cudaSignalExternalSemaphoresAsync_v2_v11020_params_st {  // Parameter trace record for cudaSignalExternalSemaphoresAsync_v2.
+    const cudaExternalSemaphore_t *extSemArray;
+    const struct cudaExternalSemaphoreSignalParams *paramsArray;  // Unsuffixed parameter struct (differs from the `_v1` type above).
+    unsigned int numExtSems;
+    cudaStream_t stream;
+} cudaSignalExternalSemaphoresAsync_v2_v11020_params;
+typedef struct cudaWaitExternalSemaphoresAsync_v10000_params_st {  // Parameter trace record for cudaWaitExternalSemaphoresAsync (v10000 entry).
+    const cudaExternalSemaphore_t *extSemArray;
+    const struct cudaExternalSemaphoreWaitParams_v1 *paramsArray;  // Uses the `_v1` parameter struct; the `_v2` record below uses the unsuffixed struct.
+    unsigned int numExtSems;
+    cudaStream_t stream;
+} cudaWaitExternalSemaphoresAsync_v10000_params;
+typedef struct cudaWaitExternalSemaphoresAsync_ptsz_v10000_params_st {  // Parameter trace record for cudaWaitExternalSemaphoresAsync_ptsz; same field list as the v10000 record.
+    const cudaExternalSemaphore_t *extSemArray;
+    const struct cudaExternalSemaphoreWaitParams_v1 *paramsArray;
+    unsigned int numExtSems;
+    cudaStream_t stream;
+} cudaWaitExternalSemaphoresAsync_ptsz_v10000_params;
+typedef struct cudaWaitExternalSemaphoresAsync_v2_v11020_params_st {  // Parameter trace record for cudaWaitExternalSemaphoresAsync_v2.
+    const cudaExternalSemaphore_t *extSemArray;
+    const struct cudaExternalSemaphoreWaitParams *paramsArray;  // Unsuffixed parameter struct (differs from the `_v1` type above).
+    unsigned int numExtSems;
+    cudaStream_t stream;
+} cudaWaitExternalSemaphoresAsync_v2_v11020_params;
+typedef struct cudaGraphUpload_v10000_params_st {  // Parameter trace record for cudaGraphUpload.
+    cudaGraphExec_t graphExec;
+    cudaStream_t stream;
+} cudaGraphUpload_v10000_params;
+typedef struct cudaGraphLaunch_v10000_params_st {  // Parameter trace record for cudaGraphLaunch.
+    cudaGraphExec_t graphExec;
+    cudaStream_t stream;
+} cudaGraphLaunch_v10000_params;
+typedef struct cudaStreamBeginCapture_v10000_params_st {  // Parameter trace record for cudaStreamBeginCapture.
+    cudaStream_t stream;
+    enum cudaStreamCaptureMode mode;
+} cudaStreamBeginCapture_v10000_params;
+typedef struct cudaStreamEndCapture_v10000_params_st {  // Parameter trace record for cudaStreamEndCapture.
+    cudaStream_t stream;
+    cudaGraph_t *pGraph;  // Pointer to a graph handle; presumably receives the captured graph - confirm.
+} cudaStreamEndCapture_v10000_params;
+typedef struct cudaStreamIsCapturing_v10000_params_st {  // Parameter trace record for cudaStreamIsCapturing.
+    cudaStream_t stream;
+    enum cudaStreamCaptureStatus *pCaptureStatus;
+} cudaStreamIsCapturing_v10000_params;
+typedef struct cudaStreamGetCaptureInfo_v10010_params_st {  // Parameter trace record for cudaStreamGetCaptureInfo (v10010 entry).
+    cudaStream_t stream;
+    enum cudaStreamCaptureStatus *captureStatus_out;
+    unsigned long long *id_out;
+} cudaStreamGetCaptureInfo_v10010_params;
+typedef struct cudaStreamGetCaptureInfo_v2_v11030_params_st {  // Parameter trace record for cudaStreamGetCaptureInfo_v2: the v10010 fields plus graph and dependency outputs.
+    cudaStream_t stream;
+    enum cudaStreamCaptureStatus *captureStatus_out;
+    unsigned long long *id_out;
+    cudaGraph_t *graph_out;
+    const cudaGraphNode_t **dependencies_out;  // Pointer to a pointer to const node handles.
+    size_t *numDependencies_out;
+} cudaStreamGetCaptureInfo_v2_v11030_params;
+typedef struct cudaStreamUpdateCaptureDependencies_ptsz_v11030_params_st {  // Parameter trace record for cudaStreamUpdateCaptureDependencies_ptsz.
+    cudaStream_t stream;
+    cudaGraphNode_t *dependencies;
+    size_t numDependencies;  // Presumably the element count of `dependencies` - confirm.
+    unsigned int flags;
+} cudaStreamUpdateCaptureDependencies_ptsz_v11030_params;
+typedef struct cudaStreamCopyAttributes_v11000_params_st {  // Parameter trace record for cudaStreamCopyAttributes.
+    cudaStream_t dstStream;
+    cudaStream_t srcStream;
+} cudaStreamCopyAttributes_v11000_params;
+typedef struct cudaStreamGetAttribute_v11000_params_st {  // Parameter trace record for cudaStreamGetAttribute.
+    cudaStream_t stream;
+    cudaStreamAttrID attr;
+    cudaStreamAttrValue *value;  // Non-const pointer, named `value` in the getter record.
+} cudaStreamGetAttribute_v11000_params;
+typedef struct cudaStreamSetAttribute_v11000_params_st {  // Parameter trace record for cudaStreamSetAttribute.
+    cudaStream_t stream;
+    cudaStreamAttrID attr;
+    const cudaStreamAttrValue *param;  // Const pointer, named `param` in the setter record.
+} cudaStreamSetAttribute_v11000_params;
+typedef struct cudaMallocAsync_v11020_params_st {  // Parameter trace record for cudaMallocAsync.
+    void **devPtr;  // Pointer to a void*; presumably receives the allocation - confirm.
+    size_t size;
+    cudaStream_t hStream;
+} cudaMallocAsync_v11020_params;
+typedef struct cudaFreeAsync_v11020_params_st {  // Parameter trace record for cudaFreeAsync.
+    void *devPtr;
+    cudaStream_t hStream;
+} cudaFreeAsync_v11020_params;
+typedef struct cudaMallocFromPoolAsync_v11020_params_st {  // Parameter trace record for cudaMallocFromPoolAsync.
+    void **ptr;  // Named `ptr` here, `devPtr` in the cudaMallocAsync record.
+    size_t size;
+    cudaMemPool_t memPool;
+    cudaStream_t stream;  // Named `stream` here, `hStream` in the cudaMallocAsync record.
+} cudaMallocFromPoolAsync_v11020_params;
+typedef struct cudaGetDriverEntryPoint_v11030_params_st {  // Parameter trace record for cudaGetDriverEntryPoint.
+    const char *symbol;
+    void **funcPtr;  // Pointer to a void*; presumably receives the resolved entry point - confirm.
+    unsigned long long flags;
+} cudaGetDriverEntryPoint_v11030_params;
+// Parameter trace structures for removed functions
+// End of parameter trace structures

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_vdpau_interop_meta.h ADDED Viewed

	@@ -0,0 +1,38 @@

+// This file is generated.  Any changes you make will be lost during the next clean build.
+// CUDA public interface, for type definitions and api function prototypes
+#include "cuda_vdpau_interop.h"
+// *************************************************************************
+//      Definitions of structs to hold parameters for each function
+// *************************************************************************
+// Currently used parameter trace structures
+typedef struct cudaVDPAUGetDevice_v3020_params_st {  // Holds the parameters of cudaVDPAUGetDevice.
+    int *device;  // Pointer in the Get record; the Set record below carries `device` by value.
+    VdpDevice vdpDevice;
+    VdpGetProcAddress *vdpGetProcAddress;
+} cudaVDPAUGetDevice_v3020_params;
+typedef struct cudaVDPAUSetVDPAUDevice_v3020_params_st {  // Holds the parameters of cudaVDPAUSetVDPAUDevice.
+    int device;
+    VdpDevice vdpDevice;
+    VdpGetProcAddress *vdpGetProcAddress;
+} cudaVDPAUSetVDPAUDevice_v3020_params;
+typedef struct cudaGraphicsVDPAURegisterVideoSurface_v3020_params_st {  // Holds the parameters of cudaGraphicsVDPAURegisterVideoSurface.
+    struct cudaGraphicsResource **resource;  // Pointer to a resource pointer; presumably receives the registered resource - confirm.
+    VdpVideoSurface vdpSurface;
+    unsigned int flags;
+} cudaGraphicsVDPAURegisterVideoSurface_v3020_params;
+typedef struct cudaGraphicsVDPAURegisterOutputSurface_v3020_params_st {  // Holds the parameters of cudaGraphicsVDPAURegisterOutputSurface.
+    struct cudaGraphicsResource **resource;
+    VdpOutputSurface vdpSurface;  // VdpOutputSurface here, VdpVideoSurface in the video-surface record.
+    unsigned int flags;
+} cudaGraphicsVDPAURegisterOutputSurface_v3020_params;
+// Parameter trace structures for removed functions
+// End of parameter trace structures

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/include/cudnn_cnn_infer.h ADDED Viewed

	@@ -0,0 +1,571 @@

+/*
+ * Copyright 2017-2022 NVIDIA Corporation.  All rights reserved.
+ *
+ * NOTICE TO LICENSEE:
+ *
+ * This source code and/or documentation ("Licensed Deliverables") are
+ * subject to NVIDIA intellectual property rights under U.S. and
+ * international Copyright laws.
+ *
+ * These Licensed Deliverables contained herein is PROPRIETARY and
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
+ * conditions of a form of NVIDIA software license agreement by and
+ * between NVIDIA and Licensee ("License Agreement") or electronically
+ * accepted by Licensee.  Notwithstanding any terms or conditions to
+ * the contrary in the License Agreement, reproduction or disclosure
+ * of the Licensed Deliverables to any third party without the express
+ * written consent of NVIDIA is prohibited.
+ *
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THESE LICENSED DELIVERABLES.
+ *
+ * U.S. Government End Users.  These Licensed Deliverables are a
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+ * 1995), consisting of "commercial computer software" and "commercial
+ * computer software documentation" as such terms are used in 48
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
+ * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+ * U.S. Government End Users acquire the Licensed Deliverables with
+ * only those rights set forth herein.
+ *
+ * Any use of the Licensed Deliverables in individual and commercial
+ * software must include, in the user documentation and internal
+ * comments to the code, the above Disclaimer and U.S. Government End
+ * Users Notice.
+ */
+/*
+ *  cudnn_cnn_infer : cuDNN's basic definitions and inference CNN functions.
+ */
+#if !defined(CUDNN_CNN_INFER_H_)
+#define CUDNN_CNN_INFER_H_
+#pragma once
+#include <cuda_runtime.h>
+#include <stdint.h>
+#include "cudnn_version.h"
+#include "cudnn_ops_infer.h"
+/* These version numbers are autogenerated, do not edit manually.
+ * They are compared below against CUDNN_MAJOR, CUDNN_MINOR and CUDNN_PATCHLEVEL;
+ * any difference stops compilation with the #error that follows. */
+#define CUDNN_CNN_INFER_MAJOR 8
+#define CUDNN_CNN_INFER_MINOR 7
+#define CUDNN_CNN_INFER_PATCH 0
+#if (CUDNN_CNN_INFER_MAJOR != CUDNN_MAJOR) || (CUDNN_CNN_INFER_MINOR != CUDNN_MINOR) || \
+    (CUDNN_CNN_INFER_PATCH != CUDNN_PATCHLEVEL)
+#error Version mismatch in cuDNN CNN INFER!!!
+#endif
+#if defined(__cplusplus)
+extern "C" {
+#endif
+typedef struct cudnnConvolutionStruct *cudnnConvolutionDescriptor_t; /* Handle type: pointer to struct cudnnConvolutionStruct, whose definition does not appear in the visible part of this header. */
+/*
+ *  Convolution mode: CUDNN_CONVOLUTION (0) or CUDNN_CROSS_CORRELATION (1).
+ *  Taken as the `mode` argument of cudnnSetConvolution2dDescriptor and
+ *  cudnnSetConvolutionNdDescriptor, and returned through a pointer by the
+ *  matching Get functions.
+ */
+typedef enum { CUDNN_CONVOLUTION = 0, CUDNN_CROSS_CORRELATION = 1 } cudnnConvolutionMode_t;
+/*
+ * cuDNN reorder type: CUDNN_DEFAULT_REORDER (0) or CUDNN_NO_REORDER (1).
+ * Taken by cudnnSetConvolutionReorderType / cudnnGetConvolutionReorderType
+ * and by cudnnReorderFilterAndBias.
+ */
+typedef enum {
+    CUDNN_DEFAULT_REORDER = 0,
+    CUDNN_NO_REORDER      = 1,
+} cudnnReorderType_t;
+typedef struct cudnnConvolutionFwdAlgoPerfStruct { /* Element type of the perfResults arrays taken by the forward-algorithm Get/Find functions. */
+    cudnnConvolutionFwdAlgo_t algo;
+    cudnnStatus_t status;
+    float time;   /* NOTE(review): unit is not stated in this header - confirm against the cuDNN API reference. */
+    size_t memory; /* NOTE(review): presumably a workspace size in bytes - confirm. */
+    cudnnDeterminism_t determinism;
+    cudnnMathType_t mathType;
+    int reserved[3]; /* Not given a meaning in this header. */
+} cudnnConvolutionFwdAlgoPerf_t;
+/* Create an instance of a convolution descriptor; convDesc points at the handle variable
+ * (cudnnConvolutionDescriptor_t *). */
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc);
+/* Destroy an instance of a convolution descriptor; takes the handle by value. */
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc);
+cudnnStatus_t CUDNNWINAPI /* Set/Get pair for the descriptor's math type: the setter takes the value, the getter a pointer to it. */
+cudnnSetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType);
+cudnnStatus_t CUDNNWINAPI
+cudnnGetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType);
+cudnnStatus_t CUDNNWINAPI /* Set/Get pair for the descriptor's group count (int by value / int pointer). */
+cudnnSetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int groupCount);
+cudnnStatus_t CUDNNWINAPI
+cudnnGetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int *groupCount);
+cudnnStatus_t CUDNNWINAPI /* Set/Get pair for the descriptor's reorder type (cudnnReorderType_t by value / pointer). */
+cudnnSetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType);
+cudnnStatus_t CUDNNWINAPI
+cudnnGetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType);
+cudnnStatus_t CUDNNWINAPI /* 2-D descriptor setter: padding, stride, dilation, mode and compute type are passed by value. */
+cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc,
+                                int pad_h,      /* zero-padding height */
+                                int pad_w,      /* zero-padding width */
+                                int u,          /* vertical filter stride */
+                                int v,          /* horizontal filter stride */
+                                int dilation_h, /* filter dilation in the vertical dimension */
+                                int dilation_w, /* filter dilation in the horizontal dimension */
+                                cudnnConvolutionMode_t mode,
+                                cudnnDataType_t computeType);
+cudnnStatus_t CUDNNWINAPI /* 2-D descriptor getter: same fields as the setter, each passed as a pointer. */
+cudnnGetConvolution2dDescriptor(const cudnnConvolutionDescriptor_t convDesc, /* const applies to the handle itself (a pointer typedef), not to the pointed-to struct */
+                                int *pad_h,      /* zero-padding height */
+                                int *pad_w,      /* zero-padding width */
+                                int *u,          /* vertical filter stride */
+                                int *v,          /* horizontal filter stride */
+                                int *dilation_h, /* filter dilation in the vertical dimension */
+                                int *dilation_w, /* filter dilation in the horizontal dimension */
+                                cudnnConvolutionMode_t *mode,
+                                cudnnDataType_t *computeType);
+cudnnStatus_t CUDNNWINAPI /* N-d descriptor setter: padding, filter stride and dilation are const int arrays; arrayLength is described below as "nbDims-2 size". */
+cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc,
+                                int arrayLength, /* nbDims-2 size */
+                                const int padA[],
+                                const int filterStrideA[],
+                                const int dilationA[],
+                                cudnnConvolutionMode_t mode,
+                                cudnnDataType_t computeType); /* convolution data type */
+/* Getter counterpart of cudnnSetConvolutionNdDescriptor: the pad, stride and dilation arrays,
+ * mode and compute type are passed as non-const arrays/pointers, together with
+ * arrayLengthRequested and an arrayLength pointer.
+ * NOTE(review): presumably at most arrayLengthRequested entries are written per array -
+ * confirm against the cuDNN API reference.
+ * NOTE(review): the comment that used to sit here described "the dimensions of the output
+ * tensor", which matches cudnnGetConvolutionNdForwardOutputDim rather than this function. */
+cudnnStatus_t CUDNNWINAPI
+cudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc,
+                                int arrayLengthRequested,
+                                int *arrayLength,
+                                int padA[],
+                                int strideA[],
+                                int dilationA[],
+                                cudnnConvolutionMode_t *mode,
+                                cudnnDataType_t *computeType); /* convolution data type */
+cudnnStatus_t CUDNNWINAPI /* 2-D form of the output-dimension helper: takes the convolution, input tensor and filter descriptors plus four int pointers n, c, h, w. */
+cudnnGetConvolution2dForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
+                                      const cudnnTensorDescriptor_t inputTensorDesc,
+                                      const cudnnFilterDescriptor_t filterDesc,
+                                      int *n,
+                                      int *c,
+                                      int *h,
+                                      int *w);
+/* Helper function to return the dimensions of the output tensor given a convolution descriptor
+ * (N-d form: dimensions go into the tensorOuputDimA array; nbDims is presumably its length - confirm).
+ * NOTE(review): the parameter name is spelled `tensorOuputDimA` (sic) in this header. */
+cudnnStatus_t CUDNNWINAPI
+cudnnGetConvolutionNdForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
+                                      const cudnnTensorDescriptor_t inputTensorDesc,
+                                      const cudnnFilterDescriptor_t filterDesc,
+                                      int nbDims,
+                                      int tensorOuputDimA[]);
+/* Helper functions to provide the convolution forward algorithm(s) that best fit the requirements. */
+cudnnStatus_t CUDNNWINAPI /* Takes a handle and an int pointer `count`. */
+cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count);
+cudnnStatus_t CUDNNWINAPI /* Same parameter shape as cudnnFindConvolutionForwardAlgorithm, but the tensor descriptors are named srcDesc/destDesc instead of xDesc/yDesc. */
+cudnnGetConvolutionForwardAlgorithm_v7(cudnnHandle_t handle,
+                                       const cudnnTensorDescriptor_t srcDesc,
+                                       const cudnnFilterDescriptor_t filterDesc,
+                                       const cudnnConvolutionDescriptor_t convDesc,
+                                       const cudnnTensorDescriptor_t destDesc,
+                                       const int requestedAlgoCount,
+                                       int *returnedAlgoCount,
+                                       cudnnConvolutionFwdAlgoPerf_t *perfResults);
+cudnnStatus_t CUDNNWINAPI /* Takes descriptors only (no data pointers); results go through returnedAlgoCount and the perfResults array. */
+cudnnFindConvolutionForwardAlgorithm(cudnnHandle_t handle,
+                                     const cudnnTensorDescriptor_t xDesc,
+                                     const cudnnFilterDescriptor_t wDesc,
+                                     const cudnnConvolutionDescriptor_t convDesc,
+                                     const cudnnTensorDescriptor_t yDesc,
+                                     const int requestedAlgoCount, /* presumably the capacity of perfResults - confirm */
+                                     int *returnedAlgoCount,
+                                     cudnnConvolutionFwdAlgoPerf_t *perfResults);
+cudnnStatus_t CUDNNWINAPI /* "Ex" form of cudnnFindConvolutionForwardAlgorithm: additionally takes the data pointers x, w, y and a caller-supplied workSpace with its size in bytes. */
+cudnnFindConvolutionForwardAlgorithmEx(cudnnHandle_t handle,
+                                       const cudnnTensorDescriptor_t xDesc,
+                                       const void *x,
+                                       const cudnnFilterDescriptor_t wDesc,
+                                       const void *w,
+                                       const cudnnConvolutionDescriptor_t convDesc,
+                                       const cudnnTensorDescriptor_t yDesc,
+                                       void *y, /* the only non-const data pointer besides workSpace */
+                                       const int requestedAlgoCount,
+                                       int *returnedAlgoCount,
+                                       cudnnConvolutionFwdAlgoPerf_t *perfResults,
+                                       void *workSpace,
+                                       size_t workSpaceSizeInBytes);
+cudnnStatus_t CUDNNWINAPI /* Takes an input tensor (xDesc, x), filter and convolution descriptors, and a non-const colBuffer; presumably the result is written to colBuffer - confirm. */
+cudnnIm2Col(cudnnHandle_t handle,
+            const cudnnTensorDescriptor_t xDesc,
+            const void *x,
+            const cudnnFilterDescriptor_t wDesc,
+            const cudnnConvolutionDescriptor_t convDesc,
+            void *colBuffer);
+cudnnStatus_t CUDNNWINAPI /* Takes const filter/bias inputs and non-const reordered outputs, plus the reorder type. */
+cudnnReorderFilterAndBias(cudnnHandle_t handle,
+                          const cudnnFilterDescriptor_t filterDesc,
+                          cudnnReorderType_t reorderType,
+                          const void *filterData,
+                          void *reorderedFilterData,
+                          int reorderBias, /* NOTE(review): presumably a flag selecting whether biasData is processed - confirm */
+                          const void *biasData,
+                          void *reorderedBiasData);
+/* Helper function to return the minimum size of the workspace to be passed to the convolution
+ * given an algorithm; the size is returned through sizeInBytes. */
+cudnnStatus_t CUDNNWINAPI
+cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle,
+                                        const cudnnTensorDescriptor_t xDesc,
+                                        const cudnnFilterDescriptor_t wDesc,
+                                        const cudnnConvolutionDescriptor_t convDesc,
+                                        const cudnnTensorDescriptor_t yDesc,
+                                        cudnnConvolutionFwdAlgo_t algo,
+                                        size_t *sizeInBytes);
+/* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */
+/* Function to perform the forward pass for batch convolution.
+ * Here the inputs are x (xDesc) and w (wDesc), the output is y (yDesc); alpha and beta
+ * are passed as const void pointers. */
+cudnnStatus_t CUDNNWINAPI
+cudnnConvolutionForward(cudnnHandle_t handle,
+                        const void *alpha,
+                        const cudnnTensorDescriptor_t xDesc,
+                        const void *x,
+                        const cudnnFilterDescriptor_t wDesc,
+                        const void *w,
+                        const cudnnConvolutionDescriptor_t convDesc,
+                        cudnnConvolutionFwdAlgo_t algo,
+                        void *workSpace,
+                        size_t workSpaceSizeInBytes, /* see cudnnGetConvolutionForwardWorkspaceSize for the minimum size */
+                        const void *beta,
+                        const cudnnTensorDescriptor_t yDesc,
+                        void *y);
+/* Fused conv/bias/activation operation : y = Act( alpha1 * conv(x) + alpha2 * z + bias )
+ * Act is described by activationDesc; conv uses w (wDesc), convDesc and algo. */
+cudnnStatus_t CUDNNWINAPI
+cudnnConvolutionBiasActivationForward(cudnnHandle_t handle,
+                                      const void *alpha1,
+                                      const cudnnTensorDescriptor_t xDesc,
+                                      const void *x,
+                                      const cudnnFilterDescriptor_t wDesc,
+                                      const void *w,
+                                      const cudnnConvolutionDescriptor_t convDesc,
+                                      cudnnConvolutionFwdAlgo_t algo,
+                                      void *workSpace,
+                                      size_t workSpaceSizeInBytes,
+                                      const void *alpha2,
+                                      const cudnnTensorDescriptor_t zDesc,
+                                      const void *z,
+                                      const cudnnTensorDescriptor_t biasDesc,
+                                      const void *bias,
+                                      const cudnnActivationDescriptor_t activationDesc,
+                                      const cudnnTensorDescriptor_t yDesc,
+                                      void *y);
+/* Result record used by the helper functions that provide the convolution backward data
+ * algorithm(s) that best fit the requirements (they take it as their perfResults array).
+ * Same field layout as cudnnConvolutionFwdAlgoPerf_t except for the type of `algo`. */
+typedef struct cudnnConvolutionBwdDataAlgoPerfStruct {
+    cudnnConvolutionBwdDataAlgo_t algo;
+    cudnnStatus_t status;
+    float time;
+    size_t memory;
+    cudnnDeterminism_t determinism;
+    cudnnMathType_t mathType;
+    int reserved[3];
+} cudnnConvolutionBwdDataAlgoPerf_t;
+cudnnStatus_t CUDNNWINAPI
+cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, int *count);
+cudnnStatus_t CUDNNWINAPI
+cudnnFindConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
+                                          const cudnnFilterDescriptor_t wDesc,
+                                          const cudnnTensorDescriptor_t dyDesc,
+                                          const cudnnConvolutionDescriptor_t convDesc,
+                                          const cudnnTensorDescriptor_t dxDesc,
+                                          const int requestedAlgoCount,
+                                          int *returnedAlgoCount,
+                                          cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
+cudnnStatus_t CUDNNWINAPI
+cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnHandle_t handle, /* takes the same descriptors, counts and perfResults as cudnnFindConvolutionBackwardDataAlgorithm, plus data pointers and a workspace */
+                                            const cudnnFilterDescriptor_t wDesc,
+                                            const void *w, /* data pointer following its descriptor (wDesc); read-only per the const qualifier */
+                                            const cudnnTensorDescriptor_t dyDesc,
+                                            const void *dy, /* data pointer following its descriptor (dyDesc); read-only per the const qualifier */
+                                            const cudnnConvolutionDescriptor_t convDesc,
+                                            const cudnnTensorDescriptor_t dxDesc,
+                                            void *dx, /* non-const data pointer following dxDesc; NOTE(review): presumably written while algorithms are tried - confirm */
+                                            const int requestedAlgoCount,
+                                            int *returnedAlgoCount,
+                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults,
+                                            void *workSpace, /* caller-supplied workspace buffer */
+                                            size_t workSpaceSizeInBytes); /* size of workSpace, in bytes per the parameter name */
+cudnnStatus_t CUDNNWINAPI
+cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle, /* parameter types and order match cudnnFindConvolutionBackwardDataAlgorithm; only the descriptor names differ */
+                                            const cudnnFilterDescriptor_t filterDesc, /* same type and position as wDesc in the Find variants */
+                                            const cudnnTensorDescriptor_t diffDesc, /* same type and position as dyDesc in the Find variants */
+                                            const cudnnConvolutionDescriptor_t convDesc,
+                                            const cudnnTensorDescriptor_t gradDesc, /* same type and position as dxDesc in the Find variants */
+                                            const int requestedAlgoCount,
+                                            int *returnedAlgoCount,
+                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
+/*
+ *  convolution algorithm (which potentially requires some workspace)
+ */
+/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo */
+cudnnStatus_t CUDNNWINAPI
+cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
+                                             const cudnnFilterDescriptor_t wDesc,
+                                             const cudnnTensorDescriptor_t dyDesc,
+                                             const cudnnConvolutionDescriptor_t convDesc,
+                                             const cudnnTensorDescriptor_t dxDesc,
+                                             cudnnConvolutionBwdDataAlgo_t algo, /* algorithm whose workspace requirement is being queried */
+                                             size_t *sizeInBytes); /* only non-const pointer parameter; receives the minimum workspace size described in the comment preceding this declaration */
+cudnnStatus_t CUDNNWINAPI
+cudnnConvolutionBackwardData(cudnnHandle_t handle,
+                             const void *alpha, /* NOTE(review): alpha/beta are presumably the usual cuDNN blending factors (result = alpha*op + beta*prior dx); not stated in this header - confirm */
+                             const cudnnFilterDescriptor_t wDesc,
+                             const void *w,
+                             const cudnnTensorDescriptor_t dyDesc,
+                             const void *dy,
+                             const cudnnConvolutionDescriptor_t convDesc,
+                             cudnnConvolutionBwdDataAlgo_t algo,
+                             void *workSpace, /* caller-supplied workspace; cudnnGetConvolutionBackwardDataWorkspaceSize reports the minimum size for a given algo */
+                             size_t workSpaceSizeInBytes,
+                             const void *beta,
+                             const cudnnTensorDescriptor_t dxDesc,
+                             void *dx); /* only non-const data pointer in the signature, i.e. the output buffer described by dxDesc */
+/* Helper function to calculate folding descriptors for dgrad */
+cudnnStatus_t CUDNNWINAPI
+cudnnGetFoldedConvBackwardDataDescriptors(const cudnnHandle_t handle,
+                                          const cudnnFilterDescriptor_t filterDesc,
+                                          const cudnnTensorDescriptor_t diffDesc,
+                                          const cudnnConvolutionDescriptor_t convDesc,
+                                          const cudnnTensorDescriptor_t gradDesc,
+                                          const cudnnTensorFormat_t transformFormat,
+                                          cudnnFilterDescriptor_t foldedFilterDesc,
+                                          cudnnTensorDescriptor_t paddedDiffDesc,
+                                          cudnnConvolutionDescriptor_t foldedConvDesc,
+                                          cudnnTensorDescriptor_t foldedGradDesc,
+                                          cudnnTensorTransformDescriptor_t filterFoldTransDesc,
+                                          cudnnTensorTransformDescriptor_t diffPadTransDesc,
+                                          cudnnTensorTransformDescriptor_t gradFoldTransDesc,
+                                          cudnnTensorTransformDescriptor_t gradUnfoldTransDesc);
+/* cudnnFusedOps: opaque handle types.
+ * Each struct below is only forward-declared in this header; the corresponding
+ * *_t API type is a pointer to that incomplete struct. */
+struct cudnnFusedOpsConstParamStruct;
+typedef struct cudnnFusedOpsConstParamStruct *cudnnFusedOpsConstParamPack_t;
+struct cudnnFusedOpsVariantParamStruct;
+typedef struct cudnnFusedOpsVariantParamStruct *cudnnFusedOpsVariantParamPack_t;
+struct cudnnFusedOpsPlanStruct;
+typedef struct cudnnFusedOpsPlanStruct *cudnnFusedOpsPlan_t;
+typedef enum {
+    /* each op in [ ] can be disabled by passing NULL ptr */
+    /* [per channel scale], [per channel bias], [activation], convolution, [generate BN stats] */
+    CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS = 0,
+    /* [per channel scale], [per channel bias], [activation], convolutionBackwardWeights */
+    CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD = 1,
+    /* utility for BN training in BN-conv fusion */
+    /* computes the equivalent scale and bias from ySum ySqSum and learned scale, bias */
+    /* optionally update running stats and generate saved stats */
+    CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING = 2,
+    /* utility for BN inference in BN-conv fusion */
+    /* computes the equivalent scale and bias from learned running stats and learned scale, bias */
+    CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE = 3,
+    /* reserved for future use: convolution, [per channel scale], [per channel bias], [residual add], [activation] */
+    CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION = 4,
+    /* reserved for future use: [per channel scale], [per channel bias], [residual add],  activation, bitmask */
+    CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK = 5,
+    /* reserved for future use */
+    CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM = 6,
+} cudnnFusedOps_t;
+typedef enum {
+    /* set XDESC: pass previously initialized cudnnTensorDescriptor_t */
+    /* get XDESC: pass previously created cudnnTensorDescriptor_t */
+    CUDNN_PARAM_XDESC = 0,
+    /* set/get XDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_XDATA_PLACEHOLDER = 1,
+    /* set/get BN_MODE: pass cudnnBatchNormMode_t* */
+    CUDNN_PARAM_BN_MODE = 2,
+    /* set CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
+    /* get CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
+    CUDNN_PARAM_BN_EQSCALEBIAS_DESC = 3,
+    /* set/get BN_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER = 4,
+    /* set/get BN_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER = 5,
+    /* set ACTIVATION_DESC: pass previously initialized cudnnActivationDescriptor_t */
+    /* get ACTIVATION_DESC: pass previously created cudnnActivationDescriptor_t */
+    CUDNN_PARAM_ACTIVATION_DESC = 6,
+    /* set CONV_DESC: pass previously initialized cudnnConvolutionDescriptor_t */
+    /* get CONV_DESC: pass previously created cudnnConvolutionDescriptor_t */
+    CUDNN_PARAM_CONV_DESC = 7,
+    /* set WDESC: pass previously initialized cudnnFilterDescriptor_t */
+    /* get WDESC: pass previously created cudnnFilterDescriptor_t */
+    CUDNN_PARAM_WDESC = 8,
+    /* set/get WDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_WDATA_PLACEHOLDER = 9,
+    /* set DWDESC: pass previously initialized cudnnFilterDescriptor_t */
+    /* get DWDESC: pass previously created cudnnFilterDescriptor_t */
+    CUDNN_PARAM_DWDESC = 10,
+    /* set/get DWDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_DWDATA_PLACEHOLDER = 11,
+    /* set YDESC: pass previously initialized cudnnTensorDescriptor_t */
+    /* get YDESC: pass previously created cudnnTensorDescriptor_t */
+    CUDNN_PARAM_YDESC = 12,
+    /* set/get YDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_YDATA_PLACEHOLDER = 13,
+    /* set DYDESC: pass previously initialized cudnnTensorDescriptor_t */
+    /* get DYDESC: pass previously created cudnnTensorDescriptor_t */
+    CUDNN_PARAM_DYDESC = 14,
+    /* set/get DYDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_DYDATA_PLACEHOLDER = 15,
+    /* set YSTATS_DESC: pass previously initialized cudnnTensorDescriptor_t */
+    /* get YSTATS_DESC: pass previously created cudnnTensorDescriptor_t */
+    CUDNN_PARAM_YSTATS_DESC = 16,
+    /* set/get YSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_YSUM_PLACEHOLDER = 17,
+    /* set/get YSQSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_YSQSUM_PLACEHOLDER = 18,
+    /* set CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously initialized cudnnTensorDescriptor_t */
+    /* get CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously created cudnnTensorDescriptor_t */
+    CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC = 19,
+    /* set/get CUDNN_PARAM_BN_SCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_BN_SCALE_PLACEHOLDER = 20,
+    /* set/get CUDNN_PARAM_BN_BIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_BN_BIAS_PLACEHOLDER = 21,
+    /* set/get CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER = 22,
+    /* set/get CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER = 23,
+    /* set/get CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER = 24,
+    /* set/get CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER = 25,
+    /* set ZDESC: pass previously initialized cudnnTensorDescriptor_t */
+    /* get ZDESC: pass previously created cudnnTensorDescriptor_t */
+    CUDNN_PARAM_ZDESC = 26,
+    /* set/get ZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_ZDATA_PLACEHOLDER = 27,
+    /* set BN_Z_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
+    /* get BN_Z_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
+    CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC = 28,
+    /* set/get BN_Z_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER = 29,
+    /* set/get BN_Z_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER = 30,
+    /* set ACTIVATION_BITMASK_DESC: pass previously initialized cudnnTensorDescriptor_t */
+    /* get ACTIVATION_BITMASK_DESC: pass previously created cudnnTensorDescriptor_t */
+    CUDNN_PARAM_ACTIVATION_BITMASK_DESC = 31,
+    /* set/get ACTIVATION_BITMASK_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER = 32,
+    /* set DXDESC: pass previously initialized cudnnTensorDescriptor_t */
+    /* get DXDESC: pass previously created cudnnTensorDescriptor_t */
+    CUDNN_PARAM_DXDESC = 33,
+    /* set/get DXDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_DXDATA_PLACEHOLDER = 34,
+    /* set DZDESC: pass previously initialized cudnnTensorDescriptor_t */
+    /* get DZDESC: pass previously created cudnnTensorDescriptor_t */
+    CUDNN_PARAM_DZDESC = 35,
+    /* set/get DZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_DZDATA_PLACEHOLDER = 36,
+    /* set/get CUDNN_PARAM_BN_DSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_BN_DSCALE_PLACEHOLDER = 37,
+    /* set/get CUDNN_PARAM_BN_DBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
+    CUDNN_PARAM_BN_DBIAS_PLACEHOLDER = 38,
+} cudnnFusedOpsConstParamLabel_t;
+typedef enum { /* value type passed by pointer for the *_PLACEHOLDER labels of cudnnFusedOpsConstParamLabel_t (see that enum's per-entry comments) */
+    CUDNN_PTR_NULL         = 0, /* NOTE(review): presumably "this pointer will be NULL" - confirm against the cuDNN API reference */
+    CUDNN_PTR_ELEM_ALIGNED = 1, /* NOTE(review): presumably aligned to the element size - confirm */
+    CUDNN_PTR_16B_ALIGNED  = 2, /* NOTE(review): presumably aligned to 16 bytes - confirm */
+} cudnnFusedOpsPointerPlaceHolder_t;
+typedef enum { /* labels for variant-pack entries: CUDNN_PTR_* labels occupy 0-23, CUDNN_SCALAR_* labels occupy 100-103 */
+    /* set: pass void* pointing to dev memory */
+    /* get: pass void** pointing to host memory */
+    /* NOTE(review): the two lines above presumably apply to every CUDNN_PTR_* label, not just the first - confirm */
+    CUDNN_PTR_XDATA              = 0,
+    CUDNN_PTR_BN_EQSCALE         = 1,
+    CUDNN_PTR_BN_EQBIAS          = 2,
+    CUDNN_PTR_WDATA              = 3,
+    CUDNN_PTR_DWDATA             = 4,
+    CUDNN_PTR_YDATA              = 5,
+    CUDNN_PTR_DYDATA             = 6,
+    CUDNN_PTR_YSUM               = 7,
+    CUDNN_PTR_YSQSUM             = 8,
+    CUDNN_PTR_WORKSPACE          = 9,
+    CUDNN_PTR_BN_SCALE           = 10,
+    CUDNN_PTR_BN_BIAS            = 11,
+    CUDNN_PTR_BN_SAVED_MEAN      = 12,
+    CUDNN_PTR_BN_SAVED_INVSTD    = 13,
+    CUDNN_PTR_BN_RUNNING_MEAN    = 14,
+    CUDNN_PTR_BN_RUNNING_VAR     = 15,
+    CUDNN_PTR_ZDATA              = 16,
+    CUDNN_PTR_BN_Z_EQSCALE       = 17,
+    CUDNN_PTR_BN_Z_EQBIAS        = 18,
+    CUDNN_PTR_ACTIVATION_BITMASK = 19,
+    CUDNN_PTR_DXDATA             = 20,
+    CUDNN_PTR_DZDATA             = 21,
+    CUDNN_PTR_BN_DSCALE          = 22,
+    CUDNN_PTR_BN_DBIAS           = 23, /* last pointer label; values 24-99 are unused in this header */
+    /* set/get: pass size_t* pointing to host memory */
+    CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES = 100,
+    /* set/get: pass int64_t* pointing to host memory */
+    CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT = 101,
+    /* set/get: pass double* pointing to host memory */
+    CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR = 102,
+    /* set/get: pass double* pointing to host memory */
+    CUDNN_SCALAR_DOUBLE_BN_EPSILON = 103,
+} cudnnFusedOpsVariantParamLabel_t;
+cudnnStatus_t CUDNNWINAPI
+cudnnCnnInferVersionCheck(void);
+#if defined(__cplusplus)
+}
+#endif
+#endif /* CUDNN_CNN_INFER_H_ */

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (221 Bytes). View file

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/cufftw.h ADDED Viewed

	@@ -0,0 +1,454 @@

+ /* Copyright 2005-2014 NVIDIA Corporation.  All rights reserved.
+  *
+  * NOTICE TO LICENSEE:
+  *
+  * The source code and/or documentation ("Licensed Deliverables") are
+  * subject to NVIDIA intellectual property rights under U.S. and
+  * international Copyright laws.
+  *
+  * The Licensed Deliverables contained herein are PROPRIETARY and
+  * CONFIDENTIAL to NVIDIA and are being provided under the terms and
+  * conditions of a form of NVIDIA software license agreement by and
+  * between NVIDIA and Licensee ("License Agreement") or electronically
+  * accepted by Licensee.  Notwithstanding any terms or conditions to
+  * the contrary in the License Agreement, reproduction or disclosure
+  * of the Licensed Deliverables to any third party without the express
+  * written consent of NVIDIA is prohibited.
+  *
+  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+  * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+  * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  THEY ARE
+  * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+  * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+  * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+  * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+  * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+  * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+  * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+  * OF THESE LICENSED DELIVERABLES.
+  *
+  * U.S. Government End Users.  These Licensed Deliverables are a
+  * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+  * 1995), consisting of "commercial computer software" and "commercial
+  * computer software documentation" as such terms are used in 48
+  * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
+  * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+  * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+  * U.S. Government End Users acquire the Licensed Deliverables with
+  * only those rights set forth herein.
+  *
+  * Any use of the Licensed Deliverables in individual and commercial
+  * software must include, in the user documentation and internal
+  * comments to the code, the above Disclaimer and U.S. Government End
+  * Users Notice.
+  */
+/*!
+* \file cufftw.h
+* \brief Public header file for the NVIDIA CUDA FFTW library (CUFFTW)
+*/
+#ifndef _CUFFTW_H_
+#define _CUFFTW_H_
+#include <stddef.h>
+#include <stdio.h>
+#include "cufft.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+// transform direction (FFTW_INVERSE and FFTW_BACKWARD are two names for the same value, 1)
+#define FFTW_FORWARD -1
+#define FFTW_INVERSE  1
+#define FFTW_BACKWARD 1
+// Planner flags: consecutive values 0x01-0x05, not independent bits. NOTE(review): upstream fftw3.h is believed to use different numeric values, so always use the names - confirm against the cuFFTW documentation.
+#define FFTW_ESTIMATE           0x01
+#define FFTW_MEASURE            0x02
+#define FFTW_PATIENT            0x03
+#define FFTW_EXHAUSTIVE         0x04
+#define FFTW_WISDOM_ONLY        0x05
+//Algorithm restriction flags. As defined here FFTW_PRESERVE_INPUT (0x0C) contains the FFTW_DESTROY_INPUT bit (0x08), so a bitwise test for DESTROY_INPUT also matches PRESERVE_INPUT.
+#define FFTW_DESTROY_INPUT      0x08
+#define FFTW_PRESERVE_INPUT     0x0C
+#define FFTW_UNALIGNED          0x10
+// CUFFTW defines and supports the following data types
+// note if complex.h has been included we use the C99 complex types (detected via the _Complex_I and complex macros)
+#if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined (complex)
+  typedef double _Complex fftw_complex; /* C99 native complex; define FFTW_NO_Complex before including this header to force the array form below */
+  typedef float _Complex fftwf_complex;
+#else
+  typedef double fftw_complex[2]; /* fallback: two doubles; NOTE(review): presumably [0]=real, [1]=imaginary as in FFTW - confirm */
+  typedef float fftwf_complex[2]; /* fallback: two floats, same layout assumption as above */
+#endif
+typedef void *fftw_plan; /* opaque plan handle for the double-precision API */
+typedef void *fftwf_plan; /* same underlying type as fftw_plan, so the compiler cannot catch a double/single plan mix-up */
+typedef struct { /* per-dimension descriptor taken by the fftw_plan_guru_* functions (dims / batch_dims arguments) */
+    int n; /* NOTE(review): presumably the extent of this dimension, as in FFTW's guru interface - confirm */
+    int is; /* NOTE(review): presumably the input stride - confirm */
+    int os; /* NOTE(review): presumably the output stride - confirm */
+} fftw_iodim;
+typedef fftw_iodim fftwf_iodim; /* the single-precision API reuses the same struct */
+typedef struct { /* ptrdiff_t-width counterpart of fftw_iodim, taken by the fftw_plan_guru64_* functions */
+    ptrdiff_t n;
+    ptrdiff_t is;
+    ptrdiff_t os;
+} fftw_iodim64;
+typedef fftw_iodim64 fftwf_iodim64; /* the single-precision API reuses the same struct */
+// CUFFTW defines and supports the following double precision APIs
+fftw_plan CUFFTAPI fftw_plan_dft_1d(int n,
+                                    fftw_complex *in,
+                                    fftw_complex *out,
+                                    int sign,
+                                    unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_dft_2d(int n0,
+                                    int n1,
+                                    fftw_complex *in,
+                                    fftw_complex *out,
+                                    int sign,
+                                    unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_dft_3d(int n0,
+                                    int n1,
+                                    int n2,
+                                    fftw_complex *in,
+                                    fftw_complex *out,
+                                    int sign,
+                                    unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_dft(int rank,
+                                 const int *n,
+                                 fftw_complex *in,
+                                 fftw_complex *out,
+                                 int sign,
+                                 unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_dft_r2c_1d(int n,
+                                        double *in,
+                                        fftw_complex *out,
+                                        unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_dft_r2c_2d(int n0,
+                                        int n1,
+                                        double *in,
+                                        fftw_complex *out,
+                                        unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_dft_r2c_3d(int n0,
+                                        int n1,
+                                        int n2,
+                                        double *in,
+                                        fftw_complex *out,
+                                        unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_dft_r2c(int rank,
+                                     const int *n,
+                                     double *in,
+                                     fftw_complex *out,
+                                     unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_dft_c2r_1d(int n,
+                                        fftw_complex *in,
+                                        double *out,
+                                        unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_dft_c2r_2d(int n0,
+                                        int n1,
+                                        fftw_complex *in,
+                                        double *out,
+                                        unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_dft_c2r_3d(int n0,
+                                        int n1,
+                                        int n2,
+                                        fftw_complex *in,
+                                        double *out,
+                                        unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_dft_c2r(int rank,
+                                     const int *n,
+                                     fftw_complex *in,
+                                     double *out,
+                                     unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_many_dft(int rank,
+                                      const int *n,
+                                      int batch,
+                                      fftw_complex *in,
+                                      const int *inembed, int istride, int idist,
+                                      fftw_complex *out,
+                                      const int *onembed, int ostride, int odist,
+                                      int sign, unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_many_dft_r2c(int rank,
+                                          const int *n,
+                                          int batch,
+                                          double *in,
+                                          const int *inembed, int istride, int idist,
+                                          fftw_complex *out,
+                                          const int *onembed, int ostride, int odist,
+                                          unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_many_dft_c2r(int rank,
+                                          const int *n,
+                                          int batch,
+                                          fftw_complex *in,
+                                          const int *inembed, int istride, int idist,
+                                          double *out,
+                                          const int *onembed, int ostride, int odist,
+                                          unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_guru_dft(int rank, const fftw_iodim *dims,
+                                      int batch_rank, const fftw_iodim *batch_dims,
+                                      fftw_complex *in, fftw_complex *out,
+                                      int sign, unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_guru_dft_r2c(int rank, const fftw_iodim *dims,
+                                          int batch_rank, const fftw_iodim *batch_dims,
+                                          double *in, fftw_complex *out,
+                                          unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_guru_dft_c2r(int rank, const fftw_iodim *dims,
+                                          int batch_rank, const fftw_iodim *batch_dims,
+                                          fftw_complex *in, double *out,
+                                          unsigned flags);
+void CUFFTAPI fftw_execute(const fftw_plan plan);
+void CUFFTAPI fftw_execute_dft(const fftw_plan plan,
+                               fftw_complex *idata,
+                               fftw_complex *odata);
+void CUFFTAPI fftw_execute_dft_r2c(const fftw_plan plan,
+                                   double *idata,
+                                   fftw_complex *odata);
+void CUFFTAPI fftw_execute_dft_c2r(const fftw_plan plan,
+                                   fftw_complex *idata,
+                                   double *odata);
+// CUFFTW defines and supports the following single precision APIs
+fftwf_plan CUFFTAPI fftwf_plan_dft_1d(int n,
+                                      fftwf_complex *in,
+                                      fftwf_complex *out,
+                                      int sign,
+                                      unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_dft_2d(int n0,
+                                      int n1,
+                                      fftwf_complex *in,
+                                      fftwf_complex *out,
+                                      int sign,
+                                      unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_dft_3d(int n0,
+                                      int n1,
+                                      int n2,
+                                      fftwf_complex *in,
+                                      fftwf_complex *out,
+                                      int sign,
+                                      unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_dft(int rank,
+                                   const int *n,
+                                   fftwf_complex *in,
+                                   fftwf_complex *out,
+                                   int sign,
+                                   unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_dft_r2c_1d(int n,
+                                          float *in,
+                                          fftwf_complex *out,
+                                          unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_dft_r2c_2d(int n0,
+                                          int n1,
+                                          float *in,
+                                          fftwf_complex *out,
+                                          unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_dft_r2c_3d(int n0,
+                                          int n1,
+                                          int n2,
+                                          float *in,
+                                          fftwf_complex *out,
+                                          unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_dft_r2c(int rank,
+                                       const int *n,
+                                       float *in,
+                                       fftwf_complex *out,
+                                       unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_dft_c2r_1d(int n,
+                                          fftwf_complex *in,
+                                          float *out,
+                                          unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_dft_c2r_2d(int n0,
+                                          int n1,
+                                          fftwf_complex *in,
+                                          float *out,
+                                          unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_dft_c2r_3d(int n0, /* single-precision plan taking three int sizes, a complex input and a float output */
+                                          int n1,
+                                          int n2,
+                                          fftwf_complex *in,
+                                          float *out,
+                                          unsigned flags); /* continuation lines now align with the opening parenthesis, like the sibling fftwf_ declarations */
+fftwf_plan CUFFTAPI fftwf_plan_dft_c2r(int rank,
+                                       const int *n,
+                                       fftwf_complex *in,
+                                       float *out,
+                                       unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_many_dft(int rank,
+                                        const int *n,
+                                        int batch,
+                                        fftwf_complex *in,
+                                        const int *inembed, int istride, int idist,
+                                        fftwf_complex *out,
+                                        const int *onembed, int ostride, int odist,
+                                        int sign, unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_many_dft_r2c(int rank,
+                                            const int *n,
+                                            int batch,
+                                            float *in,
+                                            const int *inembed, int istride, int idist,
+                                            fftwf_complex *out,
+                                            const int *onembed, int ostride, int odist,
+                                            unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_many_dft_c2r(int rank,
+                                            const int *n,
+                                            int batch,
+                                            fftwf_complex *in,
+                                            const int *inembed, int istride, int idist,
+                                            float *out,
+                                            const int *onembed, int ostride, int odist,
+                                            unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_guru_dft(int rank, const fftwf_iodim *dims,
+                                        int batch_rank, const fftwf_iodim *batch_dims,
+                                        fftwf_complex *in, fftwf_complex *out,
+                                        int sign, unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_guru_dft_r2c(int rank, const fftwf_iodim *dims,
+                                            int batch_rank, const fftwf_iodim *batch_dims,
+                                            float *in, fftwf_complex *out,
+                                            unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_guru_dft_c2r(int rank, const fftwf_iodim *dims,
+                                            int batch_rank, const fftwf_iodim *batch_dims,
+                                            fftwf_complex *in, float *out,
+                                            unsigned flags);
+void CUFFTAPI fftwf_execute(const fftwf_plan plan); /* single-precision counterpart of fftw_execute; parameter uses the single-precision handle type like the other fftwf_execute_* declarations (fftw_plan and fftwf_plan are both void *, so callers are unaffected) */
+void CUFFTAPI fftwf_execute_dft(const fftwf_plan plan,
+                                fftwf_complex *idata,
+                                fftwf_complex *odata);
+void CUFFTAPI fftwf_execute_dft_r2c(const fftwf_plan plan,
+                                    float *idata,
+                                    fftwf_complex *odata);
+void CUFFTAPI fftwf_execute_dft_c2r(const fftwf_plan plan,
+                                    fftwf_complex *idata,
+                                    float *odata);
+/// CUFFTW 64-bit Guru Interface
+/// double precision (dp)
+fftw_plan CUFFTAPI fftw_plan_guru64_dft(int rank, const fftw_iodim64* dims, int batch_rank, const fftw_iodim64* batch_dims, fftw_complex* in, fftw_complex* out, int sign, unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_guru64_dft_r2c(int rank, const fftw_iodim64* dims, int batch_rank, const fftw_iodim64* batch_dims, double* in, fftw_complex* out, unsigned flags);
+fftw_plan CUFFTAPI fftw_plan_guru64_dft_c2r(int rank, const fftw_iodim64* dims, int batch_rank, const fftw_iodim64* batch_dims, fftw_complex* in, double* out, unsigned flags);
+/// single precision (sp)
+fftwf_plan CUFFTAPI fftwf_plan_guru64_dft(int rank, const fftwf_iodim64* dims, int batch_rank, const fftwf_iodim64* batch_dims, fftwf_complex* in, fftwf_complex* out, int sign, unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_guru64_dft_r2c(int rank, const fftwf_iodim64* dims, int batch_rank, const fftwf_iodim64* batch_dims, float* in, fftwf_complex* out, unsigned flags);
+fftwf_plan CUFFTAPI fftwf_plan_guru64_dft_c2r(int rank, const fftwf_iodim64* dims, int batch_rank, const fftwf_iodim64* batch_dims, fftwf_complex* in, float* out, unsigned flags);
+#ifdef _WIN32 // _CUFFTAPI(T) declares return type T with CUFFTAPI; on Windows CUFFTAPI follows the type
+#define _CUFFTAPI(T) T CUFFTAPI
+#else // elsewhere CUFFTAPI precedes the return type
+#define _CUFFTAPI(T) CUFFTAPI T
+#endif // _WIN32
+// CUFFTW defines and supports the following support APIs
+// Support API declarations, each provided in a double-precision (fftw_) and a
+// single-precision (fftwf_) flavour. The pointer-returning functions go
+// through _CUFFTAPI so the CUFFTAPI decoration lands on the correct side of
+// the return type.
+_CUFFTAPI(void *) fftw_malloc(size_t n);
+_CUFFTAPI(void *) fftwf_malloc(size_t n);
+void CUFFTAPI fftw_free(void *pointer);
+void CUFFTAPI fftwf_free(void *pointer);
+void CUFFTAPI fftw_export_wisdom_to_file(FILE * output_file);
+void CUFFTAPI fftwf_export_wisdom_to_file(FILE * output_file);
+void CUFFTAPI fftw_import_wisdom_from_file(FILE * input_file);
+void CUFFTAPI fftwf_import_wisdom_from_file(FILE * input_file);
+void CUFFTAPI fftw_print_plan(const fftw_plan plan);
+void CUFFTAPI fftwf_print_plan(const fftwf_plan plan);
+void CUFFTAPI fftw_set_timelimit(double seconds);
+void CUFFTAPI fftwf_set_timelimit(double seconds);
+// fftwf_cost and fftwf_flops take fftwf_plan, matching fftwf_print_plan and
+// fftwf_destroy_plan (they previously named fftw_plan).
+// NOTE(review): assumes fftw_plan and fftwf_plan are interchangeable typedefs
+// earlier in this header -- confirm.
+double CUFFTAPI fftw_cost(const fftw_plan plan);
+double CUFFTAPI fftwf_cost(const fftwf_plan plan);
+void CUFFTAPI fftw_flops(const fftw_plan plan, double *add, double *mul, double *fma);
+void CUFFTAPI fftwf_flops(const fftwf_plan plan, double *add, double *mul, double *fma);
+void CUFFTAPI fftw_destroy_plan(fftw_plan plan);
+void CUFFTAPI fftwf_destroy_plan(fftwf_plan plan);
+void CUFFTAPI fftw_cleanup(void);
+void CUFFTAPI fftwf_cleanup(void);
+#ifdef __cplusplus
+}
+#endif
+#endif /* _CUFFTW_H_ */

tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/lib/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (217 Bytes). View file

tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/idna/__pycache__/idnadata.cpython-311.pyc ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e0c01fe1a0e5738b15b6952c63eebb273a28c12beefd13f01594da265a1b156
+size 101565

tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/LICENSE ADDED Viewed

	@@ -0,0 +1,29 @@

+Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its contributors
+   may be used to endorse or promote products derived from this software
+   without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of
+external contributions to this project including patches, pull requests, etc.

tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/RECORD ADDED Viewed

	@@ -0,0 +1,65 @@

+../../../bin/pybind11-config,sha256=KwKhJwrv86OeAvCUq7sBopc-kDZzCJdnh_4RZIF8T-c,265
+pybind11-2.13.6.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+pybind11-2.13.6.dist-info/LICENSE,sha256=g5ZbhDuY9nDTqFvQQe1LNyyOxQ17SlmVqDrGl7pnXcs,1684
+pybind11-2.13.6.dist-info/METADATA,sha256=Gg_aZ0f3aFFDF3bQvgzR9kwVT_jogjVEc74kDVldlq0,9513
+pybind11-2.13.6.dist-info/RECORD,,
+pybind11-2.13.6.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+pybind11-2.13.6.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
+pybind11-2.13.6.dist-info/entry_points.txt,sha256=Q_kAwEJBDz8wHD0V50hY3AvchDk3Pfyeox2YHrAcWZ0,105
+pybind11-2.13.6.dist-info/top_level.txt,sha256=d1mqwSpUlmlZhXDQ9Y57eNlXc3dVDM1toKmfC1kJbvU,9
+pybind11/__init__.py,sha256=9vt06pvuwvdKW0YwYQKOTxBEgmQ0kb5ZUOJrgtGhdKs,459
+pybind11/__main__.py,sha256=p8vZ4btnkb_TaF03R1ac7qHmp-Eut86gCSUcVP8F3i4,2526
+pybind11/__pycache__/__init__.cpython-311.pyc,,
+pybind11/__pycache__/__main__.cpython-311.pyc,,
+pybind11/__pycache__/_version.cpython-311.pyc,,
+pybind11/__pycache__/commands.cpython-311.pyc,,
+pybind11/__pycache__/setup_helpers.cpython-311.pyc,,
+pybind11/_version.py,sha256=XUUceDIbc3kdRixyEVMy5v0LcGF36QUxMG9rJHlT6P4,232
+pybind11/commands.py,sha256=V43hKb7VE_abYZvaO-TpJLOU65n6W3ZrdYHGF3G3qUs,1243
+pybind11/include/pybind11/attr.h,sha256=QPjH7BfhL8QFwHHkrDak8gNOLMlb1itAO5fobjdoLp8,24334
+pybind11/include/pybind11/buffer_info.h,sha256=_FcQisqdpphfWXKeCGNv3Gq5ivy1z-qF3d1Noeteaok,7778
+pybind11/include/pybind11/cast.h,sha256=8gJ4Y4nc83dyq12CuU7ircAvAV1HoEZEVr0UyfeLQNA,71696
+pybind11/include/pybind11/chrono.h,sha256=A23naeloqn-1NKVAABOsJtHU9Vz8lfvrAICuLk-7qBM,8458
+pybind11/include/pybind11/common.h,sha256=ATg9Bt1pwF8qnNuI086fprM4CUTdrZdk_g2HXE1Sf6A,120
+pybind11/include/pybind11/complex.h,sha256=AaDZ-rEmK4tFaue-K9P5y3TxxnaQF6JwZ_6LAzkdLQI,2096
+pybind11/include/pybind11/detail/class.h,sha256=Bjk3K6xAMgwxPNTKfik7SC5Y24wgKs8Oz5VjvFdy0kA,29026
+pybind11/include/pybind11/detail/common.h,sha256=uxFMVYKW87YPbUz8Mo70xoVrpK2D1NzhKSwlDpwrJxo,54708
+pybind11/include/pybind11/detail/cpp_conduit.h,sha256=Bbx5728XzvyCL2gfW7kG6vgDltS5-V5gtkNQFPFevXg,2589
+pybind11/include/pybind11/detail/descr.h,sha256=D63pIHsF3luO_g51CjbJU8Wl9VOihciEXQhXvfRg-Rk,6035
+pybind11/include/pybind11/detail/exception_translation.h,sha256=fM1J19z00AuDlozHt0srpCJr-1uWW4kj_fLdSJDbdY8,2600
+pybind11/include/pybind11/detail/init.h,sha256=Sb1UkPecC5l9xj5naYLdUM7qIRLVpe614H9Frvyg8xg,17983
+pybind11/include/pybind11/detail/internals.h,sha256=xs-I7JdJACxx7gJf12HBLjL007jRXcAffPDsd0oTrq4,31985
+pybind11/include/pybind11/detail/type_caster_base.h,sha256=mdgZ-FIkxdSShMPPe69EXxjvd1eQDDBVX835B7XqCNo,48938
+pybind11/include/pybind11/detail/typeid.h,sha256=jw5pr9m72vkDsloT8vxl9wj17VJGcEdXDyziBlt89Js,1625
+pybind11/include/pybind11/detail/value_and_holder.h,sha256=hwNYlqxjUhlUqihwMjr6s3LhhKlZiTLaWREtQrgOAkQ,2814
+pybind11/include/pybind11/eigen.h,sha256=-HmSA1kgwCQ-GHUt7PHtTEc-vxqw9xARpF8PHWJip28,316
+pybind11/include/pybind11/eigen/common.h,sha256=dIeqmK7IzW5K4k2larPnA1A863rDp38U9YbNIwiIyYk,378
+pybind11/include/pybind11/eigen/matrix.h,sha256=VjCfx8M2AcD3m8THUbIEYidJyIClaNw9jMbd_Fzfo1s,32142
+pybind11/include/pybind11/eigen/tensor.h,sha256=csE3_N9yy-9k0SWQPJuAxmv8Jp_-lFrrPdVOyMV8-gc,18384
+pybind11/include/pybind11/embed.h,sha256=F3JQiOWnLGSuZ0NuEyBWFhHyVdczD8D_67kriU4QfsY,13362
+pybind11/include/pybind11/eval.h,sha256=7re-O2Eor1yD0Q_KgFkHIjKD17ejzII687Yszl9_KfE,4731
+pybind11/include/pybind11/functional.h,sha256=iOyYuNmbI-K3zgc1IMDwe4iHEOO3F8vwZbVSvbgxFQ4,5267
+pybind11/include/pybind11/gil.h,sha256=hsJj6z1iXqlo5c7fPCgEvK_-eeDoKZm7PKPwPNCdVVo,7702
+pybind11/include/pybind11/gil_safe_call_once.h,sha256=KKcy9Wgc_MJY-U5WpCZeNyzW7oVmC-d6yXkgephZ7zs,3993
+pybind11/include/pybind11/iostream.h,sha256=K5rPXoCYN325r1PptcJCIhPhgtRtTJQjMr7bvUIOwxk,8862
+pybind11/include/pybind11/numpy.h,sha256=xREhfycUTCOPF8CF-UWRdoLX0B23V6YWRiBqeRRElZg,84442
+pybind11/include/pybind11/operators.h,sha256=224RoAXcv1la4NNY9rQ3aD_AeC8S9ZKx3HVK1O8B4MU,9103
+pybind11/include/pybind11/options.h,sha256=qXvmnj--9fZSp56NYefnB3W5V17ppHlY1Srgo3DNBpw,2734
+pybind11/include/pybind11/pybind11.h,sha256=hbzXHRCBIW7dwtwaKjXKPC0Nl1MGHZ5-BjGsMlE3LuU,129898
+pybind11/include/pybind11/pytypes.h,sha256=BF8x4S5fsAzWf-d9pu83UsqjwRRo0ragHPy9sDOpUvk,99894
+pybind11/include/pybind11/stl.h,sha256=aMi1OCCw2Zb-IRLSlAtQEJJHtWsRJiLT9dKDMHST1Ic,15532
+pybind11/include/pybind11/stl/filesystem.h,sha256=lcYRCwNA8Xf4e4FRbeYh36SAwQjxKgyTXXdrguR4gM4,4559
+pybind11/include/pybind11/stl_bind.h,sha256=B5t8E0A4Zdgm2sF0J8Q_UI2U5uqEBQ9TsJCelsJ4q0E,28495
+pybind11/include/pybind11/type_caster_pyobject_ptr.h,sha256=H7pKBYTvUlibiJQEcKmeAkygSQwoCkuIyukNSDmVq-U,1929
+pybind11/include/pybind11/typing.h,sha256=PIjZFNNzY_KsrkHQPlg0Vt24jlTi6kThdOldEJjchtY,7000
+pybind11/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+pybind11/setup_helpers.py,sha256=AwD_CjfVzX653nW4_i0U4bkFMCG4ZILoMZixyL8CZ4o,17490
+pybind11/share/cmake/pybind11/FindPythonLibsNew.cmake,sha256=_ZVzgVp6GQSEEv-b2iuauqTgoi1k2jHiNJlpl25MN-4,12187
+pybind11/share/cmake/pybind11/pybind11Common.cmake,sha256=lvJJ518cN7SjKDgjpXw0XU0eKW358wEloIcKCyCNPB0,16164
+pybind11/share/cmake/pybind11/pybind11Config.cmake,sha256=I96KX_zIZvLHbedHknVBj2YKhMt_QjM5LhCbzVNTvD8,7959
+pybind11/share/cmake/pybind11/pybind11ConfigVersion.cmake,sha256=vDsLSBg7-Nop8Ar9wRe0xKgGUV4LRzWE4XE0kE5B6fE,1403
+pybind11/share/cmake/pybind11/pybind11GuessPythonExtSuffix.cmake,sha256=WvhK2E-vWi9ArY0WJZXEK4kEFHpDQjl-au963hqH0r0,3321
+pybind11/share/cmake/pybind11/pybind11NewTools.cmake,sha256=zGLNjL28gzi8tvwiabudLsye7id_sZI5ooYfiBBllvM,12169
+pybind11/share/cmake/pybind11/pybind11Targets.cmake,sha256=tIjPtIpfb5m9POtu484cjGgNyWc5E4bbKzESLrcOLA0,4271
+pybind11/share/cmake/pybind11/pybind11Tools.cmake,sha256=5K6EahoS7wIaQIhjrDS4p4jTpYr0b_MronXKee8zCAc,8565
+pybind11/share/pkgconfig/pybind11.pc,sha256=M17R2NbpW6o7ujxioMP5M6WgVGrmJ_1vu_-E-H_rbes,171

tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/REQUESTED ADDED Viewed

File without changes

tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+[console_scripts]
+pybind11-config = pybind11.__main__:main
+[pipx.run]
+pybind11 = pybind11.__main__:main

tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/detail/cpp_conduit.h ADDED Viewed

	@@ -0,0 +1,77 @@

+// Copyright (c) 2024 The pybind Community.
+#pragma once
+#include <pybind11/pytypes.h>
+#include "common.h"
+#include "internals.h"
+#include <typeinfo>
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
+// Forward declaration needed here: Refactoring opportunity.
+extern "C" inline PyObject *pybind11_object_new(PyTypeObject *type, PyObject *, PyObject *);
+// Tells whether `type_obj` is known to this module's pybind11 internals.
+// On PyPy the answer comes from a lookup in `registered_types_py`; on other
+// interpreters the type's `tp_new` slot is compared with `pybind11_object_new`.
+inline bool type_is_managed_by_our_internals(PyTypeObject *type_obj) {
+#if defined(PYPY_VERSION)
+    auto &registry = get_internals().registered_types_py;
+    return registry.find(type_obj) != registry.end();
+#else
+    return type_obj->tp_new == pybind11_object_new;
+#endif
+}
+// Looks `attr_name` up on the type (not the instance) and reports whether the
+// entry found there passes PyInstanceMethod_Check. A failed lookup yields false.
+inline bool is_instance_method_of_type(PyTypeObject *type_obj, PyObject *attr_name) {
+    PyObject *const found = _PyType_Lookup(type_obj, attr_name);
+    if (found == nullptr) {
+        return false;
+    }
+    return PyInstanceMethod_Check(found) != 0;
+}
+// Fetches the `_pybind11_conduit_v1_` attribute of `obj` as an owned object,
+// or returns an empty object when `obj` is a type, the attribute is missing,
+// or the attribute is unusable. For types managed by our internals the
+// attribute must be an instance method on the type and is then taken to be
+// callable without a further check; for all other types it must pass
+// PyCallable_Check.
+inline object try_get_cpp_conduit_method(PyObject *obj) {
+    if (PyType_Check(obj)) {
+        return object();
+    }
+    PyTypeObject *type_obj = Py_TYPE(obj);
+    str attr_name("_pybind11_conduit_v1_");
+    const bool managed_by_us = type_is_managed_by_our_internals(type_obj);
+    if (managed_by_us && !is_instance_method_of_type(type_obj, attr_name.ptr())) {
+        return object();
+    }
+    PyObject *method = PyObject_GetAttr(obj, attr_name.ptr());
+    if (method == nullptr) {
+        // Attribute lookup failed: swallow the Python error and report "none".
+        PyErr_Clear();
+        return object();
+    }
+    if (managed_by_us || PyCallable_Check(method) != 0) {
+        // Hand the new reference from PyObject_GetAttr over to the result.
+        return reinterpret_steal<object>(method);
+    }
+    Py_DECREF(method);
+    return object();
+}
+// Calls the conduit method of `src` (if it has one) with the platform ABI id,
+// a capsule wrapping `cpp_type_info`, and the request b"raw_pointer_ephemeral".
+// Returns the pointer stored in the capsule that comes back, or nullptr when
+// there is no conduit method or the result is not a capsule.
+inline void *try_raw_pointer_ephemeral_from_cpp_conduit(handle src,
+                                                        const std::type_info *cpp_type_info) {
+    object method = try_get_cpp_conduit_method(src.ptr());
+    if (!method) {
+        return nullptr;
+    }
+    capsule cpp_type_info_capsule(const_cast<void *>(static_cast<const void *>(cpp_type_info)),
+                                  typeid(std::type_info).name());
+    object cpp_conduit = method(bytes(PYBIND11_PLATFORM_ABI_ID),
+                                cpp_type_info_capsule,
+                                bytes("raw_pointer_ephemeral"));
+    if (!isinstance<capsule>(cpp_conduit)) {
+        return nullptr;
+    }
+    return reinterpret_borrow<capsule>(cpp_conduit).get_pointer();
+}
+#define PYBIND11_HAS_CPP_CONDUIT 1
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)

tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/gil.h ADDED Viewed

	@@ -0,0 +1,219 @@

+/*
+    pybind11/gil.h: RAII helpers for managing the GIL
+    Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/
+#pragma once
+#include "detail/common.h"
+#include <cassert>
+#if !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT)
+#    include "detail/internals.h"
+#endif
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
+// forward declarations
+PyThreadState *get_thread_state_unchecked();
+PYBIND11_NAMESPACE_END(detail)
+#if !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT)
+/* The functions below essentially reproduce the PyGILState_* API using a RAII
+ * pattern, but there are a few important differences:
+ *
+ * 1. When acquiring the GIL from a non-main thread during the finalization
+ *    phase, the GILState API blindly terminates the calling thread, which
+ *    is often not what is wanted. This API does not do this.
+ *
+ * 2. The gil_scoped_release class can optionally cut the relationship
+ *    of a PyThreadState and its associated thread, which allows moving it to
+ *    another thread (this is a fairly rare/advanced use case).
+ *
+ * 3. The reference count of an acquired thread state can be controlled. This
+ *    can be handy to prevent cases where callbacks issued from an external
+ *    thread would otherwise constantly construct and destroy thread state data
+ *    structures.
+ *
+ * See the Python bindings of NanoGUI (http://github.com/wjakob/nanogui) for an
+ * example which uses features 2 and 3 to migrate the Python thread of
+ * execution to another thread (to run the event loop on the original thread,
+ * in this case).
+ */
+class gil_scoped_acquire { // Scoped guard: the constructor acquires the GIL when needed; the destructor undoes it.
+public:
+    PYBIND11_NOINLINE gil_scoped_acquire() {
+        auto &internals = detail::get_internals();
+        tstate = (PyThreadState *) PYBIND11_TLS_GET_VALUE(internals.tstate); // thread state stored under pybind11's own TLS key, if any
+        if (!tstate) {
+            /* Check if the GIL was acquired using the PyGILState_* API instead (e.g. if
+               calling from a Python thread). Since we use a different key, this ensures
+               we don't create a new thread state and deadlock in PyEval_AcquireThread
+               below. Note we don't save this state with internals.tstate, since we don't
+               create it we would fail to clear it (its reference count should be > 0). */
+            tstate = PyGILState_GetThisThreadState();
+        }
+        if (!tstate) { // neither source had a thread state: create one and store it under our TLS key
+            tstate = PyThreadState_New(internals.istate);
+#    if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+            if (!tstate) {
+                pybind11_fail("scoped_acquire: could not create thread state!");
+            }
+#    endif
+            tstate->gilstate_counter = 0; // NOTE(review): tstate is only null-checked when PYBIND11_DETAILED_ERROR_MESSAGES is defined
+            PYBIND11_TLS_REPLACE_VALUE(internals.tstate, tstate);
+        } else {
+            release = detail::get_thread_state_unchecked() != tstate; // acquire (and later release) only if tstate is not already current
+        }
+        if (release) {
+            PyEval_AcquireThread(tstate);
+        }
+        inc_ref();
+    }
+    gil_scoped_acquire(const gil_scoped_acquire &) = delete;
+    gil_scoped_acquire &operator=(const gil_scoped_acquire &) = delete;
+    void inc_ref() { ++tstate->gilstate_counter; } // bumps the per-thread-state counter that dec_ref() counts down
+    PYBIND11_NOINLINE void dec_ref() { // drops one reference; at zero the thread state is cleared and removed from TLS
+        --tstate->gilstate_counter;
+#    if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+        if (detail::get_thread_state_unchecked() != tstate) {
+            pybind11_fail("scoped_acquire::dec_ref(): thread state must be current!");
+        }
+        if (tstate->gilstate_counter < 0) {
+            pybind11_fail("scoped_acquire::dec_ref(): reference count underflow!");
+        }
+#    endif
+        if (tstate->gilstate_counter == 0) {
+#    if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+            if (!release) {
+                pybind11_fail("scoped_acquire::dec_ref(): internal error!");
+            }
+#    endif
+            PyThreadState_Clear(tstate);
+            if (active) { // skipped after disarm()
+                PyThreadState_DeleteCurrent();
+            }
+            PYBIND11_TLS_DELETE_VALUE(detail::get_internals().tstate);
+            release = false; // the destructor must not call PyEval_SaveThread() after this point
+        }
+    }
+    /// This method will disable the PyThreadState_DeleteCurrent call and the
+    /// GIL won't be acquired. This method should be used if the interpreter
+    /// could be shutting down when this is called, as thread deletion is not
+    /// allowed during shutdown. Check _Py_IsFinalizing() on Python 3.7+, and
+    /// protect subsequent code.
+    PYBIND11_NOINLINE void disarm() { active = false; }
+    PYBIND11_NOINLINE ~gil_scoped_acquire() {
+        dec_ref();
+        if (release) { // still true only if dec_ref() did not reach zero and the constructor acquired the thread
+            PyEval_SaveThread();
+        }
+    }
+private:
+    PyThreadState *tstate = nullptr; // thread state this guard operates on
+    bool release = true;             // whether the destructor should call PyEval_SaveThread()
+    bool active = true;              // cleared by disarm(); gates PyThreadState_DeleteCurrent()
+};
+class gil_scoped_release { // Scoped guard: the constructor saves the thread state via PyEval_SaveThread(); the destructor restores it.
+public:
+    // PRECONDITION: The GIL must be held when this constructor is called.
+    explicit gil_scoped_release(bool disassoc = false) : disassoc(disassoc) {
+        assert(PyGILState_Check());
+        // `get_internals()` must be called here unconditionally in order to initialize
+        // `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an
+        // initialization race could occur as multiple threads try `gil_scoped_acquire`.
+        auto &internals = detail::get_internals();
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
+        tstate = PyEval_SaveThread();
+        if (disassoc) { // also remove the thread state from pybind11's TLS key
+            // Python >= 3.7 can remove this, it's an int before 3.7
+            // NOLINTNEXTLINE(readability-qualified-auto)
+            auto key = internals.tstate;
+            PYBIND11_TLS_DELETE_VALUE(key);
+        }
+    }
+    gil_scoped_release(const gil_scoped_release &) = delete;
+    gil_scoped_release &operator=(const gil_scoped_release &) = delete;
+    /// This method will disable the PyEval_RestoreThread call in the
+    /// destructor, so the GIL won't be re-acquired. This method should be used
+    /// if the interpreter could be shutting down when this is called (see the
+    /// note in the destructor). Check _Py_IsFinalizing() on Python 3.7+, and
+    /// protect subsequent code.
+    PYBIND11_NOINLINE void disarm() { active = false; }
+    ~gil_scoped_release() {
+        if (!tstate) { // nothing was saved, so there is nothing to restore
+            return;
+        }
+        // `PyEval_RestoreThread()` should not be called if runtime is finalizing
+        if (active) {
+            PyEval_RestoreThread(tstate);
+        }
+        if (disassoc) { // put the thread state back under pybind11's TLS key (done even after disarm())
+            // Python >= 3.7 can remove this, it's an int before 3.7
+            // NOLINTNEXTLINE(readability-qualified-auto)
+            auto key = detail::get_internals().tstate;
+            PYBIND11_TLS_REPLACE_VALUE(key, tstate);
+        }
+    }
+private:
+    PyThreadState *tstate; // value returned by PyEval_SaveThread() in the constructor
+    bool disassoc;         // whether the TLS entry is removed on construction and reinstated on destruction
+    bool active = true;    // cleared by disarm(); gates PyEval_RestoreThread()
+};
+#else // PYBIND11_SIMPLE_GIL_MANAGEMENT
+// PYBIND11_SIMPLE_GIL_MANAGEMENT variant: pairs PyGILState_Ensure() in the
+// constructor with PyGILState_Release() in the destructor.
+class gil_scoped_acquire {
+public:
+    gil_scoped_acquire() : state(PyGILState_Ensure()) {}
+    ~gil_scoped_acquire() { PyGILState_Release(state); }
+    gil_scoped_acquire(const gil_scoped_acquire &) = delete;
+    gil_scoped_acquire &operator=(const gil_scoped_acquire &) = delete;
+    // No-op in this variant.
+    void disarm() {}
+private:
+    PyGILState_STATE state; // token handed back to PyGILState_Release()
+};
+// PYBIND11_SIMPLE_GIL_MANAGEMENT variant: pairs PyEval_SaveThread() in the
+// constructor with PyEval_RestoreThread() in the destructor.
+class gil_scoped_release {
+public:
+    // PRECONDITION: The GIL must be held when this constructor is called.
+    gil_scoped_release() {
+        assert(PyGILState_Check());
+        saved = PyEval_SaveThread();
+    }
+    ~gil_scoped_release() { PyEval_RestoreThread(saved); }
+    gil_scoped_release(const gil_scoped_release &) = delete;
+    gil_scoped_release &operator=(const gil_scoped_release &) = delete;
+    // No-op in this variant.
+    void disarm() {}
+private:
+    PyThreadState *saved; // thread state returned by PyEval_SaveThread()
+};
+#endif // PYBIND11_SIMPLE_GIL_MANAGEMENT
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)

tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/iostream.h ADDED Viewed

	@@ -0,0 +1,265 @@

+/*
+    pybind11/iostream.h -- Tools to assist with redirecting cout and cerr to Python
+    Copyright (c) 2017 Henry F. Schreiner
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+    WARNING: The implementation in this file is NOT thread safe. Multiple
+    threads writing to a redirected ostream concurrently cause data races
+    and potentially buffer overflows. Therefore it is currently a requirement
+    that all (possibly) concurrent redirected ostream writes are protected by
+    a mutex.
+    #HelpAppreciated: Work on iostream.h thread safety.
+    For more background see the discussions under
+    https://github.com/pybind/pybind11/pull/2982 and
+    https://github.com/pybind/pybind11/pull/2995.
+*/
+#pragma once
+#include "pybind11.h"
+#include <algorithm>
+#include <cstring>
+#include <iostream>
+#include <iterator>
+#include <memory>
+#include <ostream>
+#include <streambuf>
+#include <string>
+#include <utility>
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
+// Buffer that writes to Python instead of C++: buffered bytes are passed to the Python stream's write(), followed by flush()
+class pythonbuf : public std::streambuf {
+private:
+    using traits_type = std::streambuf::traits_type;
+    const size_t buf_size;            // size of the allocation behind d_buffer
+    std::unique_ptr<char[]> d_buffer; // storage for the put area
+    object pywrite;                   // bound `write` attribute of the Python stream
+    object pyflush;                   // bound `flush` attribute of the Python stream
+    int overflow(int c) override {
+        if (!traits_type::eq_int_type(c, traits_type::eof())) {
+            *pptr() = traits_type::to_char_type(c); // the constructor ends the put area one byte before the allocation ends, leaving room for this byte
+            pbump(1);
+        }
+        return sync() == 0 ? traits_type::not_eof(c) : traits_type::eof();
+    }
+    // Computes how many bytes at the end of the buffer are part of an
+    // incomplete sequence of UTF-8 bytes.
+    // Precondition: pbase() < pptr()
+    size_t utf8_remainder() const {
+        const auto rbase = std::reverse_iterator<char *>(pbase()); // reverse end: one before the first buffered byte
+        const auto rpptr = std::reverse_iterator<char *>(pptr());  // reverse begin: dereferences to the last buffered byte
+        auto is_ascii = [](char c) { return (static_cast<unsigned char>(c) & 0x80) == 0x00; };
+        auto is_leading = [](char c) { return (static_cast<unsigned char>(c) & 0xC0) == 0xC0; };
+        auto is_leading_2b = [](char c) { return static_cast<unsigned char>(c) <= 0xDF; };
+        auto is_leading_3b = [](char c) { return static_cast<unsigned char>(c) <= 0xEF; };
+        // If the last character is ASCII, there are no incomplete code points
+        if (is_ascii(*rpptr)) {
+            return 0;
+        }
+        // Otherwise, work back from the end of the buffer and find the first
+        // UTF-8 leading byte
+        const auto rpend = rbase - rpptr >= 3 ? rpptr + 3 : rbase; // inspect at most the last 3 buffered bytes
+        const auto leading = std::find_if(rpptr, rpend, is_leading);
+        if (leading == rbase) { // reached the start of the buffer without finding a leading byte
+            return 0;
+        }
+        const auto dist = static_cast<size_t>(leading - rpptr); // number of bytes that follow the leading byte
+        size_t remainder = 0;
+        if (dist == 0) {
+            remainder = 1; // 1-byte code point is impossible
+        } else if (dist == 1) {
+            remainder = is_leading_2b(*leading) ? 0 : dist + 1; // a 2-byte sequence is complete; longer ones are still missing bytes
+        } else if (dist == 2) {
+            remainder = is_leading_3b(*leading) ? 0 : dist + 1; // a sequence of up to 3 bytes is complete; a 4-byte one is not
+        }
+        // else if (dist >= 3), at least 4 bytes before encountering an UTF-8
+        // leading byte, either no remainder or invalid UTF-8.
+        // Invalid UTF-8 will cause an exception later when converting
+        // to a Python string, so that's not handled here.
+        return remainder;
+    }
+    // This function must be non-virtual to be called in a destructor.
+    int _sync() {
+        if (pbase() != pptr()) { // If buffer is not empty
+            gil_scoped_acquire tmp; // held for the Python calls below
+            // This subtraction cannot be negative, so dropping the sign.
+            auto size = static_cast<size_t>(pptr() - pbase());
+            size_t remainder = utf8_remainder();
+            if (size > remainder) { // there is at least one complete byte sequence to emit
+                str line(pbase(), size - remainder);
+                pywrite(std::move(line));
+                pyflush();
+            }
+            // Copy the remainder at the end of the buffer to the beginning:
+            if (remainder > 0) {
+                std::memmove(pbase(), pptr() - remainder, remainder);
+            }
+            setp(pbase(), epptr()); // reset the put pointer to the start of the buffer
+            pbump(static_cast<int>(remainder)); // then skip past the bytes kept for the next sync
+        }
+        return 0;
+    }
+    int sync() override { return _sync(); }
+public:
+    explicit pythonbuf(const object &pyostream, size_t buffer_size = 1024)
+        : buf_size(buffer_size), d_buffer(new char[buf_size]), pywrite(pyostream.attr("write")),
+          pyflush(pyostream.attr("flush")) {
+        setp(d_buffer.get(), d_buffer.get() + buf_size - 1); // put area is one byte shorter than the allocation; NOTE(review): buffer_size == 0 is not guarded
+    }
+    pythonbuf(pythonbuf &&) = default;
+    /// Sync before destroy
+    ~pythonbuf() override { _sync(); }
+};
+PYBIND11_NAMESPACE_END(detail)
+/** \rst
+    This is a move-only guard that redirects output.
+    .. code-block:: cpp
+        #include <pybind11/iostream.h>
+        ...
+        {
+            py::scoped_ostream_redirect output;
+            std::cout << "Hello, World!"; // Python stdout
+        } // <-- return std::cout to normal
+    You can explicitly pass the c++ stream and the python object,
+    for example to guard stderr instead.
+    .. code-block:: cpp
+        {
+            py::scoped_ostream_redirect output{
+                std::cerr, py::module::import("sys").attr("stderr")};
+            std::cerr << "Hello, World!"; // Python stderr
+        }
+ \endrst */
+class scoped_ostream_redirect {
+protected:
+    std::streambuf *old;      // stream buffer that was installed before the redirect
+    std::ostream &costream;   // the C++ stream being redirected
+    detail::pythonbuf buffer; // replacement buffer that forwards to the Python stream
+public:
+    // Copying and assignment are disabled; move construction is defaulted.
+    // NOTE(review): the defaulted move constructor does not call rdbuf() again,
+    // so the stream is not re-pointed at the new object's buffer -- confirm
+    // this is intended.
+    scoped_ostream_redirect(const scoped_ostream_redirect &) = delete;
+    scoped_ostream_redirect &operator=(const scoped_ostream_redirect &) = delete;
+    scoped_ostream_redirect(scoped_ostream_redirect &&other) = default;
+    scoped_ostream_redirect &operator=(scoped_ostream_redirect &&) = delete;
+    // Installs `buffer` on the stream and remembers the previous buffer in `old`.
+    explicit scoped_ostream_redirect(std::ostream &costream = std::cout,
+                                     const object &pyostream
+                                     = module_::import("sys").attr("stdout"))
+        : costream(costream), buffer(pyostream) {
+        old = this->costream.rdbuf(&this->buffer);
+    }
+    // Puts the previous stream buffer back on the stream.
+    ~scoped_ostream_redirect() { costream.rdbuf(old); }
+};
+/** \rst
+    Like `scoped_ostream_redirect`, but redirects cerr by default. This class
+    is provided primarily to make ``py::call_guard`` easier to use.
+    .. code-block:: cpp
+     m.def("noisy_func", &noisy_func,
+           py::call_guard<scoped_ostream_redirect,
+                          scoped_estream_redirect>());
+\endrst */
+// Same guard as scoped_ostream_redirect; only the defaults differ
+// (std::cerr and Python's sys.stderr).
+class scoped_estream_redirect : public scoped_ostream_redirect {
+public:
+    explicit scoped_estream_redirect(std::ostream &costream = std::cerr,
+                                     const object &pyostream
+                                     = module_::import("sys").attr("stderr"))
+        : scoped_ostream_redirect{costream, pyostream} {}
+};
+PYBIND11_NAMESPACE_BEGIN(detail)
+// Class to redirect output as a context manager. C++ backend.
+class OstreamRedirect {
+public:
+    // Records which of the two streams enter() should redirect.
+    explicit OstreamRedirect(bool do_stdout = true, bool do_stderr = true)
+        : do_stdout_(do_stdout), do_stderr_(do_stderr) {}
+    // Creates a redirect guard for each selected stream, replacing any guard
+    // that is already held.
+    void enter() {
+        if (do_stdout_) {
+            redirect_stdout.reset(new scoped_ostream_redirect());
+        }
+        if (do_stderr_) {
+            redirect_stderr.reset(new scoped_estream_redirect());
+        }
+    }
+    // Destroys the guards (stdout first, then stderr).
+    void exit() {
+        redirect_stdout = nullptr;
+        redirect_stderr = nullptr;
+    }
+private:
+    bool do_stdout_;
+    bool do_stderr_;
+    std::unique_ptr<scoped_ostream_redirect> redirect_stdout;
+    std::unique_ptr<scoped_estream_redirect> redirect_stderr;
+};
+PYBIND11_NAMESPACE_END(detail)
+/** \rst
+    This is a helper function to add a C++ redirect context manager to Python
+    instead of using a C++ guard. To use it, add the following to your binding code:
+    .. code-block:: cpp
+        #include <pybind11/iostream.h>
+        ...
+        py::add_ostream_redirect(m, "ostream_redirect");
+    You now have a Python context manager that redirects your output:
+    .. code-block:: python
+        with m.ostream_redirect():
+            m.print_to_cout_function()
+    This manager can optionally be told which streams to operate on:
+    .. code-block:: python
+        with m.ostream_redirect(stdout=True, stderr=True):
+            m.noisy_function_with_error_printing()
+ \endrst */
+// Registers detail::OstreamRedirect on module `m` under `name` as a
+// module-local class with a (stdout, stderr) constructor plus __enter__ and
+// __exit__, and returns the class object.
+inline class_<detail::OstreamRedirect>
+add_ostream_redirect(module_ m, const std::string &name = "ostream_redirect") {
+    class_<detail::OstreamRedirect> redirect_cls(std::move(m), name.c_str(), module_local());
+    redirect_cls.def(init<bool, bool>(), arg("stdout") = true, arg("stderr") = true);
+    redirect_cls.def("__enter__", &detail::OstreamRedirect::enter);
+    // __exit__ receives the exception triple; it is accepted and ignored.
+    redirect_cls.def("__exit__",
+                     [](detail::OstreamRedirect &self_, const args &) { self_.exit(); });
+    return redirect_cls;
+}
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_C.cpython-311-x86_64-linux-gnu.so ADDED Viewed

Binary file (37.9 kB). View file