Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +11 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Scanning.cpython-311-x86_64-linux-gnu.so +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Code.cpython-311.pyc +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Nodes.cpython-311.pyc +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/ParseTreeTransforms.cpython-311.pyc +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/PyrexTypes.cpython-311.pyc +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/__pycache__/typing_extensions.cpython-311.pyc +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/_unix.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/INSTALLER +1 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/RECORD +104 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/WHEEL +5 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/top_level.txt +1 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_C.cpython-311-x86_64-linux-gnu.so +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/jinja2/__pycache__/compiler.cpython-311.pyc +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/__pycache__/ctx_mp.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__init__.py +77 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libelefun.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libhyper.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libmpc.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libelefun.py +1428 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libhyper.py +1150 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libintmath.py +584 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libmpc.py +835 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_functions2.cpython-311.pyc +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/__init__.py +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/Openacc/cupti_openacc.h +98 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_activity.h +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_events.h +1371 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_pcsampling_util.h +419 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_result.h +328 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_runtime_cbid.h +447 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_target.h +43 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_version.h +130 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_meta.h +2941 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_runtime_api_meta.h +2139 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_vdpau_interop_meta.h +38 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/include/cudnn_cnn_infer.h +571 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/cufftw.h +454 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/lib/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/idna/__pycache__/idnadata.cpython-311.pyc +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/LICENSE +29 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/RECORD +65 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/REQUESTED +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/entry_points.txt +5 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/detail/cpp_conduit.h +77 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/gil.h +219 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/iostream.h +265 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_C.cpython-311-x86_64-linux-gnu.so +0 -0
.gitattributes
CHANGED
|
@@ -51,3 +51,14 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Plex/Trans
|
|
| 51 |
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/ExprNodes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 52 |
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/idna/__pycache__/uts46data.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 53 |
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/rich/__pycache__/_emoji_codes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/ExprNodes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 52 |
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/idna/__pycache__/uts46data.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 53 |
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/rich/__pycache__/_emoji_codes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Scanning.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/idna/__pycache__/idnadata.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/PyrexTypes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/__pycache__/gen.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Code.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/__pycache__/typing_extensions.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/ParseTreeTransforms.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_functions2.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/jinja2/__pycache__/compiler.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_C.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Nodes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Scanning.cpython-311-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35bbd7708e61d6b2d4704c7139018d3eae67bca303d9fa03228b50845f6fffe6
|
| 3 |
+
size 340320
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Code.cpython-311.pyc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e03d22fd7cc8b4e378f65e07858c4720dcc03e0fa3553c776863e4969826cfd4
|
| 3 |
+
size 145746
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Nodes.cpython-311.pyc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c423f97f1ac36f06a8a2c6ff723696608c3e094001049a85ad421706ae558dea
|
| 3 |
+
size 522167
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/ParseTreeTransforms.cpython-311.pyc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab3f1d1811e8f1f97f96bc002bc8705a4adb7a26f43def577bf24b25263f4b32
|
| 3 |
+
size 213081
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/PyrexTypes.cpython-311.pyc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:465b72a0af764658a62bbb1d50e50b9a762ba16ddb1a6be0dd5b3b1f15c8a205
|
| 3 |
+
size 254554
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/__pycache__/typing_extensions.cpython-311.pyc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f505b823a26bd0da98ceb5e93ba4f79513f56cebf4f8cb1c8ed579dcdabaac32
|
| 3 |
+
size 129942
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (1.44 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/_unix.cpython-311.pyc
ADDED
|
Binary file (3.58 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/INSTALLER
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pip
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/RECORD
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fsspec-2024.2.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 2 |
+
fsspec-2024.2.0.dist-info/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
|
| 3 |
+
fsspec-2024.2.0.dist-info/METADATA,sha256=uwzW1Braxnd_QGVI8W6J0KHi5KTiTJEm8YzSUdG-_Dc,6786
|
| 4 |
+
fsspec-2024.2.0.dist-info/RECORD,,
|
| 5 |
+
fsspec-2024.2.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
| 6 |
+
fsspec-2024.2.0.dist-info/top_level.txt,sha256=blt2pDrQDwN3Gklcw13CSPLQRd6aaOgJ8AxqrW395MI,7
|
| 7 |
+
fsspec/__init__.py,sha256=2kT62GfFK-AjgS-LgwSsCo_VA2IePvsyv8Ash5oiaFA,1982
|
| 8 |
+
fsspec/__pycache__/__init__.cpython-311.pyc,,
|
| 9 |
+
fsspec/__pycache__/_version.cpython-311.pyc,,
|
| 10 |
+
fsspec/__pycache__/archive.cpython-311.pyc,,
|
| 11 |
+
fsspec/__pycache__/asyn.cpython-311.pyc,,
|
| 12 |
+
fsspec/__pycache__/caching.cpython-311.pyc,,
|
| 13 |
+
fsspec/__pycache__/callbacks.cpython-311.pyc,,
|
| 14 |
+
fsspec/__pycache__/compression.cpython-311.pyc,,
|
| 15 |
+
fsspec/__pycache__/config.cpython-311.pyc,,
|
| 16 |
+
fsspec/__pycache__/conftest.cpython-311.pyc,,
|
| 17 |
+
fsspec/__pycache__/core.cpython-311.pyc,,
|
| 18 |
+
fsspec/__pycache__/dircache.cpython-311.pyc,,
|
| 19 |
+
fsspec/__pycache__/exceptions.cpython-311.pyc,,
|
| 20 |
+
fsspec/__pycache__/fuse.cpython-311.pyc,,
|
| 21 |
+
fsspec/__pycache__/generic.cpython-311.pyc,,
|
| 22 |
+
fsspec/__pycache__/gui.cpython-311.pyc,,
|
| 23 |
+
fsspec/__pycache__/mapping.cpython-311.pyc,,
|
| 24 |
+
fsspec/__pycache__/parquet.cpython-311.pyc,,
|
| 25 |
+
fsspec/__pycache__/registry.cpython-311.pyc,,
|
| 26 |
+
fsspec/__pycache__/spec.cpython-311.pyc,,
|
| 27 |
+
fsspec/__pycache__/transaction.cpython-311.pyc,,
|
| 28 |
+
fsspec/__pycache__/utils.cpython-311.pyc,,
|
| 29 |
+
fsspec/_version.py,sha256=onTKKWe4fXkBjQxbTwM82SUT0H3x4U17IYrciFAryaU,500
|
| 30 |
+
fsspec/archive.py,sha256=S__DzfZj-urAN3tp2W6jJ6YDiXG1fAl7FjvWUN73qIE,2386
|
| 31 |
+
fsspec/asyn.py,sha256=kJ45sFFya2lZsmu2v8CVc8ZPRs8AccEzAy6Jot2ylkU,36157
|
| 32 |
+
fsspec/caching.py,sha256=N45pzJdD4w5FOX_sxGvHWirggPNB66JTGP1HH6fpSck,28781
|
| 33 |
+
fsspec/callbacks.py,sha256=BDIwLzK6rr_0V5ch557fSzsivCElpdqhXr5dZ9Te-EE,9210
|
| 34 |
+
fsspec/compression.py,sha256=Yyd8FXw2rwWRtVoRVah_yguv-J7BUcBo4yDu6Qt52a0,4859
|
| 35 |
+
fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
|
| 36 |
+
fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
|
| 37 |
+
fsspec/core.py,sha256=0yCj1Z5MhbSDIQiqFs49VORl9QaGwV6hp9bXdkIoPIo,22363
|
| 38 |
+
fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
|
| 39 |
+
fsspec/exceptions.py,sha256=xcS7LiRrQ748kvOB9mrUR14kpjNztrHgEkZWi9M-VaI,330
|
| 40 |
+
fsspec/fuse.py,sha256=66amOa6wdIbS0DMhhfAPUoOB37HPorfXD1izV0prmTY,10145
|
| 41 |
+
fsspec/generic.py,sha256=NuNaP66OaphwMbuLHRFBLda78TD81isa9O4ozJqbUv0,13455
|
| 42 |
+
fsspec/gui.py,sha256=XKoXZpUhRE7jOhRCJH4-jRbKhVu56aS8h9tecvPD3nc,13932
|
| 43 |
+
fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 44 |
+
fsspec/implementations/__pycache__/__init__.cpython-311.pyc,,
|
| 45 |
+
fsspec/implementations/__pycache__/arrow.cpython-311.pyc,,
|
| 46 |
+
fsspec/implementations/__pycache__/cache_mapper.cpython-311.pyc,,
|
| 47 |
+
fsspec/implementations/__pycache__/cache_metadata.cpython-311.pyc,,
|
| 48 |
+
fsspec/implementations/__pycache__/cached.cpython-311.pyc,,
|
| 49 |
+
fsspec/implementations/__pycache__/dask.cpython-311.pyc,,
|
| 50 |
+
fsspec/implementations/__pycache__/data.cpython-311.pyc,,
|
| 51 |
+
fsspec/implementations/__pycache__/dbfs.cpython-311.pyc,,
|
| 52 |
+
fsspec/implementations/__pycache__/dirfs.cpython-311.pyc,,
|
| 53 |
+
fsspec/implementations/__pycache__/ftp.cpython-311.pyc,,
|
| 54 |
+
fsspec/implementations/__pycache__/git.cpython-311.pyc,,
|
| 55 |
+
fsspec/implementations/__pycache__/github.cpython-311.pyc,,
|
| 56 |
+
fsspec/implementations/__pycache__/http.cpython-311.pyc,,
|
| 57 |
+
fsspec/implementations/__pycache__/jupyter.cpython-311.pyc,,
|
| 58 |
+
fsspec/implementations/__pycache__/libarchive.cpython-311.pyc,,
|
| 59 |
+
fsspec/implementations/__pycache__/local.cpython-311.pyc,,
|
| 60 |
+
fsspec/implementations/__pycache__/memory.cpython-311.pyc,,
|
| 61 |
+
fsspec/implementations/__pycache__/reference.cpython-311.pyc,,
|
| 62 |
+
fsspec/implementations/__pycache__/sftp.cpython-311.pyc,,
|
| 63 |
+
fsspec/implementations/__pycache__/smb.cpython-311.pyc,,
|
| 64 |
+
fsspec/implementations/__pycache__/tar.cpython-311.pyc,,
|
| 65 |
+
fsspec/implementations/__pycache__/webhdfs.cpython-311.pyc,,
|
| 66 |
+
fsspec/implementations/__pycache__/zip.cpython-311.pyc,,
|
| 67 |
+
fsspec/implementations/arrow.py,sha256=_7TLuV6ZzNlpmUU_v6ud56u2wadzsKmY5qugPBxgMEs,8649
|
| 68 |
+
fsspec/implementations/cache_mapper.py,sha256=iHgBA6gjzDJ7_mBboHFzpLTf55HP3UEwUOZ43xyUK4M,2429
|
| 69 |
+
fsspec/implementations/cache_metadata.py,sha256=ZvyA7Y3KK-5Ct4E5pELzD6mH_5T03XqaKVT96qYDADU,8576
|
| 70 |
+
fsspec/implementations/cached.py,sha256=LbbPbeUup07O0y7gXD_atFgajWM9p1vlDKu_BOyLfbo,30943
|
| 71 |
+
fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
|
| 72 |
+
fsspec/implementations/data.py,sha256=Oti0dKzyeadnVIedo3s8CADoh9bNM-96_6viTEYr4lo,1245
|
| 73 |
+
fsspec/implementations/dbfs.py,sha256=cix9OYUveuSOx5UO5uRUwNUkYqjzyY0fkKnca1kTgZ0,15014
|
| 74 |
+
fsspec/implementations/dirfs.py,sha256=inDIRSDPhI1_ud1MMBFrpZQ11VIAMJ_dZQtbE4V08Ng,11384
|
| 75 |
+
fsspec/implementations/ftp.py,sha256=rp6cTog8xqjDPlKdSLKcsyP7K593_ByMabxGbNSEpTo,11655
|
| 76 |
+
fsspec/implementations/git.py,sha256=vKGI-Vd5q4H2RrvhebkPc9NwlfkZ980OUGhebeCw-M0,4034
|
| 77 |
+
fsspec/implementations/github.py,sha256=0kIiKkeAaROuHgdWBHVQFrzJ2ZfoDgymCehL_kJXHYA,7565
|
| 78 |
+
fsspec/implementations/http.py,sha256=PkhfgUV3-T7fG2Jf-NLX9doH52snV5Wmw91uVA9k74M,29454
|
| 79 |
+
fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
|
| 80 |
+
fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
|
| 81 |
+
fsspec/implementations/local.py,sha256=nxiRKg9FAQHTQss9-ET8ZzDXPGhSOktgkxrg0ffMs2I,13454
|
| 82 |
+
fsspec/implementations/memory.py,sha256=2iU--pOV2KCTrS-d5K8VKSygh9MPk2D7NZ_C8lMMEIw,9701
|
| 83 |
+
fsspec/implementations/reference.py,sha256=0iGu8mscaQ3a5iTlRNByytQ3_-1Bj8__ARqVwyy4q2M,43871
|
| 84 |
+
fsspec/implementations/sftp.py,sha256=fMY9XZcmpjszQ2tCqO_TPaJesaeD_Dv7ptYzgUPGoO0,5631
|
| 85 |
+
fsspec/implementations/smb.py,sha256=k3RtzW97lJtYuw_QpP1rJRFnUBmSsw9twFjUCex0a5U,10591
|
| 86 |
+
fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
|
| 87 |
+
fsspec/implementations/webhdfs.py,sha256=wqVfno7z0TY1HepaIvKTUUcl_bi5NkV6qWsST8t_s7Y,16745
|
| 88 |
+
fsspec/implementations/zip.py,sha256=JDX-3HOI15qUl6VTBsNPuDp5RVN6s2n3Bywd4mMu0T0,4347
|
| 89 |
+
fsspec/mapping.py,sha256=WFEXRWxujQwfzzkRP5tpdIE0265okAtlP97qFZGvV1k,8165
|
| 90 |
+
fsspec/parquet.py,sha256=qVxDhwc960SGOt5etcYAJxCr-7HQKP01687KpDR02Gw,19463
|
| 91 |
+
fsspec/registry.py,sha256=-dl7sh2tsfhMA2uxz5KQDsPFehQTgMJIbVjNq6QLoKU,11145
|
| 92 |
+
fsspec/spec.py,sha256=3t96RgizRN_slIuHXnuR0bXjVUfBS1TfuDrEua4oQvE,66277
|
| 93 |
+
fsspec/tests/abstract/__init__.py,sha256=i1wcFixV6QhOwdoB24c8oXjzobISNqiKVz9kl2DvAY8,10028
|
| 94 |
+
fsspec/tests/abstract/__pycache__/__init__.cpython-311.pyc,,
|
| 95 |
+
fsspec/tests/abstract/__pycache__/common.cpython-311.pyc,,
|
| 96 |
+
fsspec/tests/abstract/__pycache__/copy.cpython-311.pyc,,
|
| 97 |
+
fsspec/tests/abstract/__pycache__/get.cpython-311.pyc,,
|
| 98 |
+
fsspec/tests/abstract/__pycache__/put.cpython-311.pyc,,
|
| 99 |
+
fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
|
| 100 |
+
fsspec/tests/abstract/copy.py,sha256=gU5-d97U3RSde35Vp4RxPY4rWwL744HiSrJ8IBOp9-8,19967
|
| 101 |
+
fsspec/tests/abstract/get.py,sha256=vNR4HztvTR7Cj56AMo7_tx7TeYz1Jgr_2Wb8Lv-UiBY,20755
|
| 102 |
+
fsspec/tests/abstract/put.py,sha256=7aih17OKB_IZZh1Mkq1eBDIjobhtMQmI8x-Pw-S_aZk,21201
|
| 103 |
+
fsspec/transaction.py,sha256=jeexB-H6Aw_gN6Z7hoKKe6v8zizITq39-gyTgpipIKE,2251
|
| 104 |
+
fsspec/utils.py,sha256=_VX_0VwDtoAFSjMYrxvJvnPNX9FMoHO5BlFHXJ0bHFI,23053
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/WHEEL
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Wheel-Version: 1.0
|
| 2 |
+
Generator: bdist_wheel (0.42.0)
|
| 3 |
+
Root-Is-Purelib: true
|
| 4 |
+
Tag: py3-none-any
|
| 5 |
+
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
fsspec
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_C.cpython-311-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d0c8228a395e1b7975c5d22cd5fe655e5a7b7024723a69164e0c9045aee847d
|
| 3 |
+
size 324168
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/jinja2/__pycache__/compiler.cpython-311.pyc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc6ec603b289fea3017e8bb0c8eb537328f368d775f0aee16f2837595da3258b
|
| 3 |
+
size 110499
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/__pycache__/ctx_mp.cpython-311.pyc
ADDED
|
Binary file (71.2 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__init__.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .libmpf import (prec_to_dps, dps_to_prec, repr_dps,
|
| 2 |
+
round_down, round_up, round_floor, round_ceiling, round_nearest,
|
| 3 |
+
to_pickable, from_pickable, ComplexResult,
|
| 4 |
+
fzero, fnzero, fone, fnone, ftwo, ften, fhalf, fnan, finf, fninf,
|
| 5 |
+
math_float_inf, round_int, normalize, normalize1,
|
| 6 |
+
from_man_exp, from_int, to_man_exp, to_int, mpf_ceil, mpf_floor,
|
| 7 |
+
mpf_nint, mpf_frac,
|
| 8 |
+
from_float, from_npfloat, from_Decimal, to_float, from_rational, to_rational, to_fixed,
|
| 9 |
+
mpf_rand, mpf_eq, mpf_hash, mpf_cmp, mpf_lt, mpf_le, mpf_gt, mpf_ge,
|
| 10 |
+
mpf_pos, mpf_neg, mpf_abs, mpf_sign, mpf_add, mpf_sub, mpf_sum,
|
| 11 |
+
mpf_mul, mpf_mul_int, mpf_shift, mpf_frexp,
|
| 12 |
+
mpf_div, mpf_rdiv_int, mpf_mod, mpf_pow_int,
|
| 13 |
+
mpf_perturb,
|
| 14 |
+
to_digits_exp, to_str, str_to_man_exp, from_str, from_bstr, to_bstr,
|
| 15 |
+
mpf_sqrt, mpf_hypot)
|
| 16 |
+
|
| 17 |
+
from .libmpc import (mpc_one, mpc_zero, mpc_two, mpc_half,
|
| 18 |
+
mpc_is_inf, mpc_is_infnan, mpc_to_str, mpc_to_complex, mpc_hash,
|
| 19 |
+
mpc_conjugate, mpc_is_nonzero, mpc_add, mpc_add_mpf,
|
| 20 |
+
mpc_sub, mpc_sub_mpf, mpc_pos, mpc_neg, mpc_shift, mpc_abs,
|
| 21 |
+
mpc_arg, mpc_floor, mpc_ceil, mpc_nint, mpc_frac, mpc_mul, mpc_square,
|
| 22 |
+
mpc_mul_mpf, mpc_mul_imag_mpf, mpc_mul_int,
|
| 23 |
+
mpc_div, mpc_div_mpf, mpc_reciprocal, mpc_mpf_div,
|
| 24 |
+
complex_int_pow, mpc_pow, mpc_pow_mpf, mpc_pow_int,
|
| 25 |
+
mpc_sqrt, mpc_nthroot, mpc_cbrt, mpc_exp, mpc_log, mpc_cos, mpc_sin,
|
| 26 |
+
mpc_tan, mpc_cos_pi, mpc_sin_pi, mpc_cosh, mpc_sinh, mpc_tanh,
|
| 27 |
+
mpc_atan, mpc_acos, mpc_asin, mpc_asinh, mpc_acosh, mpc_atanh,
|
| 28 |
+
mpc_fibonacci, mpf_expj, mpf_expjpi, mpc_expj, mpc_expjpi,
|
| 29 |
+
mpc_cos_sin, mpc_cos_sin_pi)
|
| 30 |
+
|
| 31 |
+
from .libelefun import (ln2_fixed, mpf_ln2, ln10_fixed, mpf_ln10,
|
| 32 |
+
pi_fixed, mpf_pi, e_fixed, mpf_e, phi_fixed, mpf_phi,
|
| 33 |
+
degree_fixed, mpf_degree,
|
| 34 |
+
mpf_pow, mpf_nthroot, mpf_cbrt, log_int_fixed, agm_fixed,
|
| 35 |
+
mpf_log, mpf_log_hypot, mpf_exp, mpf_cos_sin, mpf_cos, mpf_sin, mpf_tan,
|
| 36 |
+
mpf_cos_sin_pi, mpf_cos_pi, mpf_sin_pi, mpf_cosh_sinh,
|
| 37 |
+
mpf_cosh, mpf_sinh, mpf_tanh, mpf_atan, mpf_atan2, mpf_asin,
|
| 38 |
+
mpf_acos, mpf_asinh, mpf_acosh, mpf_atanh, mpf_fibonacci)
|
| 39 |
+
|
| 40 |
+
from .libhyper import (NoConvergence, make_hyp_summator,
|
| 41 |
+
mpf_erf, mpf_erfc, mpf_ei, mpc_ei, mpf_e1, mpc_e1, mpf_expint,
|
| 42 |
+
mpf_ci_si, mpf_ci, mpf_si, mpc_ci, mpc_si, mpf_besseljn,
|
| 43 |
+
mpc_besseljn, mpf_agm, mpf_agm1, mpc_agm, mpc_agm1,
|
| 44 |
+
mpf_ellipk, mpc_ellipk, mpf_ellipe, mpc_ellipe)
|
| 45 |
+
|
| 46 |
+
from .gammazeta import (catalan_fixed, mpf_catalan,
|
| 47 |
+
khinchin_fixed, mpf_khinchin, glaisher_fixed, mpf_glaisher,
|
| 48 |
+
apery_fixed, mpf_apery, euler_fixed, mpf_euler, mertens_fixed,
|
| 49 |
+
mpf_mertens, twinprime_fixed, mpf_twinprime,
|
| 50 |
+
mpf_bernoulli, bernfrac, mpf_gamma_int,
|
| 51 |
+
mpf_factorial, mpc_factorial, mpf_gamma, mpc_gamma,
|
| 52 |
+
mpf_loggamma, mpc_loggamma, mpf_rgamma, mpc_rgamma,
|
| 53 |
+
mpf_harmonic, mpc_harmonic, mpf_psi0, mpc_psi0,
|
| 54 |
+
mpf_psi, mpc_psi, mpf_zeta_int, mpf_zeta, mpc_zeta,
|
| 55 |
+
mpf_altzeta, mpc_altzeta, mpf_zetasum, mpc_zetasum)
|
| 56 |
+
|
| 57 |
+
from .libmpi import (mpi_str,
|
| 58 |
+
mpi_from_str, mpi_to_str,
|
| 59 |
+
mpi_eq, mpi_ne,
|
| 60 |
+
mpi_lt, mpi_le, mpi_gt, mpi_ge,
|
| 61 |
+
mpi_add, mpi_sub, mpi_delta, mpi_mid,
|
| 62 |
+
mpi_pos, mpi_neg, mpi_abs, mpi_mul, mpi_div, mpi_exp,
|
| 63 |
+
mpi_log, mpi_sqrt, mpi_pow_int, mpi_pow, mpi_cos_sin,
|
| 64 |
+
mpi_cos, mpi_sin, mpi_tan, mpi_cot,
|
| 65 |
+
mpi_atan, mpi_atan2,
|
| 66 |
+
mpci_pos, mpci_neg, mpci_add, mpci_sub, mpci_mul, mpci_div, mpci_pow,
|
| 67 |
+
mpci_abs, mpci_pow, mpci_exp, mpci_log, mpci_cos, mpci_sin,
|
| 68 |
+
mpi_gamma, mpci_gamma, mpi_loggamma, mpci_loggamma,
|
| 69 |
+
mpi_rgamma, mpci_rgamma, mpi_factorial, mpci_factorial)
|
| 70 |
+
|
| 71 |
+
from .libintmath import (trailing, bitcount, numeral, bin_to_radix,
|
| 72 |
+
isqrt, isqrt_small, isqrt_fast, sqrt_fixed, sqrtrem, ifib, ifac,
|
| 73 |
+
list_primes, isprime, moebius, gcd, eulernum, stirling1, stirling2)
|
| 74 |
+
|
| 75 |
+
from .backend import (gmpy, sage, BACKEND, STRICT, MPZ, MPZ_TYPE,
|
| 76 |
+
MPZ_ZERO, MPZ_ONE, MPZ_TWO, MPZ_THREE, MPZ_FIVE, int_types,
|
| 77 |
+
HASH_MODULUS, HASH_BITS)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libelefun.cpython-311.pyc
ADDED
|
Binary file (54.8 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libhyper.cpython-311.pyc
ADDED
|
Binary file (52.9 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libmpc.cpython-311.pyc
ADDED
|
Binary file (43.8 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libelefun.py
ADDED
|
@@ -0,0 +1,1428 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
This module implements computation of elementary transcendental
|
| 3 |
+
functions (powers, logarithms, trigonometric and hyperbolic
|
| 4 |
+
functions, inverse trigonometric and hyperbolic) for real
|
| 5 |
+
floating-point numbers.
|
| 6 |
+
|
| 7 |
+
For complex and interval implementations of the same functions,
|
| 8 |
+
see libmpc and libmpi.
|
| 9 |
+
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import math
|
| 13 |
+
from bisect import bisect
|
| 14 |
+
|
| 15 |
+
from .backend import xrange
|
| 16 |
+
from .backend import MPZ, MPZ_ZERO, MPZ_ONE, MPZ_TWO, MPZ_FIVE, BACKEND
|
| 17 |
+
|
| 18 |
+
from .libmpf import (
|
| 19 |
+
round_floor, round_ceiling, round_down, round_up,
|
| 20 |
+
round_nearest, round_fast,
|
| 21 |
+
ComplexResult,
|
| 22 |
+
bitcount, bctable, lshift, rshift, giant_steps, sqrt_fixed,
|
| 23 |
+
from_int, to_int, from_man_exp, to_fixed, to_float, from_float,
|
| 24 |
+
from_rational, normalize,
|
| 25 |
+
fzero, fone, fnone, fhalf, finf, fninf, fnan,
|
| 26 |
+
mpf_cmp, mpf_sign, mpf_abs,
|
| 27 |
+
mpf_pos, mpf_neg, mpf_add, mpf_sub, mpf_mul, mpf_div, mpf_shift,
|
| 28 |
+
mpf_rdiv_int, mpf_pow_int, mpf_sqrt,
|
| 29 |
+
reciprocal_rnd, negative_rnd, mpf_perturb,
|
| 30 |
+
isqrt_fast
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
from .libintmath import ifib
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
#-------------------------------------------------------------------------------
|
| 37 |
+
# Tuning parameters
|
| 38 |
+
#-------------------------------------------------------------------------------
|
| 39 |
+
|
| 40 |
+
# Cutoff for computing exp from cosh+sinh. This reduces the
|
| 41 |
+
# number of terms by half, but also requires a square root which
|
| 42 |
+
# is expensive with the pure-Python square root code.
|
| 43 |
+
if BACKEND == 'python':
|
| 44 |
+
EXP_COSH_CUTOFF = 600
|
| 45 |
+
else:
|
| 46 |
+
EXP_COSH_CUTOFF = 400
|
| 47 |
+
# Cutoff for using more than 2 series
|
| 48 |
+
EXP_SERIES_U_CUTOFF = 1500
|
| 49 |
+
|
| 50 |
+
# Also basically determined by sqrt
|
| 51 |
+
if BACKEND == 'python':
|
| 52 |
+
COS_SIN_CACHE_PREC = 400
|
| 53 |
+
else:
|
| 54 |
+
COS_SIN_CACHE_PREC = 200
|
| 55 |
+
COS_SIN_CACHE_STEP = 8
|
| 56 |
+
cos_sin_cache = {}
|
| 57 |
+
|
| 58 |
+
# Number of integer logarithms to cache (for zeta sums)
|
| 59 |
+
MAX_LOG_INT_CACHE = 2000
|
| 60 |
+
log_int_cache = {}
|
| 61 |
+
|
| 62 |
+
LOG_TAYLOR_PREC = 2500 # Use Taylor series with caching up to this prec
|
| 63 |
+
LOG_TAYLOR_SHIFT = 9 # Cache log values in steps of size 2^-N
|
| 64 |
+
log_taylor_cache = {}
|
| 65 |
+
# prec/size ratio of x for fastest convergence in AGM formula
|
| 66 |
+
LOG_AGM_MAG_PREC_RATIO = 20
|
| 67 |
+
|
| 68 |
+
ATAN_TAYLOR_PREC = 3000 # Same as for log
|
| 69 |
+
ATAN_TAYLOR_SHIFT = 7 # steps of size 2^-N
|
| 70 |
+
atan_taylor_cache = {}
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
# ~= next power of two + 20
|
| 74 |
+
cache_prec_steps = [22,22]
|
| 75 |
+
for k in xrange(1, bitcount(LOG_TAYLOR_PREC)+1):
|
| 76 |
+
cache_prec_steps += [min(2**k,LOG_TAYLOR_PREC)+20] * 2**(k-1)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
#----------------------------------------------------------------------------#
|
| 80 |
+
# #
|
| 81 |
+
# Elementary mathematical constants #
|
| 82 |
+
# #
|
| 83 |
+
#----------------------------------------------------------------------------#
|
| 84 |
+
|
| 85 |
+
def constant_memo(f):
|
| 86 |
+
"""
|
| 87 |
+
Decorator for caching computed values of mathematical
|
| 88 |
+
constants. This decorator should be applied to a
|
| 89 |
+
function taking a single argument prec as input and
|
| 90 |
+
returning a fixed-point value with the given precision.
|
| 91 |
+
"""
|
| 92 |
+
f.memo_prec = -1
|
| 93 |
+
f.memo_val = None
|
| 94 |
+
def g(prec, **kwargs):
|
| 95 |
+
memo_prec = f.memo_prec
|
| 96 |
+
if prec <= memo_prec:
|
| 97 |
+
return f.memo_val >> (memo_prec-prec)
|
| 98 |
+
newprec = int(prec*1.05+10)
|
| 99 |
+
f.memo_val = f(newprec, **kwargs)
|
| 100 |
+
f.memo_prec = newprec
|
| 101 |
+
return f.memo_val >> (newprec-prec)
|
| 102 |
+
g.__name__ = f.__name__
|
| 103 |
+
g.__doc__ = f.__doc__
|
| 104 |
+
return g
|
| 105 |
+
|
| 106 |
+
def def_mpf_constant(fixed):
|
| 107 |
+
"""
|
| 108 |
+
Create a function that computes the mpf value for a mathematical
|
| 109 |
+
constant, given a function that computes the fixed-point value.
|
| 110 |
+
|
| 111 |
+
Assumptions: the constant is positive and has magnitude ~= 1;
|
| 112 |
+
the fixed-point function rounds to floor.
|
| 113 |
+
"""
|
| 114 |
+
def f(prec, rnd=round_fast):
|
| 115 |
+
wp = prec + 20
|
| 116 |
+
v = fixed(wp)
|
| 117 |
+
if rnd in (round_up, round_ceiling):
|
| 118 |
+
v += 1
|
| 119 |
+
return normalize(0, v, -wp, bitcount(v), prec, rnd)
|
| 120 |
+
f.__doc__ = fixed.__doc__
|
| 121 |
+
return f
|
| 122 |
+
|
| 123 |
+
def bsp_acot(q, a, b, hyperbolic):
|
| 124 |
+
if b - a == 1:
|
| 125 |
+
a1 = MPZ(2*a + 3)
|
| 126 |
+
if hyperbolic or a&1:
|
| 127 |
+
return MPZ_ONE, a1 * q**2, a1
|
| 128 |
+
else:
|
| 129 |
+
return -MPZ_ONE, a1 * q**2, a1
|
| 130 |
+
m = (a+b)//2
|
| 131 |
+
p1, q1, r1 = bsp_acot(q, a, m, hyperbolic)
|
| 132 |
+
p2, q2, r2 = bsp_acot(q, m, b, hyperbolic)
|
| 133 |
+
return q2*p1 + r1*p2, q1*q2, r1*r2
|
| 134 |
+
|
| 135 |
+
# the acoth(x) series converges like the geometric series for x^2
|
| 136 |
+
# N = ceil(p*log(2)/(2*log(x)))
|
| 137 |
+
def acot_fixed(a, prec, hyperbolic):
|
| 138 |
+
"""
|
| 139 |
+
Compute acot(a) or acoth(a) for an integer a with binary splitting; see
|
| 140 |
+
http://numbers.computation.free.fr/Constants/Algorithms/splitting.html
|
| 141 |
+
"""
|
| 142 |
+
N = int(0.35 * prec/math.log(a) + 20)
|
| 143 |
+
p, q, r = bsp_acot(a, 0,N, hyperbolic)
|
| 144 |
+
return ((p+q)<<prec)//(q*a)
|
| 145 |
+
|
| 146 |
+
def machin(coefs, prec, hyperbolic=False):
|
| 147 |
+
"""
|
| 148 |
+
Evaluate a Machin-like formula, i.e., a linear combination of
|
| 149 |
+
acot(n) or acoth(n) for specific integer values of n, using fixed-
|
| 150 |
+
point arithmetic. The input should be a list [(c, n), ...], giving
|
| 151 |
+
c*acot[h](n) + ...
|
| 152 |
+
"""
|
| 153 |
+
extraprec = 10
|
| 154 |
+
s = MPZ_ZERO
|
| 155 |
+
for a, b in coefs:
|
| 156 |
+
s += MPZ(a) * acot_fixed(MPZ(b), prec+extraprec, hyperbolic)
|
| 157 |
+
return (s >> extraprec)
|
| 158 |
+
|
| 159 |
+
# Logarithms of integers are needed for various computations involving
|
| 160 |
+
# logarithms, powers, radix conversion, etc
|
| 161 |
+
|
| 162 |
+
@constant_memo
|
| 163 |
+
def ln2_fixed(prec):
|
| 164 |
+
"""
|
| 165 |
+
Computes ln(2). This is done with a hyperbolic Machin-type formula,
|
| 166 |
+
with binary splitting at high precision.
|
| 167 |
+
"""
|
| 168 |
+
return machin([(18, 26), (-2, 4801), (8, 8749)], prec, True)
|
| 169 |
+
|
| 170 |
+
@constant_memo
|
| 171 |
+
def ln10_fixed(prec):
|
| 172 |
+
"""
|
| 173 |
+
Computes ln(10). This is done with a hyperbolic Machin-type formula.
|
| 174 |
+
"""
|
| 175 |
+
return machin([(46, 31), (34, 49), (20, 161)], prec, True)
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
r"""
|
| 179 |
+
For computation of pi, we use the Chudnovsky series:
|
| 180 |
+
|
| 181 |
+
oo
|
| 182 |
+
___ k
|
| 183 |
+
1 \ (-1) (6 k)! (A + B k)
|
| 184 |
+
----- = ) -----------------------
|
| 185 |
+
12 pi /___ 3 3k+3/2
|
| 186 |
+
(3 k)! (k!) C
|
| 187 |
+
k = 0
|
| 188 |
+
|
| 189 |
+
where A, B, and C are certain integer constants. This series adds roughly
|
| 190 |
+
14 digits per term. Note that C^(3/2) can be extracted so that the
|
| 191 |
+
series contains only rational terms. This makes binary splitting very
|
| 192 |
+
efficient.
|
| 193 |
+
|
| 194 |
+
The recurrence formulas for the binary splitting were taken from
|
| 195 |
+
ftp://ftp.gmplib.org/pub/src/gmp-chudnovsky.c
|
| 196 |
+
|
| 197 |
+
Previously, Machin's formula was used at low precision and the AGM iteration
|
| 198 |
+
was used at high precision. However, the Chudnovsky series is essentially as
|
| 199 |
+
fast as the Machin formula at low precision and in practice about 3x faster
|
| 200 |
+
than the AGM at high precision (despite theoretically having a worse
|
| 201 |
+
asymptotic complexity), so there is no reason not to use it in all cases.
|
| 202 |
+
|
| 203 |
+
"""
|
| 204 |
+
|
| 205 |
+
# Constants in Chudnovsky's series
|
| 206 |
+
CHUD_A = MPZ(13591409)
|
| 207 |
+
CHUD_B = MPZ(545140134)
|
| 208 |
+
CHUD_C = MPZ(640320)
|
| 209 |
+
CHUD_D = MPZ(12)
|
| 210 |
+
|
| 211 |
+
def bs_chudnovsky(a, b, level, verbose):
|
| 212 |
+
"""
|
| 213 |
+
Computes the sum from a to b of the series in the Chudnovsky
|
| 214 |
+
formula. Returns g, p, q where p/q is the sum as an exact
|
| 215 |
+
fraction and g is a temporary value used to save work
|
| 216 |
+
for recursive calls.
|
| 217 |
+
"""
|
| 218 |
+
if b-a == 1:
|
| 219 |
+
g = MPZ((6*b-5)*(2*b-1)*(6*b-1))
|
| 220 |
+
p = b**3 * CHUD_C**3 // 24
|
| 221 |
+
q = (-1)**b * g * (CHUD_A+CHUD_B*b)
|
| 222 |
+
else:
|
| 223 |
+
if verbose and level < 4:
|
| 224 |
+
print(" binary splitting", a, b)
|
| 225 |
+
mid = (a+b)//2
|
| 226 |
+
g1, p1, q1 = bs_chudnovsky(a, mid, level+1, verbose)
|
| 227 |
+
g2, p2, q2 = bs_chudnovsky(mid, b, level+1, verbose)
|
| 228 |
+
p = p1*p2
|
| 229 |
+
g = g1*g2
|
| 230 |
+
q = q1*p2 + q2*g1
|
| 231 |
+
return g, p, q
|
| 232 |
+
|
| 233 |
+
@constant_memo
|
| 234 |
+
def pi_fixed(prec, verbose=False, verbose_base=None):
|
| 235 |
+
"""
|
| 236 |
+
Compute floor(pi * 2**prec) as a big integer.
|
| 237 |
+
|
| 238 |
+
This is done using Chudnovsky's series (see comments in
|
| 239 |
+
libelefun.py for details).
|
| 240 |
+
"""
|
| 241 |
+
# The Chudnovsky series gives 14.18 digits per term
|
| 242 |
+
N = int(prec/3.3219280948/14.181647462 + 2)
|
| 243 |
+
if verbose:
|
| 244 |
+
print("binary splitting with N =", N)
|
| 245 |
+
g, p, q = bs_chudnovsky(0, N, 0, verbose)
|
| 246 |
+
sqrtC = isqrt_fast(CHUD_C<<(2*prec))
|
| 247 |
+
v = p*CHUD_C*sqrtC//((q+CHUD_A*p)*CHUD_D)
|
| 248 |
+
return v
|
| 249 |
+
|
| 250 |
+
def degree_fixed(prec):
|
| 251 |
+
return pi_fixed(prec)//180
|
| 252 |
+
|
| 253 |
+
def bspe(a, b):
|
| 254 |
+
"""
|
| 255 |
+
Sum series for exp(1)-1 between a, b, returning the result
|
| 256 |
+
as an exact fraction (p, q).
|
| 257 |
+
"""
|
| 258 |
+
if b-a == 1:
|
| 259 |
+
return MPZ_ONE, MPZ(b)
|
| 260 |
+
m = (a+b)//2
|
| 261 |
+
p1, q1 = bspe(a, m)
|
| 262 |
+
p2, q2 = bspe(m, b)
|
| 263 |
+
return p1*q2+p2, q1*q2
|
| 264 |
+
|
| 265 |
+
@constant_memo
|
| 266 |
+
def e_fixed(prec):
|
| 267 |
+
"""
|
| 268 |
+
Computes exp(1). This is done using the ordinary Taylor series for
|
| 269 |
+
exp, with binary splitting. For a description of the algorithm,
|
| 270 |
+
see:
|
| 271 |
+
|
| 272 |
+
http://numbers.computation.free.fr/Constants/
|
| 273 |
+
Algorithms/splitting.html
|
| 274 |
+
"""
|
| 275 |
+
# Slight overestimate of N needed for 1/N! < 2**(-prec)
|
| 276 |
+
# This could be tightened for large N.
|
| 277 |
+
N = int(1.1*prec/math.log(prec) + 20)
|
| 278 |
+
p, q = bspe(0,N)
|
| 279 |
+
return ((p+q)<<prec)//q
|
| 280 |
+
|
| 281 |
+
@constant_memo
|
| 282 |
+
def phi_fixed(prec):
|
| 283 |
+
"""
|
| 284 |
+
Computes the golden ratio, (1+sqrt(5))/2
|
| 285 |
+
"""
|
| 286 |
+
prec += 10
|
| 287 |
+
a = isqrt_fast(MPZ_FIVE<<(2*prec)) + (MPZ_ONE << prec)
|
| 288 |
+
return a >> 11
|
| 289 |
+
|
| 290 |
+
mpf_phi = def_mpf_constant(phi_fixed)
|
| 291 |
+
mpf_pi = def_mpf_constant(pi_fixed)
|
| 292 |
+
mpf_e = def_mpf_constant(e_fixed)
|
| 293 |
+
mpf_degree = def_mpf_constant(degree_fixed)
|
| 294 |
+
mpf_ln2 = def_mpf_constant(ln2_fixed)
|
| 295 |
+
mpf_ln10 = def_mpf_constant(ln10_fixed)
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
@constant_memo
|
| 299 |
+
def ln_sqrt2pi_fixed(prec):
|
| 300 |
+
wp = prec + 10
|
| 301 |
+
# ln(sqrt(2*pi)) = ln(2*pi)/2
|
| 302 |
+
return to_fixed(mpf_log(mpf_shift(mpf_pi(wp), 1), wp), prec-1)
|
| 303 |
+
|
| 304 |
+
@constant_memo
|
| 305 |
+
def sqrtpi_fixed(prec):
|
| 306 |
+
return sqrt_fixed(pi_fixed(prec), prec)
|
| 307 |
+
|
| 308 |
+
mpf_sqrtpi = def_mpf_constant(sqrtpi_fixed)
|
| 309 |
+
mpf_ln_sqrt2pi = def_mpf_constant(ln_sqrt2pi_fixed)
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
#----------------------------------------------------------------------------#
|
| 313 |
+
# #
|
| 314 |
+
# Powers #
|
| 315 |
+
# #
|
| 316 |
+
#----------------------------------------------------------------------------#
|
| 317 |
+
|
| 318 |
+
def mpf_pow(s, t, prec, rnd=round_fast):
|
| 319 |
+
"""
|
| 320 |
+
Compute s**t. Raises ComplexResult if s is negative and t is
|
| 321 |
+
fractional.
|
| 322 |
+
"""
|
| 323 |
+
ssign, sman, sexp, sbc = s
|
| 324 |
+
tsign, tman, texp, tbc = t
|
| 325 |
+
if ssign and texp < 0:
|
| 326 |
+
raise ComplexResult("negative number raised to a fractional power")
|
| 327 |
+
if texp >= 0:
|
| 328 |
+
return mpf_pow_int(s, (-1)**tsign * (tman<<texp), prec, rnd)
|
| 329 |
+
# s**(n/2) = sqrt(s)**n
|
| 330 |
+
if texp == -1:
|
| 331 |
+
if tman == 1:
|
| 332 |
+
if tsign:
|
| 333 |
+
return mpf_div(fone, mpf_sqrt(s, prec+10,
|
| 334 |
+
reciprocal_rnd[rnd]), prec, rnd)
|
| 335 |
+
return mpf_sqrt(s, prec, rnd)
|
| 336 |
+
else:
|
| 337 |
+
if tsign:
|
| 338 |
+
return mpf_pow_int(mpf_sqrt(s, prec+10,
|
| 339 |
+
reciprocal_rnd[rnd]), -tman, prec, rnd)
|
| 340 |
+
return mpf_pow_int(mpf_sqrt(s, prec+10, rnd), tman, prec, rnd)
|
| 341 |
+
# General formula: s**t = exp(t*log(s))
|
| 342 |
+
# TODO: handle rnd direction of the logarithm carefully
|
| 343 |
+
c = mpf_log(s, prec+10, rnd)
|
| 344 |
+
return mpf_exp(mpf_mul(t, c), prec, rnd)
|
| 345 |
+
|
| 346 |
+
def int_pow_fixed(y, n, prec):
|
| 347 |
+
"""n-th power of a fixed point number with precision prec
|
| 348 |
+
|
| 349 |
+
Returns the power in the form man, exp,
|
| 350 |
+
man * 2**exp ~= y**n
|
| 351 |
+
"""
|
| 352 |
+
if n == 2:
|
| 353 |
+
return (y*y), 0
|
| 354 |
+
bc = bitcount(y)
|
| 355 |
+
exp = 0
|
| 356 |
+
workprec = 2 * (prec + 4*bitcount(n) + 4)
|
| 357 |
+
_, pm, pe, pbc = fone
|
| 358 |
+
while 1:
|
| 359 |
+
if n & 1:
|
| 360 |
+
pm = pm*y
|
| 361 |
+
pe = pe+exp
|
| 362 |
+
pbc += bc - 2
|
| 363 |
+
pbc = pbc + bctable[int(pm >> pbc)]
|
| 364 |
+
if pbc > workprec:
|
| 365 |
+
pm = pm >> (pbc-workprec)
|
| 366 |
+
pe += pbc - workprec
|
| 367 |
+
pbc = workprec
|
| 368 |
+
n -= 1
|
| 369 |
+
if not n:
|
| 370 |
+
break
|
| 371 |
+
y = y*y
|
| 372 |
+
exp = exp+exp
|
| 373 |
+
bc = bc + bc - 2
|
| 374 |
+
bc = bc + bctable[int(y >> bc)]
|
| 375 |
+
if bc > workprec:
|
| 376 |
+
y = y >> (bc-workprec)
|
| 377 |
+
exp += bc - workprec
|
| 378 |
+
bc = workprec
|
| 379 |
+
n = n // 2
|
| 380 |
+
return pm, pe
|
| 381 |
+
|
| 382 |
+
# froot(s, n, prec, rnd) computes the real n-th root of a
|
| 383 |
+
# positive mpf tuple s.
|
| 384 |
+
# To compute the root we start from a 50-bit estimate for r
|
| 385 |
+
# generated with ordinary floating-point arithmetic, and then refine
|
| 386 |
+
# the value to full accuracy using the iteration
|
| 387 |
+
|
| 388 |
+
# 1 / y \
|
| 389 |
+
# r = --- | (n-1) * r + ---------- |
|
| 390 |
+
# n+1 n \ n r_n**(n-1) /
|
| 391 |
+
|
| 392 |
+
# which is simply Newton's method applied to the equation r**n = y.
|
| 393 |
+
# With giant_steps(start, prec+extra) = [p0,...,pm, prec+extra]
|
| 394 |
+
# and y = man * 2**-shift one has
|
| 395 |
+
# (man * 2**exp)**(1/n) =
|
| 396 |
+
# y**(1/n) * 2**(start-prec/n) * 2**(p0-start) * ... * 2**(prec+extra-pm) *
|
| 397 |
+
# 2**((exp+shift-(n-1)*prec)/n -extra))
|
| 398 |
+
# The last factor is accounted for in the last line of froot.
|
| 399 |
+
|
| 400 |
+
def nthroot_fixed(y, n, prec, exp1):
|
| 401 |
+
start = 50
|
| 402 |
+
try:
|
| 403 |
+
y1 = rshift(y, prec - n*start)
|
| 404 |
+
r = MPZ(int(y1**(1.0/n)))
|
| 405 |
+
except OverflowError:
|
| 406 |
+
y1 = from_int(y1, start)
|
| 407 |
+
fn = from_int(n)
|
| 408 |
+
fn = mpf_rdiv_int(1, fn, start)
|
| 409 |
+
r = mpf_pow(y1, fn, start)
|
| 410 |
+
r = to_int(r)
|
| 411 |
+
extra = 10
|
| 412 |
+
extra1 = n
|
| 413 |
+
prevp = start
|
| 414 |
+
for p in giant_steps(start, prec+extra):
|
| 415 |
+
pm, pe = int_pow_fixed(r, n-1, prevp)
|
| 416 |
+
r2 = rshift(pm, (n-1)*prevp - p - pe - extra1)
|
| 417 |
+
B = lshift(y, 2*p-prec+extra1)//r2
|
| 418 |
+
r = (B + (n-1) * lshift(r, p-prevp))//n
|
| 419 |
+
prevp = p
|
| 420 |
+
return r
|
| 421 |
+
|
| 422 |
+
def mpf_nthroot(s, n, prec, rnd=round_fast):
|
| 423 |
+
"""nth-root of a positive number
|
| 424 |
+
|
| 425 |
+
Use the Newton method when faster, otherwise use x**(1/n)
|
| 426 |
+
"""
|
| 427 |
+
sign, man, exp, bc = s
|
| 428 |
+
if sign:
|
| 429 |
+
raise ComplexResult("nth root of a negative number")
|
| 430 |
+
if not man:
|
| 431 |
+
if s == fnan:
|
| 432 |
+
return fnan
|
| 433 |
+
if s == fzero:
|
| 434 |
+
if n > 0:
|
| 435 |
+
return fzero
|
| 436 |
+
if n == 0:
|
| 437 |
+
return fone
|
| 438 |
+
return finf
|
| 439 |
+
# Infinity
|
| 440 |
+
if not n:
|
| 441 |
+
return fnan
|
| 442 |
+
if n < 0:
|
| 443 |
+
return fzero
|
| 444 |
+
return finf
|
| 445 |
+
flag_inverse = False
|
| 446 |
+
if n < 2:
|
| 447 |
+
if n == 0:
|
| 448 |
+
return fone
|
| 449 |
+
if n == 1:
|
| 450 |
+
return mpf_pos(s, prec, rnd)
|
| 451 |
+
if n == -1:
|
| 452 |
+
return mpf_div(fone, s, prec, rnd)
|
| 453 |
+
# n < 0
|
| 454 |
+
rnd = reciprocal_rnd[rnd]
|
| 455 |
+
flag_inverse = True
|
| 456 |
+
extra_inverse = 5
|
| 457 |
+
prec += extra_inverse
|
| 458 |
+
n = -n
|
| 459 |
+
if n > 20 and (n >= 20000 or prec < int(233 + 28.3 * n**0.62)):
|
| 460 |
+
prec2 = prec + 10
|
| 461 |
+
fn = from_int(n)
|
| 462 |
+
nth = mpf_rdiv_int(1, fn, prec2)
|
| 463 |
+
r = mpf_pow(s, nth, prec2, rnd)
|
| 464 |
+
s = normalize(r[0], r[1], r[2], r[3], prec, rnd)
|
| 465 |
+
if flag_inverse:
|
| 466 |
+
return mpf_div(fone, s, prec-extra_inverse, rnd)
|
| 467 |
+
else:
|
| 468 |
+
return s
|
| 469 |
+
# Convert to a fixed-point number with prec2 bits.
|
| 470 |
+
prec2 = prec + 2*n - (prec%n)
|
| 471 |
+
# a few tests indicate that
|
| 472 |
+
# for 10 < n < 10**4 a bit more precision is needed
|
| 473 |
+
if n > 10:
|
| 474 |
+
prec2 += prec2//10
|
| 475 |
+
prec2 = prec2 - prec2%n
|
| 476 |
+
# Mantissa may have more bits than we need. Trim it down.
|
| 477 |
+
shift = bc - prec2
|
| 478 |
+
# Adjust exponents to make prec2 and exp+shift multiples of n.
|
| 479 |
+
sign1 = 0
|
| 480 |
+
es = exp+shift
|
| 481 |
+
if es < 0:
|
| 482 |
+
sign1 = 1
|
| 483 |
+
es = -es
|
| 484 |
+
if sign1:
|
| 485 |
+
shift += es%n
|
| 486 |
+
else:
|
| 487 |
+
shift -= es%n
|
| 488 |
+
man = rshift(man, shift)
|
| 489 |
+
extra = 10
|
| 490 |
+
exp1 = ((exp+shift-(n-1)*prec2)//n) - extra
|
| 491 |
+
rnd_shift = 0
|
| 492 |
+
if flag_inverse:
|
| 493 |
+
if rnd == 'u' or rnd == 'c':
|
| 494 |
+
rnd_shift = 1
|
| 495 |
+
else:
|
| 496 |
+
if rnd == 'd' or rnd == 'f':
|
| 497 |
+
rnd_shift = 1
|
| 498 |
+
man = nthroot_fixed(man+rnd_shift, n, prec2, exp1)
|
| 499 |
+
s = from_man_exp(man, exp1, prec, rnd)
|
| 500 |
+
if flag_inverse:
|
| 501 |
+
return mpf_div(fone, s, prec-extra_inverse, rnd)
|
| 502 |
+
else:
|
| 503 |
+
return s
|
| 504 |
+
|
| 505 |
+
def mpf_cbrt(s, prec, rnd=round_fast):
|
| 506 |
+
"""cubic root of a positive number"""
|
| 507 |
+
return mpf_nthroot(s, 3, prec, rnd)
|
| 508 |
+
|
| 509 |
+
#----------------------------------------------------------------------------#
|
| 510 |
+
# #
|
| 511 |
+
# Logarithms #
|
| 512 |
+
# #
|
| 513 |
+
#----------------------------------------------------------------------------#
|
| 514 |
+
|
| 515 |
+
|
| 516 |
+
def log_int_fixed(n, prec, ln2=None):
|
| 517 |
+
"""
|
| 518 |
+
Fast computation of log(n), caching the value for small n,
|
| 519 |
+
intended for zeta sums.
|
| 520 |
+
"""
|
| 521 |
+
if n in log_int_cache:
|
| 522 |
+
value, vprec = log_int_cache[n]
|
| 523 |
+
if vprec >= prec:
|
| 524 |
+
return value >> (vprec - prec)
|
| 525 |
+
wp = prec + 10
|
| 526 |
+
if wp <= LOG_TAYLOR_SHIFT:
|
| 527 |
+
if ln2 is None:
|
| 528 |
+
ln2 = ln2_fixed(wp)
|
| 529 |
+
r = bitcount(n)
|
| 530 |
+
x = n << (wp-r)
|
| 531 |
+
v = log_taylor_cached(x, wp) + r*ln2
|
| 532 |
+
else:
|
| 533 |
+
v = to_fixed(mpf_log(from_int(n), wp+5), wp)
|
| 534 |
+
if n < MAX_LOG_INT_CACHE:
|
| 535 |
+
log_int_cache[n] = (v, wp)
|
| 536 |
+
return v >> (wp-prec)
|
| 537 |
+
|
| 538 |
+
def agm_fixed(a, b, prec):
|
| 539 |
+
"""
|
| 540 |
+
Fixed-point computation of agm(a,b), assuming
|
| 541 |
+
a, b both close to unit magnitude.
|
| 542 |
+
"""
|
| 543 |
+
i = 0
|
| 544 |
+
while 1:
|
| 545 |
+
anew = (a+b)>>1
|
| 546 |
+
if i > 4 and abs(a-anew) < 8:
|
| 547 |
+
return a
|
| 548 |
+
b = isqrt_fast(a*b)
|
| 549 |
+
a = anew
|
| 550 |
+
i += 1
|
| 551 |
+
return a
|
| 552 |
+
|
| 553 |
+
def log_agm(x, prec):
|
| 554 |
+
"""
|
| 555 |
+
Fixed-point computation of -log(x) = log(1/x), suitable
|
| 556 |
+
for large precision. It is required that 0 < x < 1. The
|
| 557 |
+
algorithm used is the Sasaki-Kanada formula
|
| 558 |
+
|
| 559 |
+
-log(x) = pi/agm(theta2(x)^2,theta3(x)^2). [1]
|
| 560 |
+
|
| 561 |
+
For faster convergence in the theta functions, x should
|
| 562 |
+
be chosen closer to 0.
|
| 563 |
+
|
| 564 |
+
Guard bits must be added by the caller.
|
| 565 |
+
|
| 566 |
+
HYPOTHESIS: if x = 2^(-n), n bits need to be added to
|
| 567 |
+
account for the truncation to a fixed-point number,
|
| 568 |
+
and this is the only significant cancellation error.
|
| 569 |
+
|
| 570 |
+
The number of bits lost to roundoff is small and can be
|
| 571 |
+
considered constant.
|
| 572 |
+
|
| 573 |
+
[1] Richard P. Brent, "Fast Algorithms for High-Precision
|
| 574 |
+
Computation of Elementary Functions (extended abstract)",
|
| 575 |
+
http://wwwmaths.anu.edu.au/~brent/pd/RNC7-Brent.pdf
|
| 576 |
+
|
| 577 |
+
"""
|
| 578 |
+
x2 = (x*x) >> prec
|
| 579 |
+
# Compute jtheta2(x)**2
|
| 580 |
+
s = a = b = x2
|
| 581 |
+
while a:
|
| 582 |
+
b = (b*x2) >> prec
|
| 583 |
+
a = (a*b) >> prec
|
| 584 |
+
s += a
|
| 585 |
+
s += (MPZ_ONE<<prec)
|
| 586 |
+
s = (s*s)>>(prec-2)
|
| 587 |
+
s = (s*isqrt_fast(x<<prec))>>prec
|
| 588 |
+
# Compute jtheta3(x)**2
|
| 589 |
+
t = a = b = x
|
| 590 |
+
while a:
|
| 591 |
+
b = (b*x2) >> prec
|
| 592 |
+
a = (a*b) >> prec
|
| 593 |
+
t += a
|
| 594 |
+
t = (MPZ_ONE<<prec) + (t<<1)
|
| 595 |
+
t = (t*t)>>prec
|
| 596 |
+
# Final formula
|
| 597 |
+
p = agm_fixed(s, t, prec)
|
| 598 |
+
return (pi_fixed(prec) << prec) // p
|
| 599 |
+
|
| 600 |
+
def log_taylor(x, prec, r=0):
|
| 601 |
+
"""
|
| 602 |
+
Fixed-point calculation of log(x). It is assumed that x is close
|
| 603 |
+
enough to 1 for the Taylor series to converge quickly. Convergence
|
| 604 |
+
can be improved by specifying r > 0 to compute
|
| 605 |
+
log(x^(1/2^r))*2^r, at the cost of performing r square roots.
|
| 606 |
+
|
| 607 |
+
The caller must provide sufficient guard bits.
|
| 608 |
+
"""
|
| 609 |
+
for i in xrange(r):
|
| 610 |
+
x = isqrt_fast(x<<prec)
|
| 611 |
+
one = MPZ_ONE << prec
|
| 612 |
+
v = ((x-one)<<prec)//(x+one)
|
| 613 |
+
sign = v < 0
|
| 614 |
+
if sign:
|
| 615 |
+
v = -v
|
| 616 |
+
v2 = (v*v) >> prec
|
| 617 |
+
v4 = (v2*v2) >> prec
|
| 618 |
+
s0 = v
|
| 619 |
+
s1 = v//3
|
| 620 |
+
v = (v*v4) >> prec
|
| 621 |
+
k = 5
|
| 622 |
+
while v:
|
| 623 |
+
s0 += v // k
|
| 624 |
+
k += 2
|
| 625 |
+
s1 += v // k
|
| 626 |
+
v = (v*v4) >> prec
|
| 627 |
+
k += 2
|
| 628 |
+
s1 = (s1*v2) >> prec
|
| 629 |
+
s = (s0+s1) << (1+r)
|
| 630 |
+
if sign:
|
| 631 |
+
return -s
|
| 632 |
+
return s
|
| 633 |
+
|
| 634 |
+
def log_taylor_cached(x, prec):
|
| 635 |
+
"""
|
| 636 |
+
Fixed-point computation of log(x), assuming x in (0.5, 2)
|
| 637 |
+
and prec <= LOG_TAYLOR_PREC.
|
| 638 |
+
"""
|
| 639 |
+
n = x >> (prec-LOG_TAYLOR_SHIFT)
|
| 640 |
+
cached_prec = cache_prec_steps[prec]
|
| 641 |
+
dprec = cached_prec - prec
|
| 642 |
+
if (n, cached_prec) in log_taylor_cache:
|
| 643 |
+
a, log_a = log_taylor_cache[n, cached_prec]
|
| 644 |
+
else:
|
| 645 |
+
a = n << (cached_prec - LOG_TAYLOR_SHIFT)
|
| 646 |
+
log_a = log_taylor(a, cached_prec, 8)
|
| 647 |
+
log_taylor_cache[n, cached_prec] = (a, log_a)
|
| 648 |
+
a >>= dprec
|
| 649 |
+
log_a >>= dprec
|
| 650 |
+
u = ((x - a) << prec) // a
|
| 651 |
+
v = (u << prec) // ((MPZ_TWO << prec) + u)
|
| 652 |
+
v2 = (v*v) >> prec
|
| 653 |
+
v4 = (v2*v2) >> prec
|
| 654 |
+
s0 = v
|
| 655 |
+
s1 = v//3
|
| 656 |
+
v = (v*v4) >> prec
|
| 657 |
+
k = 5
|
| 658 |
+
while v:
|
| 659 |
+
s0 += v//k
|
| 660 |
+
k += 2
|
| 661 |
+
s1 += v//k
|
| 662 |
+
v = (v*v4) >> prec
|
| 663 |
+
k += 2
|
| 664 |
+
s1 = (s1*v2) >> prec
|
| 665 |
+
s = (s0+s1) << 1
|
| 666 |
+
return log_a + s
|
| 667 |
+
|
| 668 |
+
def mpf_log(x, prec, rnd=round_fast):
|
| 669 |
+
"""
|
| 670 |
+
Compute the natural logarithm of the mpf value x. If x is negative,
|
| 671 |
+
ComplexResult is raised.
|
| 672 |
+
"""
|
| 673 |
+
sign, man, exp, bc = x
|
| 674 |
+
#------------------------------------------------------------------
|
| 675 |
+
# Handle special values
|
| 676 |
+
if not man:
|
| 677 |
+
if x == fzero: return fninf
|
| 678 |
+
if x == finf: return finf
|
| 679 |
+
if x == fnan: return fnan
|
| 680 |
+
if sign:
|
| 681 |
+
raise ComplexResult("logarithm of a negative number")
|
| 682 |
+
wp = prec + 20
|
| 683 |
+
#------------------------------------------------------------------
|
| 684 |
+
# Handle log(2^n) = log(n)*2.
|
| 685 |
+
# Here we catch the only possible exact value, log(1) = 0
|
| 686 |
+
if man == 1:
|
| 687 |
+
if not exp:
|
| 688 |
+
return fzero
|
| 689 |
+
return from_man_exp(exp*ln2_fixed(wp), -wp, prec, rnd)
|
| 690 |
+
mag = exp+bc
|
| 691 |
+
abs_mag = abs(mag)
|
| 692 |
+
#------------------------------------------------------------------
|
| 693 |
+
# Handle x = 1+eps, where log(x) ~ x. We need to check for
|
| 694 |
+
# cancellation when moving to fixed-point math and compensate
|
| 695 |
+
# by increasing the precision. Note that abs_mag in (0, 1) <=>
|
| 696 |
+
# 0.5 < x < 2 and x != 1
|
| 697 |
+
if abs_mag <= 1:
|
| 698 |
+
# Calculate t = x-1 to measure distance from 1 in bits
|
| 699 |
+
tsign = 1-abs_mag
|
| 700 |
+
if tsign:
|
| 701 |
+
tman = (MPZ_ONE<<bc) - man
|
| 702 |
+
else:
|
| 703 |
+
tman = man - (MPZ_ONE<<(bc-1))
|
| 704 |
+
tbc = bitcount(tman)
|
| 705 |
+
cancellation = bc - tbc
|
| 706 |
+
if cancellation > wp:
|
| 707 |
+
t = normalize(tsign, tman, abs_mag-bc, tbc, tbc, 'n')
|
| 708 |
+
return mpf_perturb(t, tsign, prec, rnd)
|
| 709 |
+
else:
|
| 710 |
+
wp += cancellation
|
| 711 |
+
# TODO: if close enough to 1, we could use Taylor series
|
| 712 |
+
# even in the AGM precision range, since the Taylor series
|
| 713 |
+
# converges rapidly
|
| 714 |
+
#------------------------------------------------------------------
|
| 715 |
+
# Another special case:
|
| 716 |
+
# n*log(2) is a good enough approximation
|
| 717 |
+
if abs_mag > 10000:
|
| 718 |
+
if bitcount(abs_mag) > wp:
|
| 719 |
+
return from_man_exp(exp*ln2_fixed(wp), -wp, prec, rnd)
|
| 720 |
+
#------------------------------------------------------------------
|
| 721 |
+
# General case.
|
| 722 |
+
# Perform argument reduction using log(x) = log(x*2^n) - n*log(2):
|
| 723 |
+
# If we are in the Taylor precision range, choose magnitude 0 or 1.
|
| 724 |
+
# If we are in the AGM precision range, choose magnitude -m for
|
| 725 |
+
# some large m; benchmarking on one machine showed m = prec/20 to be
|
| 726 |
+
# optimal between 1000 and 100,000 digits.
|
| 727 |
+
if wp <= LOG_TAYLOR_PREC:
|
| 728 |
+
m = log_taylor_cached(lshift(man, wp-bc), wp)
|
| 729 |
+
if mag:
|
| 730 |
+
m += mag*ln2_fixed(wp)
|
| 731 |
+
else:
|
| 732 |
+
optimal_mag = -wp//LOG_AGM_MAG_PREC_RATIO
|
| 733 |
+
n = optimal_mag - mag
|
| 734 |
+
x = mpf_shift(x, n)
|
| 735 |
+
wp += (-optimal_mag)
|
| 736 |
+
m = -log_agm(to_fixed(x, wp), wp)
|
| 737 |
+
m -= n*ln2_fixed(wp)
|
| 738 |
+
return from_man_exp(m, -wp, prec, rnd)
|
| 739 |
+
|
| 740 |
+
def mpf_log_hypot(a, b, prec, rnd):
    """
    Return log(sqrt(a^2 + b^2)), rounded to ``prec`` bits using ``rnd``.

    ``a`` and ``b`` are raw mpf tuples. Operands whose mantissa field
    (index 1) is zero are the special values 0, inf and nan, and are
    resolved without evaluating the sum of squares.
    """
    # Arrange for a special operand, if there is one, to sit in ``a``.
    if not b[1]:
        a, b = b, a
    a_special = not a[1]
    b_special = not b[1]
    if a_special and b_special:
        if a == b == fzero:
            return fninf
        # Otherwise either a nan is involved or some term is (+/- inf)^2.
        return fnan if fnan in (a, b) else finf
    if a_special:
        if a == fzero:
            # log(sqrt(0 + b^2)) = log(|b|)
            return mpf_log(mpf_abs(b), prec, rnd)
        return fnan if a == fnan else finf
    guard = 20
    # The squares are formed without rounding; the first sum is rounded.
    sq_a = mpf_mul(a, a)
    sq_b = mpf_mul(b, b)
    total = mpf_add(sq_a, sq_b, prec+guard)
    # Measure how close a^2 + b^2 is to 1, where log() loses accuracy.
    diff = mpf_add(total, fnone, 10)
    if diff == fzero or diff[2]+diff[3] < -guard//2:
        # Too close to 1: repeat the sum with enough bits to make it exact.
        total = mpf_add(sq_a, sq_b, prec+guard-min(sq_a[2], sq_b[2]))
    # log(sqrt(h)) = log(h)/2
    return mpf_shift(mpf_log(total, prec, rnd), -1)
|
| 778 |
+
|
| 779 |
+
|
| 780 |
+
#----------------------------------------------------------------------
|
| 781 |
+
# Inverse tangent
|
| 782 |
+
#
|
| 783 |
+
|
| 784 |
+
def atan_newton(x, prec):
    """
    Fixed-point arctangent computed with Newton's method.

    ``x`` is an integer holding the argument scaled by 2**prec; the return
    value is atan(x) in the same fixed-point format. The iteration solves
    tan(r) = x for r, starting from a machine-float estimate and raising
    the working precision along the schedule given by ``giant_steps``.
    """
    # Initial estimate from the float atan. For large prec only the top
    # 53 bits of x are used so the int -> float conversion stays in range.
    if prec >= 100:
        r = math.atan(int((x>>(prec-53)))/2.0**53)
    else:
        r = math.atan(int(x)/2.0**prec)
    # Convert the float estimate to a fixed-point integer with prevp bits.
    prevp = 50
    r = MPZ(int(r * 2.0**53) >> (53-prevp))
    # Guard bits added on top of every precision step.
    extra_p = 50
    for wp in giant_steps(prevp, prec):
        wp += extra_p
        # Rescale the current iterate to the new working precision.
        r = r << (wp-prevp)
        cos, sin = cos_sin_fixed(r, wp)
        tan = (sin << wp) // cos
        # Newton correction (tan(r) - x) / (1 + tan(r)^2), with x rescaled
        # from prec to wp bits.
        a = ((tan-rshift(x, prec-wp)) << wp) // ((MPZ_ONE<<wp) + ((tan**2)>>wp))
        r = r - a
        prevp = wp
    # Scale the result back to the requested precision.
    return rshift(r, prevp-prec)
|
| 801 |
+
|
| 802 |
+
def atan_taylor_get_cached(n, prec):
    """
    Return ``(a, atan(a))`` as fixed-point integers with ``prec`` bits,
    where ``a`` is the expansion point ``n / 2**ATAN_TAYLOR_SHIFT``.

    Values are memoised in ``atan_taylor_cache`` under the key
    ``(n, prec2)``, where ``prec2`` is ``prec`` rounded up to a power of
    two plus 20 bits, so that nearby precisions share one cache entry.
    """
    # Round to next power of 2 (plus a margin) so entries are computed in steps.
    cache_prec = (1 << bitcount(prec-1)) + 20
    key = (n, cache_prec)
    if key not in atan_taylor_cache:
        point = n << (cache_prec - ATAN_TAYLOR_SHIFT)
        atan_taylor_cache[key] = (point, atan_newton(point, cache_prec))
    point, atan_point = atan_taylor_cache[key]
    # Drop the bits beyond the precision the caller asked for.
    surplus = cache_prec - prec
    return (point >> surplus), (atan_point >> surplus)
|
| 816 |
+
|
| 817 |
+
def atan_taylor(x, prec):
    """
    Fixed-point arctangent using a cached expansion point plus a Taylor
    series.

    ``x`` is an integer scaled by 2**prec and the result uses the same
    scaling. With ``a`` the cached point obtained from the leading bits of
    ``x``, the code evaluates atan(x) = atan(a) + atan(v), where
    v = (x - a) / (1 + a*x).
    """
    # Index of the cached expansion point: the leading bits of x.
    n = (x >> (prec-ATAN_TAYLOR_SHIFT))
    a, atan_a = atan_taylor_get_cached(n, prec)
    d = x - a
    # v = d / (1 + a^2 + a*d); note that a^2 + a*d = a*x.
    s0 = v = (d << prec) // ((a**2 >> prec) + (a*d >> prec) + (MPZ_ONE << prec))
    v2 = (v**2 >> prec)
    v4 = (v2 * v2) >> prec
    # The alternating series v - v^3/3 + v^5/5 - ... is split in two sums:
    # s0 accumulates v^(4j+1)/(4j+1) and s1 accumulates v^(4j+1)/(4j+3).
    # s1 is multiplied by v^2 afterwards, giving the v^(4j+3) terms.
    s1 = v//3
    v = (v * v4) >> prec
    k = 5
    while v:
        s0 += v // k
        k += 2
        s1 += v // k
        v = (v * v4) >> prec
        k += 2
    s1 = (s1 * v2) >> prec
    s = s0 - s1
    return atan_a + s
|
| 836 |
+
|
| 837 |
+
def atan_inf(sign, prec, rnd):
    """
    Return the limit of atan at infinity as an mpf value: pi/2 when
    ``sign`` is false, -pi/2 otherwise.
    """
    if sign:
        # Compute pi/2 with the mirrored rounding mode, then negate.
        half_pi = mpf_shift(mpf_pi(prec, negative_rnd[rnd]), -1)
        return mpf_neg(half_pi)
    return mpf_shift(mpf_pi(prec, rnd), -1)
|
| 841 |
+
|
| 842 |
+
def mpf_atan(x, prec, rnd=round_fast):
    """
    Compute atan(x) for a raw mpf tuple ``x``, rounded to ``prec`` bits in
    direction ``rnd``.

    Special values: atan(0) = 0, atan(+/-inf) = +/-pi/2, anything else
    with a zero mantissa gives nan.
    """
    sign, man, exp, bc = x
    # A zero mantissa marks the special values 0, +inf, -inf and nan.
    if not man:
        if x == fzero: return fzero
        if x == finf: return atan_inf(0, prec, rnd)
        if x == fninf: return atan_inf(1, prec, rnd)
        return fnan
    mag = exp + bc
    # Essentially infinity
    if mag > prec+20:
        return atan_inf(sign, prec, rnd)
    # Essentially ~ x
    if -mag > prec+20:
        return mpf_perturb(x, 1-sign, prec, rnd)
    # Working precision: guard bits plus extra bits proportional to |mag|.
    wp = prec + 30 + abs(mag)
    # For large x, use atan(x) = pi/2 - atan(1/x)
    if mag >= 2:
        x = mpf_rdiv_int(1, x, wp)
        reciprocal = True
    else:
        reciprocal = False
    # Work with |x| as a fixed-point integer; the sign is restored at the end.
    t = to_fixed(x, wp)
    if sign:
        t = -t
    # Choose the fixed-point algorithm by working precision.
    if wp < ATAN_TAYLOR_PREC:
        a = atan_taylor(t, wp)
    else:
        a = atan_newton(t, wp)
    if reciprocal:
        # pi/2 in fixed point (with one unit added) minus atan(1/|x|).
        a = ((pi_fixed(wp)>>1)+1) - a
    if sign:
        a = -a
    return from_man_exp(a, -wp, prec, rnd)
|
| 875 |
+
|
| 876 |
+
# TODO: cleanup the special cases
|
| 877 |
+
def mpf_atan2(y, x, prec, rnd=round_fast):
    """
    Two-argument arctangent of the raw mpf values ``y`` and ``x``, rounded
    to ``prec`` bits in direction ``rnd``.

    Operands with a zero mantissa (0, inf, nan) are resolved by explicit
    cases. A negative ``y`` is handled by reflection, and the general case
    is atan(y/x), shifted by pi when ``x`` is negative.
    """
    xsign, xman, _xexp, _xbc = x
    ysign, yman, _yexp, _ybc = y
    if not yman:
        # y is 0, +/-inf or nan
        if y == fzero and x != fnan:
            return fzero if mpf_sign(x) >= 0 else mpf_pi(prec, rnd)
        if y not in (finf, fninf):
            return fnan
        if x in (finf, fninf):
            return fnan
        if y == finf:
            # pi/2
            return mpf_shift(mpf_pi(prec, rnd), -1)
        # -pi/2
        return mpf_neg(mpf_shift(mpf_pi(prec, negative_rnd[rnd]), -1))
    if ysign:
        # atan2(y, x) = -atan2(-y, x); mirror the rounding mode to match.
        mirrored = mpf_atan2(mpf_neg(y), x, prec, negative_rnd[rnd])
        return mpf_neg(mirrored)
    if not xman:
        # x is 0, +/-inf or nan while y is an ordinary positive number
        if x == fnan:
            return fnan
        if x == finf:
            return fzero
        if x == fninf:
            return mpf_pi(prec, rnd)
        if y == fzero:
            return fzero
        return mpf_shift(mpf_pi(prec, rnd), -1)
    # General case, computed with a few guard bits.
    quotient_atan = mpf_atan(mpf_div(y, x, prec+4), prec+4)
    if not xsign:
        return mpf_pos(quotient_atan, prec, rnd)
    return mpf_add(mpf_pi(prec+4), quotient_atan, prec, rnd)
|
| 911 |
+
|
| 912 |
+
def mpf_asin(x, prec, rnd=round_fast):
    """
    Compute asin(x) for a raw mpf tuple ``x`` via the identity
    asin(x) = 2*atan(x / (1 + sqrt(1 - x^2))).

    Raises ComplexResult when the magnitude test shows |x| > 1.
    """
    sign, man, exp, bc = x
    too_large = bc+exp > 0 and x not in (fone, fnone)
    if too_large:
        raise ComplexResult("asin(x) is real only for -1 <= x <= 1")
    wp = prec + 15
    x_squared = mpf_mul(x, x)
    root = mpf_sqrt(mpf_sub(fone, x_squared, wp), wp)
    denominator = mpf_add(fone, root, wp)
    ratio = mpf_div(x, denominator, wp)
    # Doubling is an exponent shift by one.
    return mpf_shift(mpf_atan(ratio, prec, rnd), 1)
|
| 922 |
+
|
| 923 |
+
def mpf_acos(x, prec, rnd=round_fast):
    """
    Compute acos(x) for a raw mpf tuple ``x`` via the identity
    acos(x) = 2*atan(sqrt(1 - x^2) / (1 + x)).

    acos(-1) is returned directly as pi, since the identity would divide
    by zero there. Raises ComplexResult when |x| > 1.
    """
    sign, man, exp, bc = x
    if bc + exp > 0:
        # Magnitude at least 1: only the exact values +1 and -1 are allowed.
        if x == fnone:
            return mpf_pi(prec, rnd)
        if x != fone:
            raise ComplexResult("acos(x) is real only for -1 <= x <= 1")
    wp = prec + 15
    x_squared = mpf_mul(x, x)
    numerator = mpf_sqrt(mpf_sub(fone, x_squared, wp), wp)
    denominator = mpf_add(fone, x, wp)
    ratio = mpf_div(numerator, denominator, wp)
    return mpf_shift(mpf_atan(ratio, prec, rnd), 1)
|
| 936 |
+
|
| 937 |
+
def mpf_asinh(x, prec, rnd=round_fast):
    """
    Compute asinh(x) for a raw mpf tuple ``x`` using
    asinh(|x|) = log(|x| + sqrt(x^2 + 1)) and the odd symmetry of asinh.
    """
    sign, man, exp, bc = x
    wp = prec + 20
    mag = exp+bc
    if mag < -8:
        if mag < -wp:
            # x is tiny relative to the precision: the result is x
            # adjusted by mpf_perturb.
            return mpf_perturb(x, 1-sign, prec, rnd)
        # Small x: add precision to compensate for the log argument
        # being close to 1.
        wp += (-mag)
    # Evaluate on |x| so the two terms in the logarithm never cancel.
    root = mpf_sqrt(mpf_add(mpf_mul(x, x), fone, wp), wp)
    log_arg = mpf_add(mpf_abs(x), root, wp)
    if not sign:
        return mpf_log(log_arg, prec, rnd)
    # Negative x: negate, with the mirrored rounding mode.
    return mpf_neg(mpf_log(log_arg, prec, negative_rnd[rnd]))
|
| 953 |
+
|
| 954 |
+
def mpf_acosh(x, prec, rnd=round_fast):
    """
    Compute acosh(x) = log(x + sqrt(x^2 - 1)) for a raw mpf tuple ``x``.

    Raises ComplexResult when x compares below 1.
    """
    wp = prec + 15
    below_one = mpf_cmp(x, fone) == -1
    if below_one:
        raise ComplexResult("acosh(x) is real only for x >= 1")
    x_squared = mpf_mul(x, x)
    root = mpf_sqrt(mpf_add(x_squared, fnone, wp), wp)
    log_arg = mpf_add(x, root, wp)
    return mpf_log(log_arg, prec, rnd)
|
| 961 |
+
|
| 962 |
+
def mpf_atanh(x, prec, rnd=round_fast):
    """
    Compute atanh(x) = log((1+x)/(1-x))/2 for a raw mpf tuple ``x``.

    atanh(+/-1) gives +/-inf. Raises ComplexResult for |x| > 1 and for
    infinite arguments.
    """
    sign, man, exp, bc = x
    is_special = (not man) and exp
    if is_special:
        if x in (fzero, fnan):
            return x
        raise ComplexResult("atanh(x) is real only for -1 <= x <= 1")
    mag = bc + exp
    if mag > 0:
        # |x| >= 1: only exactly +/-1 (mantissa 1, magnitude 1) is accepted.
        if mag == 1 and man == 1:
            return (finf, fninf)[sign]
        raise ComplexResult("atanh(x) is real only for -1 <= x <= 1")
    wp = prec + 15
    if mag < -8:
        if mag < -wp:
            # x is tiny relative to the precision: the result is x
            # adjusted by mpf_perturb.
            return mpf_perturb(x, sign, prec, rnd)
        # Small x: the log argument is close to 1, so add precision.
        wp += (-mag)
    numerator = mpf_add(x, fone, wp)
    denominator = mpf_sub(fone, x, wp)
    log_value = mpf_log(mpf_div(numerator, denominator, wp), prec, rnd)
    # Halving is an exponent shift by -1.
    return mpf_shift(log_value, -1)
|
| 982 |
+
|
| 983 |
+
def mpf_fibonacci(x, prec, rnd=round_fast):
    """
    Fibonacci function F(x) for a raw mpf tuple ``x``.

    Sufficiently small integer arguments are evaluated exactly with
    ``ifib``; every other argument uses the formula
    (phi^x - cos(pi*x)/phi^x) / (2*phi - 1).
    """
    sign, man, exp, bc = x
    if not man:
        # Special values pass through unchanged, except -inf which gives nan.
        return fnan if x == fninf else x
    # F(2^n) ~= 2^(2^n)
    size = abs(exp+bc)
    is_integer = exp >= 0
    if is_integer and (size < 10 or size <= bitcount(prec)):
        # Exact
        return from_int(ifib(to_int(x)), prec, rnd)
    # Use the modified Binet formula
    wp = prec + size + 20
    phi = mpf_phi(wp)
    divisor = mpf_add(mpf_shift(phi, 1), fnone, wp)    # 2*phi - 1
    phi_pow = mpf_pow(phi, x, wp)
    correction = mpf_div(mpf_cos_pi(x, wp), phi_pow, wp)
    numerator = mpf_sub(phi_pow, correction, wp)
    return mpf_div(numerator, divisor, prec, rnd)
|
| 1005 |
+
|
| 1006 |
+
|
| 1007 |
+
#-------------------------------------------------------------------------------
|
| 1008 |
+
# Exponential-type functions
|
| 1009 |
+
#-------------------------------------------------------------------------------
|
| 1010 |
+
|
| 1011 |
+
def exponential_series(x, prec, type=0):
    """
    Taylor series for cosh/sinh or cos/sin.

    type = 0 -- returns exp(x)  (slightly faster than cosh+sinh)
    type = 1 -- returns (cosh(x), sinh(x))
    type = 2 -- returns (cos(x), sin(x))

    ``x`` is a fixed-point integer scaled by 2**prec and the results use
    the same scaling. The argument is first divided by 2**r; the even
    (cosh or cos) series is summed, and the result is brought back by r
    squarings (type 0) or r applications of the double-angle formula
    (types 1 and 2).
    """
    # Work with |x|; the sign is reapplied when the odd part is formed.
    if x < 0:
        x = -x
        sign = 1
    else:
        sign = 0
    # r = number of argument halvings: about sqrt(prec)/2, adjusted by the
    # magnitude of x and never negative.
    r = int(0.5*prec**0.5)
    xmag = bitcount(x) - prec
    r = max(0, xmag + r)
    # Guard bits for the series and for the r doubling steps.
    extra = 10 + 2*max(r,-xmag)
    wp = prec + extra
    # Rescale x to wp bits and divide it by 2**r in a single shift.
    x <<= (extra - r)
    one = MPZ_ONE << wp
    # alt selects the alternating (cos) series instead of the cosh series.
    alt = (type == 2)
    if prec < EXP_SERIES_U_CUTOFF:
        # Direct summation. Terms x^(2j)/(2j)! go alternately into s0 and
        # s1; s1 is kept one factor of x^2 short and multiplied by x2 at
        # the end, so the loop steps by x^4.
        x2 = a = (x*x) >> wp
        x4 = (x2*x2) >> wp
        s0 = s1 = MPZ_ZERO
        k = 2
        while a:
            a //= (k-1)*k; s0 += a; k += 2
            a //= (k-1)*k; s1 += a; k += 2
            a = (a*x4) >> wp
        s1 = (x2*s1) >> wp
        if alt:
            c = s1 - s0 + one
        else:
            c = s1 + s0 + one
    else:
        # Same series split into u interleaved partial sums, which are
        # combined with precomputed powers of x^2 after the loop.
        u = int(0.3*prec**0.35)
        x2 = a = (x*x) >> wp
        xpowers = [one, x2]
        for i in xrange(1, u):
            xpowers.append((xpowers[-1]*x2)>>wp)
        sums = [MPZ_ZERO] * u
        k = 2
        while a:
            for i in xrange(u):
                a //= (k-1)*k
                # For cos, terms with k = 2 mod 4 enter with a minus sign.
                if alt and k & 2: sums[i] -= a
                else: sums[i] += a
                k += 2
            a = (a*xpowers[-1]) >> wp
        for i in xrange(1, u):
            sums[i] = (sums[i]*xpowers[i]) >> wp
        c = sum(sums) + one
    if type == 0:
        # sinh = sqrt(cosh^2 - 1) and exp(+/-x) = cosh +/- sinh.
        s = isqrt_fast(c*c - (one<<wp))
        if sign:
            v = c - s
        else:
            v = c + s
        # Undo the argument reduction: exp(x) = exp(x/2^r)^(2^r).
        for i in xrange(r):
            v = (v*v) >> wp
        return v >> extra
    else:
        # Repeatedly apply the double-angle formula
        # cosh(2*x) = 2*cosh(x)^2 - 1
        # cos(2*x) = 2*cos(x)^2 - 1
        pshift = wp-1
        for i in xrange(r):
            c = ((c*c) >> pshift) - one
        # With the abs, this is the same for sinh and sin
        s = isqrt_fast(abs((one<<wp) - c*c))
        if sign:
            s = -s
        return (c>>extra), (s>>extra)
|
| 1085 |
+
|
| 1086 |
+
def exp_basecase(x, prec):
    """
    Compute exp(x) as a fixed-point number. Works for any x,
    but for speed should have |x| < 1. For an arbitrary number,
    use exp(x) = exp(x-m*log(2)) * 2^m where m = floor(x/log(2)).

    ``x`` is an integer scaled by 2**prec; the result has the same scaling.
    """
    # Above the cutoff precision, delegate to the cosh-based series.
    if prec > EXP_COSH_CUTOFF:
        return exponential_series(x, prec, 0)
    # Raising the precision by r bits without shifting x reinterprets the
    # same integer as x/2^r, so the series below computes exp(x/2^r).
    r = int(prec**0.5)
    prec += r
    # s0 collects the even-order Taylor terms. s1 collects the odd-order
    # terms divided by x, and is multiplied by x after the loop.
    s0 = s1 = (MPZ_ONE << prec)
    k = 2
    a = x2 = (x*x) >> prec
    while a:
        a //= k; s0 += a; k += 1
        a //= k; s1 += a; k += 1
        a = (a*x2) >> prec
    s1 = (s1*x) >> prec
    s = s0 + s1
    # Square r times to recover exp(x) from exp(x/2^r).
    u = r
    while r:
        s = (s*s) >> prec
        r -= 1
    # Drop the r extra bits to return to the caller's precision.
    return s >> u
|
| 1110 |
+
|
| 1111 |
+
def exp_expneg_basecase(x, prec):
    """
    Return the pair (exp(x), exp(-x)) as fixed-point integers scaled by
    2**prec, for a fixed-point argument ``x`` with the same scaling.
    """
    if prec <= EXP_COSH_CUTOFF:
        # One exponential, then its fixed-point reciprocal.
        positive = exp_basecase(x, prec)
        negative = (MPZ_ONE << (2*prec)) // positive
        return positive, negative
    # Otherwise build both values from cosh and sinh.
    cosh_x, sinh_x = exponential_series(x, prec, 1)
    return cosh_x+sinh_x, cosh_x-sinh_x
|
| 1121 |
+
|
| 1122 |
+
def cos_sin_basecase(x, prec):
    """
    Compute cos(x), sin(x) as fixed-point numbers, assuming x
    in [0, pi/2). For an arbitrary number, use x' = x - m*(pi/2)
    where m = floor(x/(pi/2)) along with quarter-period symmetries.

    ``x`` is an integer scaled by 2**prec; both results use the same
    scaling.
    """
    # Above the cache precision, delegate to the generic series.
    if prec > COS_SIN_CACHE_PREC:
        return exponential_series(x, prec, 2)
    # Split x = t*2^precs + remainder, where t keeps the leading
    # COS_SIN_CACHE_STEP fractional bits and indexes the cache.
    precs = prec - COS_SIN_CACHE_STEP
    t = x >> precs
    n = int(t)
    if n not in cos_sin_cache:
        # Compute cos/sin of the cache point with 10 guard bits and store
        # them at COS_SIN_CACHE_PREC bits.
        w = t<<(10+COS_SIN_CACHE_PREC-COS_SIN_CACHE_STEP)
        cos_t, sin_t = exponential_series(w, 10+COS_SIN_CACHE_PREC, 2)
        cos_sin_cache[n] = (cos_t>>10), (sin_t>>10)
    cos_t, sin_t = cos_sin_cache[n]
    # Reduce the cached values to the requested precision.
    offset = COS_SIN_CACHE_PREC - prec
    cos_t >>= offset
    sin_t >>= offset
    # Remainder left after removing the cache point.
    x -= t << precs
    # Taylor series for cos and sin of the remainder, summed together:
    # each pass adds one term to cos, then one term to sin.
    cos = MPZ_ONE << prec
    sin = x
    k = 2
    a = -((x*x) >> prec)
    while a:
        a //= k; cos += a; k += 1; a = (a*x) >> prec
        a //= k; sin += a; k += 1; a = -((a*x) >> prec)
    # Angle-addition formulas combine the remainder with the cache point.
    return ((cos*cos_t-sin*sin_t) >> prec), ((sin*cos_t+cos*sin_t) >> prec)
|
| 1150 |
+
|
| 1151 |
+
def mpf_exp(x, prec, rnd=round_fast):
    """
    Compute exp(x) for a raw mpf tuple ``x``, rounded to ``prec`` bits in
    direction ``rnd``.

    Special values: exp(0) = 1, exp(-inf) = 0; +inf and nan are returned
    unchanged.
    """
    sign, man, exp, bc = x
    if man:
        mag = bc + exp
        wp = prec + 14
        # From here on man carries the sign of x.
        if sign:
            man = -man
        # TODO: the best cutoff depends on both x and the precision.
        if prec > 600 and exp >= 0:
            # exp >= 0 means x is an integer: raise e to that power.
            # Need about log2(exp(n)) ~= 1.45*mag extra precision
            e = mpf_e(wp+int(1.45*mag))
            return mpf_pow_int(e, man<<exp, prec, rnd)
        if mag < -wp:
            # x is negligible at this precision: result is 1, adjusted by
            # mpf_perturb according to the sign of x.
            return mpf_perturb(fone, sign, prec, rnd)
        # |x| >= 2
        if mag > 1:
            # For large arguments: exp(2^mag*(1+eps)) =
            # exp(2^mag)*exp(2^mag*eps) = exp(2^mag)*(1 + 2^mag*eps + ...)
            # so about mag extra bits is required.
            wpmod = wp + mag
            # Convert x to a fixed-point integer with wpmod bits.
            offset = exp + wpmod
            if offset >= 0:
                t = man << offset
            else:
                t = man >> (-offset)
            # Argument reduction: x = n*log(2) + t with 0 <= t < log(2).
            lg2 = ln2_fixed(wpmod)
            n, t = divmod(t, lg2)
            n = int(n)
            # Back from wpmod to wp bits.
            t >>= mag
        else:
            # Small argument: convert to fixed point with wp bits, no reduction.
            offset = exp + wp
            if offset >= 0:
                t = man << offset
            else:
                t = man >> (-offset)
            n = 0
        man = exp_basecase(t, wp)
        # Reapply the factor 2^n through the exponent.
        return from_man_exp(man, n-wp, prec, rnd)
    # Zero mantissa: x is 0 (exp == 0) or one of the special values.
    if not exp:
        return fone
    if x == fninf:
        return fzero
    return x
|
| 1194 |
+
|
| 1195 |
+
|
| 1196 |
+
def mpf_cosh_sinh(x, prec, rnd=round_fast, tanh=0):
    """
    Simultaneously compute (cosh(x), sinh(x)) for real x

    ``x`` is a raw mpf tuple. When ``tanh`` is true, a single value
    tanh(x) is returned instead of the (cosh, sinh) pair.
    """
    sign, man, exp, bc = x
    # Zero mantissa with nonzero exponent: +inf, -inf or nan.
    if (not man) and exp:
        if tanh:
            if x == finf: return fone
            if x == fninf: return fnone
            return fnan
        if x == finf: return (finf, finf)
        if x == fninf: return (finf, fninf)
        return fnan, fnan
    mag = exp+bc
    wp = prec+14
    if mag < -4:
        # Extremely close to 0, sinh(x) ~= x and cosh(x) ~= 1
        if mag < -wp:
            if tanh:
                return mpf_perturb(x, 1-sign, prec, rnd)
            cosh = mpf_perturb(fone, 0, prec, rnd)
            sinh = mpf_perturb(x, sign, prec, rnd)
            return cosh, sinh
        # Fix for cancellation when computing sinh
        wp += (-mag)
    # Does exp(-2*x) vanish?
    if mag > 10:
        if 3*(1<<(mag-1)) > wp:
            # XXX: rounding
            if tanh:
                return mpf_perturb([fone,fnone][sign], 1-sign, prec, rnd)
            # cosh and |sinh| are both taken as exp(|x|)/2.
            c = s = mpf_shift(mpf_exp(mpf_abs(x), prec, rnd), -1)
            if sign:
                s = mpf_neg(s)
            return c, s
    # |x| > 1
    if mag > 1:
        # Fixed-point |x| with mag extra bits, reduced modulo log(2):
        # |x| = n*log(2) + t.
        wpmod = wp + mag
        offset = exp + wpmod
        if offset >= 0:
            t = man << offset
        else:
            t = man >> (-offset)
        lg2 = ln2_fixed(wpmod)
        n, t = divmod(t, lg2)
        n = int(n)
        t >>= mag
    else:
        # Small argument: fixed-point conversion only, no reduction.
        offset = exp + wp
        if offset >= 0:
            t = man << offset
        else:
            t = man >> (-offset)
        n = 0
    # a = exp(t), b = exp(-t) in fixed point.
    a, b = exp_expneg_basecase(t, wp)
    # TODO: optimize division precision
    # With the common factor 2^(n-1) taken out, exp(-|x|) contributes
    # b >> 2n. The factor is restored through the exponent below.
    cosh = a + (b>>(2*n))
    sinh = a - (b>>(2*n))
    if sign:
        sinh = -sinh
    if tanh:
        # The common scale factor cancels in the quotient.
        man = (sinh << wp) // cosh
        return from_man_exp(man, -wp, prec, rnd)
    else:
        cosh = from_man_exp(cosh, n-wp-1, prec, rnd)
        sinh = from_man_exp(sinh, n-wp-1, prec, rnd)
        return cosh, sinh
|
| 1261 |
+
|
| 1262 |
+
|
| 1263 |
+
def mod_pi2(man, exp, mag, wp):
    """
    Reduce the number man*2^exp (with magnitude ``mag``) for the
    trigonometric base case.

    Returns ``(t, n, wp)``: ``t`` is the reduced argument as a fixed-point
    integer, ``n`` is the integer quotient from the division (0 when no
    reduction is needed), and ``wp`` is the possibly increased working
    precision that ``t`` is expressed in.
    """
    # Reduce to standard interval
    if mag > 0:
        i = 0
        # Retry with more guard bits (20, 40, 80, ...) until the remainder
        # is not too close to a multiple of the modulus.
        while 1:
            cancellation_prec = 20 << i
            wpmod = wp + mag + cancellation_prec
            # NOTE(review): pi_fixed(wpmod-1) presumably represents pi/2 at
            # wpmod bits, and pi4 half of that -- confirm against pi_fixed.
            pi2 = pi_fixed(wpmod-1)
            pi4 = pi2 >> 1
            # Fixed-point conversion of the input at wpmod bits.
            offset = wpmod + exp
            if offset >= 0:
                t = man << offset
            else:
                t = man >> (-offset)
            n, y = divmod(t, pi2)
            # Distance from the remainder to the nearer end of [0, pi2].
            if y > pi4:
                small = pi2 - y
            else:
                small = y
            # Accept once that distance still has bits above position
            # wp+mag-10; otherwise too much was lost to cancellation.
            if small >> (wp+mag-10):
                n = int(n)
                t = y >> mag
                wp = wpmod - mag
                break
            i += 1
    else:
        # No reduction needed; add -mag bits of precision and convert.
        wp += (-mag)
        offset = exp + wp
        if offset >= 0:
            t = man << offset
        else:
            t = man >> (-offset)
        n = 0
    return t, n, wp
|
| 1297 |
+
|
| 1298 |
+
|
| 1299 |
+
def mpf_cos_sin(x, prec, rnd=round_fast, which=0, pi=False):
    """
    Compute cosine and/or sine (or tangent) of the raw mpf tuple ``x``,
    rounded to ``prec`` bits in direction ``rnd``.

    which:
    0 -- return cos(x), sin(x)
    1 -- return cos(x)
    2 -- return sin(x)
    3 -- return tan(x)

    if pi=True, compute for pi*x
    """
    sign, man, exp, bc = x
    # Zero mantissa: x is 0 (exp == 0) or inf/nan (exp != 0).
    if not man:
        if exp:
            c, s = fnan, fnan
        else:
            c, s = fone, fzero
        if which == 0: return c, s
        if which == 1: return c
        if which == 2: return s
        # tan shares sin's value here: 0 at x = 0, nan otherwise.
        if which == 3: return s

    mag = bc + exp
    wp = prec + 10

    # Extremely small?
    if mag < 0:
        if mag < -wp:
            if pi:
                x = mpf_mul(x, mpf_pi(wp))
            # cos is 1 and sin/tan are x, each adjusted by mpf_perturb.
            c = mpf_perturb(fone, 1, prec, rnd)
            s = mpf_perturb(x, 1-sign, prec, rnd)
            if which == 0: return c, s
            if which == 1: return c
            if which == 2: return s
            if which == 3: return mpf_perturb(x, sign, prec, rnd)
    if pi:
        # exp >= -1 means x is an integer or half-integer, so the values
        # of cos(pi*x) and sin(pi*x) are exact.
        if exp >= -1:
            if exp == -1:
                # Half-integer: cos is 0, and sin is +/-1 selected by the
                # second mantissa bit and the sign of x.
                c = fzero
                s = (fone, fnone)[bool(man & 2) ^ sign]
            elif exp == 0:
                # Integer with exp == 0: cos = -1, sin = 0.
                c, s = (fnone, fzero)
            else:
                # Integer with exp > 0: cos = 1, sin = 0.
                c, s = (fone, fzero)
            if which == 0: return c, s
            if which == 1: return c
            if which == 2: return s
            if which == 3: return mpf_div(s, c, prec, rnd)
        # Subtract nearest half-integer (= mod by pi/2)
        n = ((man >> (-exp-2)) + 1) >> 1
        man = man - (n << (-exp-1))
        # Add precision according to the magnitude of the reduced value.
        mag2 = bitcount(man) + exp
        wp = prec + 10 - mag2
        offset = exp + wp
        if offset >= 0:
            t = man << offset
        else:
            t = man >> (-offset)
        # Multiply the reduced argument by pi in fixed point.
        t = (t*pi_fixed(wp)) >> wp
    else:
        t, n, wp = mod_pi2(man, exp, mag, wp)
    c, s = cos_sin_basecase(t, wp)
    # Undo the reduction with quarter-period symmetries selected by n mod 4.
    m = n & 3
    if m == 1: c, s = -s, c
    elif m == 2: c, s = -c, -s
    elif m == 3: c, s = s, -c
    # The computation used |x|; sin is odd, so flip it for negative x.
    if sign:
        s = -s
    if which == 0:
        c = from_man_exp(c, -wp, prec, rnd)
        s = from_man_exp(s, -wp, prec, rnd)
        return c, s
    if which == 1:
        return from_man_exp(c, -wp, prec, rnd)
    if which == 2:
        return from_man_exp(s, -wp, prec, rnd)
    if which == 3:
        # tan = sin/cos, formed from the two fixed-point integers.
        return from_rational(s, c, prec, rnd)
|
| 1377 |
+
|
| 1378 |
+
def mpf_cos(x, prec, rnd=round_fast):
    """cos(x), via mpf_cos_sin with which=1."""
    return mpf_cos_sin(x, prec, rnd, which=1)

def mpf_sin(x, prec, rnd=round_fast):
    """sin(x), via mpf_cos_sin with which=2."""
    return mpf_cos_sin(x, prec, rnd, which=2)

def mpf_tan(x, prec, rnd=round_fast):
    """tan(x), via mpf_cos_sin with which=3."""
    return mpf_cos_sin(x, prec, rnd, which=3)

def mpf_cos_sin_pi(x, prec, rnd=round_fast):
    """The pair (cos(pi*x), sin(pi*x))."""
    return mpf_cos_sin(x, prec, rnd, which=0, pi=1)

def mpf_cos_pi(x, prec, rnd=round_fast):
    """cos(pi*x)."""
    return mpf_cos_sin(x, prec, rnd, which=1, pi=1)

def mpf_sin_pi(x, prec, rnd=round_fast):
    """sin(pi*x)."""
    return mpf_cos_sin(x, prec, rnd, which=2, pi=1)

def mpf_cosh(x, prec, rnd=round_fast):
    """cosh(x): first element of the mpf_cosh_sinh pair."""
    pair = mpf_cosh_sinh(x, prec, rnd)
    return pair[0]

def mpf_sinh(x, prec, rnd=round_fast):
    """sinh(x): second element of the mpf_cosh_sinh pair."""
    pair = mpf_cosh_sinh(x, prec, rnd)
    return pair[1]

def mpf_tanh(x, prec, rnd=round_fast):
    """tanh(x), via mpf_cosh_sinh with tanh=1."""
    return mpf_cosh_sinh(x, prec, rnd, tanh=1)
|
| 1387 |
+
|
| 1388 |
+
|
| 1389 |
+
# Low-overhead fixed-point versions
|
| 1390 |
+
|
| 1391 |
+
def cos_sin_fixed(x, prec, pi2=None):
    """
    Fixed-point (cos(x), sin(x)) for an integer ``x`` scaled by 2**prec.

    ``pi2`` is the modulus used for argument reduction; when omitted it is
    computed as ``pi_fixed(prec-1)``.
    """
    if pi2 is None:
        pi2 = pi_fixed(prec-1)
    quotient, remainder = divmod(x, pi2)
    quadrant = int(quotient) & 3
    cos_r, sin_r = cos_sin_basecase(remainder, prec)
    # Map back from the reduced argument using quarter-period symmetries.
    if quadrant == 0:
        return cos_r, sin_r
    elif quadrant == 1:
        return -sin_r, cos_r
    elif quadrant == 2:
        return -cos_r, -sin_r
    # quadrant == 3 is the only remaining value of an int masked with 3
    return sin_r, -cos_r
|
| 1402 |
+
|
| 1403 |
+
def exp_fixed(x, prec, ln2=None):
    """
    Fixed-point exp(x) for an integer ``x`` scaled by 2**prec.

    ``ln2`` is the fixed-point value of log(2) used for argument
    reduction; when omitted it is computed as ``ln2_fixed(prec)``.
    """
    if ln2 is None:
        ln2 = ln2_fixed(prec)
    # x = shift*log(2) + remainder, so exp(x) = exp(remainder) * 2^shift
    quotient, remainder = divmod(x, ln2)
    shift = int(quotient)
    value = exp_basecase(remainder, prec)
    return value << shift if shift >= 0 else value >> (-shift)
|
| 1413 |
+
|
| 1414 |
+
|
| 1415 |
+
# When running on the Sage backend, rebind selected functions in this module
# to the implementations exposed by Sage's ext_libmp extension module.
if BACKEND == 'sage':
    try:
        import sage.libs.mpmath.ext_libmp as _lbmp
        mpf_sqrt = _lbmp.mpf_sqrt
        mpf_exp = _lbmp.mpf_exp
        mpf_log = _lbmp.mpf_log
        mpf_cos = _lbmp.mpf_cos
        mpf_sin = _lbmp.mpf_sin
        mpf_pow = _lbmp.mpf_pow
        exp_fixed = _lbmp.exp_fixed
        cos_sin_fixed = _lbmp.cos_sin_fixed
        log_int_fixed = _lbmp.log_int_fixed
    except (ImportError, AttributeError):
        # Best effort: if the extension or one of its attributes is missing,
        # warn and continue. Names rebound before the failure stay rebound;
        # the rest keep the pure-Python definitions.
        print("Warning: Sage imports in libelefun failed")
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libhyper.py
ADDED
|
@@ -0,0 +1,1150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
This module implements computation of hypergeometric and related
|
| 3 |
+
functions. In particular, it provides code for generic summation
|
| 4 |
+
of hypergeometric series. Optimized versions for various special
|
| 5 |
+
cases are also provided.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import operator
|
| 9 |
+
import math
|
| 10 |
+
|
| 11 |
+
from .backend import MPZ_ZERO, MPZ_ONE, BACKEND, xrange, exec_
|
| 12 |
+
|
| 13 |
+
from .libintmath import gcd
|
| 14 |
+
|
| 15 |
+
from .libmpf import (\
|
| 16 |
+
ComplexResult, round_fast, round_nearest,
|
| 17 |
+
negative_rnd, bitcount, to_fixed, from_man_exp, from_int, to_int,
|
| 18 |
+
from_rational,
|
| 19 |
+
fzero, fone, fnone, ftwo, finf, fninf, fnan,
|
| 20 |
+
mpf_sign, mpf_add, mpf_abs, mpf_pos,
|
| 21 |
+
mpf_cmp, mpf_lt, mpf_le, mpf_gt, mpf_min_max,
|
| 22 |
+
mpf_perturb, mpf_neg, mpf_shift, mpf_sub, mpf_mul, mpf_div,
|
| 23 |
+
sqrt_fixed, mpf_sqrt, mpf_rdiv_int, mpf_pow_int,
|
| 24 |
+
to_rational,
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
from .libelefun import (\
|
| 28 |
+
mpf_pi, mpf_exp, mpf_log, pi_fixed, mpf_cos_sin, mpf_cos, mpf_sin,
|
| 29 |
+
mpf_sqrt, agm_fixed,
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
from .libmpc import (\
|
| 33 |
+
mpc_one, mpc_sub, mpc_mul_mpf, mpc_mul, mpc_neg, complex_int_pow,
|
| 34 |
+
mpc_div, mpc_add_mpf, mpc_sub_mpf,
|
| 35 |
+
mpc_log, mpc_add, mpc_pos, mpc_shift,
|
| 36 |
+
mpc_is_infnan, mpc_zero, mpc_sqrt, mpc_abs,
|
| 37 |
+
mpc_mpf_div, mpc_square, mpc_exp
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
from .libintmath import ifac
|
| 41 |
+
from .gammazeta import mpf_gamma_int, mpf_euler, euler_fixed
|
| 42 |
+
|
| 43 |
+
class NoConvergence(Exception):
    """Signals that a series did not converge within the allowed number of terms."""
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
#-----------------------------------------------------------------------#
|
| 48 |
+
# #
|
| 49 |
+
# Generic hypergeometric series #
|
| 50 |
+
# #
|
| 51 |
+
#-----------------------------------------------------------------------#
|
| 52 |
+
|
| 53 |
+
"""
|
| 54 |
+
TODO:
|
| 55 |
+
|
| 56 |
+
1. proper mpq parsing
|
| 57 |
+
2. imaginary z special-cased (also: rational, integer?)
|
| 58 |
+
3. more clever handling of series that don't converge because of stupid
|
| 59 |
+
upwards rounding
|
| 60 |
+
4. checking for cancellation
|
| 61 |
+
|
| 62 |
+
"""
|
| 63 |
+
|
| 64 |
+
def make_hyp_summator(key):
    """
    Returns a function that sums a generalized hypergeometric series,
    for given parameter types (integer, rational, real, complex).

    ``key`` is a tuple ``(p, q, param_types, ztype)``.  ``param_types``
    holds one flag per parameter ('Z' integer, 'Q' rational, 'R' real,
    'C' complex); the first ``p`` flags describe numerator parameters and
    the remaining ones denominator parameters.  ``ztype`` equal to 'C'
    selects a complex argument; any other value is treated as real.

    The summation routine is produced as Python source text specialized
    for these types and compiled with ``exec_``.  The return value is the
    pair ``(source, function)``, where the function has the signature
    ``f(coeffs, z, prec, wp, epsshift, magnitude_check, **kwargs)``.

    Raises ValueError if ``param_types`` contains an unknown flag.
    """
    p, q, param_types, ztype = key

    pstring = "".join(param_types)
    fname = "hypsum_%i_%i_%s_%s_%s" % (p, q, pstring[:p], pstring[p:], ztype)

    have_complex_param = 'C' in param_types
    have_complex_arg = ztype == 'C'
    have_complex = have_complex_param or have_complex_arg

    # Lines of the generated function body (indented one level at the end).
    source = []
    add = source.append

    def add_unpack(prefix, expr):
        # Emit code unpacking a (sign, man, exp, bc) tuple into a signed mantissa.
        add("%ssign, %sm, %se, %sbc = %s" % (prefix, prefix, prefix, prefix, expr))
        add("if %ssign: %sm = -%sm" % (prefix, prefix, prefix))

    def add_fixed(target, prefix):
        # Emit code shifting the unpacked mantissa to a wp-bit fixed-point integer.
        add("offset = %se + wp" % prefix)
        add("if offset >= 0:")
        add("    %s = %sm << offset" % (target, prefix))
        add("else:")
        add("    %s = %sm >> (-offset)" % (target, prefix))

    # Parameter indices, grouped by type and by numerator (a) / denominator (b)
    aint, arat, areal, acomplex = [], [], [], []
    bint, brat, breal, bcomplex = [], [], [], []

    add("MAX = kwargs.get('maxterms', wp*100)")
    add("HIGH = MPZ_ONE<<epsshift")
    add("LOW = -HIGH")

    # Setup code: running sum (S*) and current term (P*) start at 1.0
    add("SRE = PRE = one = (MPZ_ONE << wp)")
    if have_complex:
        add("SIM = PIM = MPZ_ZERO")

    if have_complex_arg:
        add_unpack("x", "z[0]")
        add_unpack("y", "z[1]")
    else:
        add_unpack("x", "z")

    add_fixed("ZRE", "x")
    if have_complex_arg:
        add_fixed("ZIM", "y")

    for i, flag in enumerate(param_types):
        in_denominator = i >= p
        W = "B" if in_denominator else "A"
        if flag == 'Z':
            (bint if in_denominator else aint).append(i)
            add("%sINT_%i = coeffs[%i]" % (W, i, i))
        elif flag == 'Q':
            (brat if in_denominator else arat).append(i)
            add("%sP_%i, %sQ_%i = coeffs[%i]._mpq_" % (W, i, W, i, i))
        elif flag == 'R':
            (breal if in_denominator else areal).append(i)
            add_unpack("x", "coeffs[%i]._mpf_" % i)
            add_fixed("%sREAL_%i" % (W, i), "x")
        elif flag == 'C':
            (bcomplex if in_denominator else acomplex).append(i)
            add("__re, __im = coeffs[%i]._mpc_" % i)
            add_unpack("x", "__re")
            add_unpack("y", "__im")
            add_fixed("%sCRE_%i" % (W, i), "x")
            add_fixed("%sCIM_%i" % (W, i), "y")
        else:
            raise ValueError

    # Real numerator/denominator parameters are paired up so that each pair
    # needs only one multiplication and one division (no shift by wp).
    cancellable_real = min(len(areal), len(breal))
    noncancellable_real_num = areal[cancellable_real:]
    noncancellable_real_den = breal[cancellable_real:]

    def add_real_factors(P):
        # Emit the update of term component P by all real-valued parameters.
        for k in range(cancellable_real):
            add("    %s = %s * AREAL_%i // BREAL_%i" % (P, P, areal[k], breal[k]))
        for i in noncancellable_real_num:
            add("    %s = (%s * AREAL_%i) >> wp" % (P, P, i))
        for i in noncancellable_real_den:
            add("    %s = (%s << wp) // BREAL_%i" % (P, P, i))

    # LOOP
    add("for n in xrange(1,10**8):")

    add("    if n in magnitude_check:")
    add("        p_mag = bitcount(abs(PRE))")
    if have_complex:
        add("        p_mag = max(p_mag, bitcount(abs(PIM)))")
    add("        magnitude_check[n] = wp-p_mag")

    # Exact integer factors (integer and rational parameters, and n)
    multiplier = " * ".join(["AINT_%i" % i for i in aint] +
                            ["AP_%i" % i for i in arat] +
                            ["BQ_%i" % i for i in brat])

    divisor = " * ".join(["BINT_%i" % i for i in bint] +
                         ["BP_%i" % i for i in brat] +
                         ["AQ_%i" % i for i in arat] + ["n"])

    if multiplier:
        add("    mul = " + multiplier)
    add("    div = " + divisor)

    # Check for singular terms: 0/0 terminates the series, x/0 is an error
    add("    if not div:")
    if multiplier:
        add("        if not mul:")
        add("            break")
    add("        raise ZeroDivisionError")

    # Update product
    if have_complex:

        # TODO: when there are several real parameters and just a few complex
        # (maybe just the complex argument), we only need to do about
        # half as many ops if we accumulate the real factor in a single real variable
        add_real_factors("PRE")
        add_real_factors("PIM")

        if multiplier:
            if have_complex_arg:
                add("    PRE, PIM = (mul*(PRE*ZRE-PIM*ZIM))//div, (mul*(PIM*ZRE+PRE*ZIM))//div")
                add("    PRE >>= wp")
                add("    PIM >>= wp")
            else:
                add("    PRE = ((mul * PRE * ZRE) >> wp) // div")
                add("    PIM = ((mul * PIM * ZRE) >> wp) // div")
        else:
            if have_complex_arg:
                add("    PRE, PIM = (PRE*ZRE-PIM*ZIM)//div, (PIM*ZRE+PRE*ZIM)//div")
                add("    PRE >>= wp")
                add("    PIM >>= wp")
            else:
                add("    PRE = ((PRE * ZRE) >> wp) // div")
                add("    PIM = ((PIM * ZRE) >> wp) // div")

        # Complex numerator parameters: complex multiplication
        for i in acomplex:
            add("    PRE, PIM = PRE*ACRE_#-PIM*ACIM_#, PIM*ACRE_#+PRE*ACIM_#".replace("#", str(i)))
            add("    PRE >>= wp")
            add("    PIM >>= wp")

        # Complex denominator parameters: multiply by conjugate, divide by |b|^2
        for i in bcomplex:
            add("    mag = BCRE_#*BCRE_#+BCIM_#*BCIM_#".replace("#", str(i)))
            add("    re = PRE*BCRE_# + PIM*BCIM_#".replace("#", str(i)))
            add("    im = PIM*BCRE_# - PRE*BCIM_#".replace("#", str(i)))
            add("    PRE = (re << wp) // mag")
            add("    PIM = (im << wp) // mag")

    else:
        add_real_factors("PRE")
        if multiplier:
            add("    PRE = ((PRE * mul * ZRE) >> wp) // div")
        else:
            add("    PRE = ((PRE * ZRE) >> wp) // div")

    # Add product to sum; stop once the term is below the tolerance
    if have_complex:
        add("    SRE += PRE")
        add("    SIM += PIM")
        add("    if (HIGH > PRE > LOW) and (HIGH > PIM > LOW):")
        add("        break")
    else:
        add("    SRE += PRE")
        add("    if HIGH > PRE > LOW:")
        add("        break")

    add("    if n > MAX:")
    add("        raise NoConvergence('Hypergeometric series converges too slowly. Try increasing maxterms.')")

    # +1 all parameters for next loop
    for i in aint:
        add("    AINT_%i += 1" % i)
    for i in bint:
        add("    BINT_%i += 1" % i)
    for i in arat:
        add("    AP_%i += AQ_%i" % (i, i))
    for i in brat:
        add("    BP_%i += BQ_%i" % (i, i))
    for i in areal:
        add("    AREAL_%i += one" % i)
    for i in breal:
        add("    BREAL_%i += one" % i)
    for i in acomplex:
        add("    ACRE_%i += one" % i)
    for i in bcomplex:
        add("    BCRE_%i += one" % i)

    # Convert the fixed-point sum back to mpf and report its magnitude
    if have_complex:
        add("a = from_man_exp(SRE, -wp, prec, 'n')")
        add("b = from_man_exp(SIM, -wp, prec, 'n')")

        add("if SRE:")
        add("    if SIM:")
        add("        magn = max(a[2]+a[3], b[2]+b[3])")
        add("    else:")
        add("        magn = a[2]+a[3]")
        add("elif SIM:")
        add("    magn = b[2]+b[3]")
        add("else:")
        add("    magn = -wp+1")

        add("return (a, b), True, magn")
    else:
        add("a = from_man_exp(SRE, -wp, prec, 'n')")

        add("if SRE:")
        add("    magn = a[2]+a[3]")
        add("else:")
        add("    magn = -wp+1")

        add("return a, False, magn")

    source = "\n".join(("    " + line) for line in source)
    source = ("def %s(coeffs, z, prec, wp, epsshift, magnitude_check, **kwargs):\n" % fname) + source

    # The generated code resolves MPZ_ONE, bitcount, etc. through this
    # module's globals; the new function object lands in ``namespace``.
    namespace = {}
    exec_(source, globals(), namespace)

    return source, namespace[fname]
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
# When running under Sage, replace the generic code generator defined above
# with a thin wrapper around Sage's own hypsum_internal routine.
if BACKEND == 'sage':

    def make_hyp_summator(key):
        """
        Returns a function that sums a generalized hypergeometric series,
        for given parameter types (integer, rational, real, complex).

        ``key`` is the tuple ``(p, q, param_types, ztype)``.  The result is
        the pair ``("(none)", function)``; the first element is a
        placeholder where the other implementation returns generated
        source text.
        """
        # Imported lazily so the module only needs Sage on this code path.
        from sage.libs.mpmath.ext_main import hypsum_internal
        p, q, param_types, ztype = key
        def _hypsum(coeffs, z, prec, wp, epsshift, magnitude_check, **kwargs):
            # Forward everything; kwargs is passed as a single dict argument.
            return hypsum_internal(p, q, param_types, ztype, coeffs, z,
                prec, wp, epsshift, magnitude_check, kwargs)

        return "(none)", _hypsum
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
#-----------------------------------------------------------------------#
|
| 327 |
+
# #
|
| 328 |
+
# Error functions #
|
| 329 |
+
# #
|
| 330 |
+
#-----------------------------------------------------------------------#
|
| 331 |
+
|
| 332 |
+
# TODO: mpf_erf should call mpf_erfc when appropriate (currently
|
| 333 |
+
# only the converse delegation is implemented)
|
| 334 |
+
|
| 335 |
+
def mpf_erf(x, prec, rnd=round_fast):
    """
    Error function erf(x) of a real mpf value, rounded to ``prec`` bits
    in direction ``rnd``.
    """
    sign, man, exp, bc = x
    # Zero mantissa: zero, infinities or nan
    if not man:
        if x == fzero:
            return fzero
        if x == finf:
            return fone
        if x == fninf:
            return fnone
        return fnan
    magnitude = exp + bc
    # The approximation erf(x) = 1 is accurate to > x^2 * log(e,2) bits
    if magnitude > 3 and 2*(magnitude-1) + 0.528766 > math.log(prec, 2):
        if sign:
            return mpf_perturb(fnone, 0, prec, rnd)
        return mpf_perturb(fone, 1, prec, rnd)
    # erf(x) ~ 2*x/sqrt(pi) close to 0
    if magnitude < -prec:
        doubled = mpf_shift(x, 1)
        sqrt_pi = mpf_sqrt(mpf_pi(prec+20), prec+20)
        # TODO: interval rounding
        return mpf_div(doubled, sqrt_pi, prec, rnd)
    wp = prec + abs(magnitude) + 25
    # Taylor series for erf, fixed-point summation on |x|
    power = abs(to_fixed(x, wp))
    square = (power*power) >> wp
    total = power
    k = 1
    while True:
        # power holds |x|^(2k+1) / k!
        power = ((power * square) >> wp) // k
        term = power // (2*k+1)
        if k & 1:
            total -= term
        else:
            total += term
        k += 1
        if not term:
            break
    # Multiply by 2/sqrt(pi)
    total = (total << (wp+1)) // sqrt_fixed(pi_fixed(wp), wp)
    if sign:
        total = -total
    return from_man_exp(total, -wp, prec, rnd)
|
| 374 |
+
|
| 375 |
+
# If possible, we use the asymptotic series for erfc.
|
| 376 |
+
# This is an alternating divergent asymptotic series, so
|
| 377 |
+
# the error is at most equal to the first omitted term.
|
| 378 |
+
# Here we check if the smallest term is small enough
|
| 379 |
+
# for a given x and precision
|
| 380 |
+
def erfc_check_series(x, prec):
    """
    Return True if the integer part n of x satisfies n**2 * 1.44 > prec,
    i.e. the asymptotic series for erfc is considered accurate enough
    at x for the given precision; otherwise return False.
    """
    int_part = to_int(x)
    return int_part**2 * 1.44 > prec
|
| 385 |
+
|
| 386 |
+
def mpf_erfc(x, prec, rnd=round_fast):
    """
    Complementary error function erfc(x) = 1 - erf(x) of a real mpf value,
    rounded to ``prec`` bits in direction ``rnd``.
    """
    sign, man, exp, bc = x
    # Zero mantissa: zero, infinities or nan
    if not man:
        if x == fzero:
            return fone
        if x == finf:
            return fzero
        if x == fninf:
            return ftwo
        return fnan
    mag = bc+exp
    # Preserve full accuracy when exponent grows huge
    wp = prec + 20 + max(0, 2*mag)
    # Negative or small x: no cancellation problem in 1 - erf(x)
    if sign or mag < 2:
        erf_val = mpf_erf(x, prec+10, negative_rnd[rnd])
        return mpf_sub(fone, erf_val, prec, rnd)
    if not erfc_check_series(x, wp):
        # 1-erf(x) ~ exp(-x^2), increase prec to deal with cancellation
        n = to_int(x)+1
        erf_val = mpf_erf(x, prec + int(n**2*1.44) + 10)
        return mpf_sub(fone, erf_val, prec, rnd)
    # Asymptotic series, summed in fixed point
    total = term = MPZ_ONE << wp
    previous = 0
    two_x2 = (2 * to_fixed(x, wp) ** 2) >> wp
    k = 1
    while True:
        term = ((term * (2*k - 1)) << wp) // two_x2
        # Stop when the terms start growing again or have vanished
        if (k > 4 and term > previous) or not term:
            break
        if k & 1:
            total -= term
        else:
            total += term
        previous = term
        k += 1
    total = (total << wp) // sqrt_fixed(pi_fixed(wp), wp)
    series = from_man_exp(total, -wp, wp)
    # exp(-x^2) * series / x
    gauss = mpf_exp(mpf_neg(mpf_mul(x, x, wp), wp), wp)
    return mpf_div(mpf_mul(gauss, series, wp), x, prec, rnd)
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
#-----------------------------------------------------------------------#
|
| 427 |
+
# #
|
| 428 |
+
# Exponential integrals #
|
| 429 |
+
# #
|
| 430 |
+
#-----------------------------------------------------------------------#
|
| 431 |
+
|
| 432 |
+
def ei_taylor(x, prec):
    """
    Sum x + x^2/(2*2!) + x^3/(3*3!) + ... for the fixed-point integer x
    (scaled by 2**prec), returning the fixed-point sum.  Summation stops
    when the running power term x^k/k! has been truncated to zero.
    """
    total = power = x
    k = 2
    while power:
        # power becomes x^k / k!
        power = ((power * x) >> prec) // k
        total += power // k
        k += 1
    return total
|
| 440 |
+
|
| 441 |
+
def complex_ei_taylor(zre, zim, prec):
    """
    Complex counterpart of the real Taylor summation: sums z^k/(k*k!)
    for k >= 1, with z = zre + i*zim given as fixed-point integers scaled
    by 2**prec.  Returns the fixed-point pair (real, imag).  Summation
    stops once |re| + |im| of the power term z^k/k! is at most 5 units.
    """
    sum_re = pow_re = zre
    sum_im = pow_im = zim
    k = 2
    while abs(pow_re) + abs(pow_im) > 5:
        # Complex product (pow * z), then divide by k and rescale
        prod_re = pow_re*zre - pow_im*zim
        prod_im = pow_re*zim + pow_im*zre
        pow_re = (prod_re // k) >> prec
        pow_im = (prod_im // k) >> prec
        sum_re += pow_re // k
        sum_im += pow_im // k
        k += 1
    return sum_re, sum_im
|
| 452 |
+
|
| 453 |
+
def ei_asymptotic(x, prec):
    """
    Sum the series 1 + 1!/x + 2!/x^2 + 3!/x^3 + ... for the fixed-point
    integer x (scaled by 2**prec), until the term truncates to zero.
    Returns the fixed-point sum.
    """
    one = MPZ_ONE << prec
    # 1/x in fixed point
    recip = (one << prec) // x
    term = recip
    total = one + recip
    k = 2
    while term:
        term = (k*term*recip) >> prec
        total += term
        k += 1
    return total
|
| 463 |
+
|
| 464 |
+
def complex_ei_asymptotic(zre, zim, prec):
    """
    Complex version of the series 1 + 1!/z + 2!/z^2 + ..., with
    z = zre + i*zim given as fixed-point integers scaled by 2**prec.
    Returns the fixed-point pair (real, imag).

    Raises NoConvergence if the term counter exceeds ``prec`` before the
    term has dropped to at most 1000 units.
    """
    one = MPZ_ONE << prec
    norm2 = (zim*zim + zre*zre) >> prec
    # 1 / z = conj(z) / |z|^2
    inv_re = (zre << prec) // norm2
    inv_im = ((-zim) << prec) // norm2
    term_re, term_im = inv_re, inv_im
    sum_re = one + inv_re
    sum_im = inv_im
    k = 2
    while abs(term_re) + abs(term_im) > 1000:
        prod_re = term_re*inv_re - term_im*inv_im
        prod_im = term_re*inv_im + term_im*inv_re
        term_re = (prod_re*k) >> prec
        term_im = (prod_im*k) >> prec
        sum_re += term_re
        sum_im += term_im
        k += 1
        if k > prec:
            raise NoConvergence
    return sum_re, sum_im
|
| 483 |
+
|
| 484 |
+
def mpf_ei(x, prec, rnd=round_fast, e1=False):
    """
    Exponential integral Ei(x) of a real mpf value, rounded to ``prec``
    bits in direction ``rnd``.

    With ``e1=True`` the argument is negated on entry and the result is
    negated on exit, i.e. the function returns -Ei(-x) (named E1(x) in
    the error message below).  In that mode finf is returned for a zero
    argument and ComplexResult is raised when the negated argument has a
    clear sign bit and is not zero.
    """
    if e1:
        x = mpf_neg(x)
    sign, man, exp, bc = x
    if e1 and not sign:
        if x == fzero:
            return finf
        raise ComplexResult("E1(x) for x < 0")
    if man:
        # |x| as an mpf tuple (sign field cleared)
        xabs = 0, man, exp, bc
        xmag = exp+bc
        wp = prec + 20
        # The asymptotic series is used if |x| is huge ...
        can_use_asymp = xmag > wp
        if not can_use_asymp:
            # ... or if the integer part of |x| exceeds about wp*0.693
            # (presumably wp*log(2) -- TODO confirm)
            if exp >= 0:
                xabsint = man << exp
            else:
                xabsint = man >> (-exp)
            can_use_asymp = xabsint > int(wp*0.693) + 10
        if can_use_asymp:
            if xmag > wp:
                # Series factor is taken as 1 at this magnitude
                v = fone
            else:
                v = from_man_exp(ei_asymptotic(to_fixed(x, wp), wp), -wp)
            # Ei(x) ~ exp(x)/x * series
            v = mpf_mul(v, mpf_exp(x, wp), wp)
            v = mpf_div(v, x, prec, rnd)
        else:
            # Extra working precision proportional to |x| for the Taylor sum
            wp += 2*int(to_int(xabs))
            u = to_fixed(x, wp)
            # Ei(x) = euler + log|x| + Taylor series
            v = ei_taylor(u, wp) + euler_fixed(wp)
            t1 = from_man_exp(v,-wp)
            t2 = mpf_log(xabs,wp)
            v = mpf_add(t1, t2, prec, rnd)
    else:
        # Zero mantissa: zero, infinities or nan
        if x == fzero: v = fninf
        elif x == finf: v = finf
        elif x == fninf: v = fzero
        else: v = fnan
    if e1:
        v = mpf_neg(v)
    return v
|
| 525 |
+
|
| 526 |
+
def mpc_ei(z, prec, rnd=round_fast, e1=False):
    """
    Exponential integral Ei(z) of a complex value z = (re, im) given as a
    pair of mpf tuples; returns a pair of mpf tuples rounded to ``prec``
    bits.

    With ``e1=True`` the argument is negated on entry and the result is
    negated before returning, giving -Ei(-z) with the logarithm term
    taken at the original argument.
    """
    if e1:
        z = mpc_neg(z)
    a, b = z
    asign, aman, aexp, abc = a
    bsign, bman, bexp, bbc = b
    # Purely real argument: delegate to the real implementation
    if b == fzero:
        if e1:
            x = mpf_neg(mpf_ei(a, prec, rnd))
            if not asign:
                # Imaginary part -pi when the (negated) real part has a clear sign bit
                y = mpf_neg(mpf_pi(prec, rnd))
            else:
                y = fzero
            return x, y
        else:
            return mpf_ei(a, prec, rnd), fzero
    if a != fzero:
        # A zero mantissa here means an infinity or nan component
        if not aman or not bman:
            return (fnan, fnan)
    wp = prec + 40
    amag = aexp+abc
    bmag = bexp+bbc
    zmag = max(amag, bmag)
    # Same selection rule as in the real case, using |re| + |im|
    can_use_asymp = zmag > wp
    if not can_use_asymp:
        zabsint = abs(to_int(a)) + abs(to_int(b))
        can_use_asymp = zabsint > int(wp*0.693) + 20
    try:
        if can_use_asymp:
            if zmag > wp:
                v = fone, fzero
            else:
                zre = to_fixed(a, wp)
                zim = to_fixed(b, wp)
                vre, vim = complex_ei_asymptotic(zre, zim, wp)
                v = from_man_exp(vre, -wp), from_man_exp(vim, -wp)
            # exp(z)/z * series
            v = mpc_mul(v, mpc_exp(z, wp), wp)
            v = mpc_div(v, z, wp)
            if e1:
                v = mpc_neg(v, prec, rnd)
            else:
                # Add +/- pi*i depending on the sign of the imaginary part
                x, y = v
                if bsign:
                    v = mpf_pos(x, prec, rnd), mpf_sub(y, mpf_pi(wp), prec, rnd)
                else:
                    v = mpf_pos(x, prec, rnd), mpf_add(y, mpf_pi(wp), prec, rnd)
            return v
    except NoConvergence:
        # The asymptotic series did not converge; fall through to Taylor
        pass
    #wp += 2*max(0,zmag)
    # Extra working precision proportional to |z| (computed at 5 bits)
    wp += 2*int(to_int(mpc_abs(z, 5)))
    zre = to_fixed(a, wp)
    zim = to_fixed(b, wp)
    vre, vim = complex_ei_taylor(zre, zim, wp)
    vre += euler_fixed(wp)
    v = from_man_exp(vre,-wp), from_man_exp(vim,-wp)
    # Logarithm term: z was negated above when e1 is set, so negate it back
    if e1:
        u = mpc_log(mpc_neg(z),wp)
    else:
        u = mpc_log(z,wp)
    v = mpc_add(v, u, prec, rnd)
    if e1:
        v = mpc_neg(v)
    return v
|
| 590 |
+
|
| 591 |
+
def mpf_e1(x, prec, rnd=round_fast):
    """Real exponential integral E1(x), computed by mpf_ei in its e1 mode."""
    return mpf_ei(x, prec, rnd, e1=True)
|
| 593 |
+
|
| 594 |
+
def mpc_e1(x, prec, rnd=round_fast):
    """Complex exponential integral E1(x), computed by mpc_ei in its e1 mode."""
    return mpc_ei(x, prec, rnd, e1=True)
|
| 596 |
+
|
| 597 |
+
def mpf_expint(n, x, prec, rnd=round_fast, gamma=False):
    """
    E_n(x), n an integer, x real

    With gamma=True, computes Gamma(n,x)   (upper incomplete gamma function)

    Returns (real, None) if real, otherwise (real, imag)
    The imaginary part is an optional branch cut term

    Raises NotImplementedError when the magnitude of x is below 2**-10
    or when no implemented method applies, so callers can fall back to
    another algorithm.
    """
    sign, man, exp, bc = x
    # Zero mantissa: zero, infinities or nan
    if not man:
        if gamma:
            if x == fzero:
                # Actually gamma function pole
                if n <= 0:
                    return finf, None
                return mpf_gamma_int(n, prec, rnd), None
            if x == finf:
                return fzero, None
            # TODO: could return finite imaginary value at -inf
            return fnan, fnan
        else:
            if x == fzero:
                if n > 1:
                    return from_rational(1, n-1, prec, rnd), None
                else:
                    return finf, None
            if x == finf:
                return fzero, None
            return fnan, fnan
    n_orig = n
    # From here on n is the order of the equivalent E_n; for the gamma
    # function the index is mapped to 1-n and n_orig keeps the original.
    if gamma:
        n = 1-n
    wp = prec + 20
    xmag = exp + bc
    # Beware of near-poles
    if xmag < -10:
        raise NotImplementedError
    nmag = bitcount(abs(n))
    # An imaginary (branch cut) term is returned for positive order and
    # an argument with the sign bit set
    have_imag = n > 0 and sign
    negx = mpf_neg(x)
    # Skip series if direct convergence
    if n == 0 or 2*nmag - xmag < -wp:
        if gamma:
            v = mpf_exp(negx, wp)
            re = mpf_mul(v, mpf_pow_int(x, n_orig-1, wp), prec, rnd)
        else:
            v = mpf_exp(negx, wp)
            re = mpf_div(v, x, prec, rnd)
    else:
        # Finite number of terms, or...
        can_use_asymptotic_series = -3*wp < n <= 0
        # ...large enough?
        if not can_use_asymptotic_series:
            xi = abs(to_int(x))
            m = min(max(1, xi-n), 2*wp)
            # Estimated size (in bits) of the m-th term of the series
            siz = -n*nmag + (m+n)*bitcount(abs(m+n)) - m*xmag - (144*m//100)
            tol = -wp-10
            can_use_asymptotic_series = siz < tol
        if can_use_asymptotic_series:
            # r = -1/x in fixed point
            r = ((-MPZ_ONE) << (wp+wp)) // to_fixed(x, wp)
            m = n
            t = r*m
            s = MPZ_ONE << wp
            # Stops when the factor m reaches zero (finite sum) or the
            # term underflows to zero
            while m and t:
                s += t
                m += 1
                t = (m*r*t) >> wp
            v = mpf_exp(negx, wp)
            if gamma:
                # ~ exp(-x) * x^(n-1) * (1 + ...)
                v = mpf_mul(v, mpf_pow_int(x, n_orig-1, wp), wp)
            else:
                # ~ exp(-x)/x * (1 + ...)
                v = mpf_div(v, x, wp)
            re = mpf_mul(v, from_man_exp(s, -wp), prec, rnd)
        elif n == 1:
            re = mpf_neg(mpf_ei(negx, prec, rnd))
        elif n > 0 and n < 3*wp:
            # Combine -Ei(-x) (T1) with a finite sum of n-1 terms (T2),
            # then divide by (n-1)!
            T1 = mpf_neg(mpf_ei(negx, wp))
            if gamma:
                if n_orig & 1:
                    T1 = mpf_neg(T1)
            else:
                T1 = mpf_mul(T1, mpf_pow_int(negx, n-1, wp), wp)
            r = t = to_fixed(x, wp)
            # facs[k] = k! before reversal; reversed so that facs[0] is the
            # largest factorial
            facs = [1] * (n-1)
            for k in range(1,n-1):
                facs[k] = facs[k-1] * k
            facs = facs[::-1]
            s = facs[0] << wp
            # Alternating sum; t holds x^k in fixed point
            for k in range(1, n-1):
                if k & 1:
                    s -= facs[k] * t
                else:
                    s += facs[k] * t
                t = (t*r) >> wp
            T2 = from_man_exp(s, -wp, wp)
            T2 = mpf_mul(T2, mpf_exp(negx, wp))
            if gamma:
                T2 = mpf_mul(T2, mpf_pow_int(x, n_orig, wp), wp)
            R = mpf_add(T1, T2)
            re = mpf_div(R, from_int(ifac(n-1)), prec, rnd)
        else:
            raise NotImplementedError
    if have_imag:
        # Branch cut term: pi divided by -(n-1)!, with an extra power of
        # -x in the non-gamma case
        M = from_int(-ifac(n-1))
        if gamma:
            im = mpf_div(mpf_pi(wp), M, prec, rnd)
            if n_orig & 1:
                im = mpf_neg(im)
        else:
            im = mpf_div(mpf_mul(mpf_pi(wp), mpf_pow_int(negx, n_orig-1, wp), wp), M, prec, rnd)
        return re, im
    else:
        return re, None
|
| 714 |
+
|
| 715 |
+
def mpf_ci_si_taylor(x, wp, which=0):
    """
    Taylor series at working precision ``wp`` for real x:

    which = 0 -- Ci(x) - (euler+log(x))
    otherwise -- Si(x)

    The result is returned as an unrounded mpf value.
    """
    xfix = to_fixed(x, wp)
    neg_square = -(xfix*xfix) >> wp
    if which == 0:
        # Even powers: start from 1, first summed term is -x^2/(2*2!)
        total, power, k = 0, (MPZ_ONE << wp), 2
    else:
        # Odd powers: start from x
        total, power, k = xfix, xfix, 3
    while power:
        # power becomes (-1)^j x^k / k!
        power = (power*neg_square // (k*(k-1))) >> wp
        total += power // k
        k += 2
    return from_man_exp(total, -wp)
|
| 731 |
+
|
| 732 |
+
def mpc_ci_si_taylor(re, im, wp, which=0):
    """
    Complex Taylor series for z = re + i*im (a pair of mpf tuples) at
    working precision ``wp``:

    which = 0 -- Ci(z) - (euler+log(z))
    otherwise -- Si(z)

    Returns the pair (real, imag) of unrounded mpf values.

    The following code is only designed for small arguments,
    and not too small arguments (for relative accuracy).  Raises
    NotImplementedError when the magnitude of z exceeds 2, lies below
    -wp, or cannot be determined because both components have a zero
    mantissa.
    """
    re_nonzero = re[1]
    im_nonzero = im[1]
    if re_nonzero and im_nonzero:
        mag = max(re[2]+re[3], im[2]+im[3])
    elif re_nonzero:
        mag = re[2]+re[3]
    elif im_nonzero:
        mag = im[2]+im[3]
    else:
        # Both mantissas are zero (zero, infinity or nan components): there
        # is no magnitude to work with, so report the argument as
        # unsupported instead of failing on an unbound local variable.
        raise NotImplementedError
    if mag > 2 or mag < -wp:
        raise NotImplementedError
    # Extra bits so that small arguments keep their relative accuracy
    wp += (2-mag)
    zre = to_fixed(re, wp)
    zim = to_fixed(im, wp)
    # -z^2 in fixed point
    z2re = (zim*zim-zre*zre)>>wp
    z2im = (-2*zre*zim)>>wp
    if which == 0:
        sre, sim, tre, tim, k = 0, 0, (MPZ_ONE<<wp), 0, 2
    else:
        sre, sim, tre, tim, k = zre, zim, zre, zim, 3
    while max(abs(tre), abs(tim)) > 2:
        f = k*(k-1)
        # Term becomes (-1)^j z^k / k!
        tre, tim = ((tre*z2re-tim*z2im)//f)>>wp, ((tre*z2im+tim*z2re)//f)>>wp
        sre += tre//k
        sim += tim//k
        k += 2
    return from_man_exp(sre, -wp), from_man_exp(sim, -wp)
|
| 762 |
+
|
| 763 |
+
def mpf_ci_si(x, prec, rnd=round_fast, which=2):
    """
    Calculation of Ci(x), Si(x) for real x.

    which = 0 -- returns (Ci(x), -)
    which = 1 -- returns (Si(x), -)
    which = 2 -- returns (Ci(x), Si(x))

    Note: if x < 0, Ci(x) needs an additional imaginary term, pi*i.

    The result is always a pair (ci, si); a component that was not
    requested may be None.
    """
    wp = prec + 20
    sign, man, exp, bc = x
    ci, si = None, None
    # Zero mantissa: zero, infinities or nan
    if not man:
        if x == fzero:
            return (fninf, fzero)
        if x == fnan:
            return (x, x)
        # At +/-infinity: Ci -> 0, Si -> +/- pi/2
        ci = fzero
        if which != 0:
            if x == finf:
                si = mpf_shift(mpf_pi(prec, rnd), -1)
            if x == fninf:
                # Rounding direction is mirrored because the value is negated
                si = mpf_neg(mpf_shift(mpf_pi(prec, negative_rnd[rnd]), -1))
        return (ci, si)
    # For small x: Ci(x) ~ euler + log(x), Si(x) ~ x
    mag = exp+bc
    if mag < -wp:
        if which != 0:
            si = mpf_perturb(x, 1-sign, prec, rnd)
        if which != 1:
            y = mpf_euler(wp)
            xabs = mpf_abs(x)
            ci = mpf_add(y, mpf_log(xabs, wp), prec, rnd)
        return ci, si
    # For huge x: Ci(x) ~ sin(x)/x, Si(x) ~ pi/2
    elif mag > wp:
        if which != 0:
            if sign:
                si = mpf_neg(mpf_pi(prec, negative_rnd[rnd]))
            else:
                si = mpf_pi(prec, rnd)
            si = mpf_shift(si, -1)
        if which != 1:
            ci = mpf_div(mpf_sin(x, wp), x, prec, rnd)
        return ci, si
    else:
        # Intermediate range: add guard bits according to the magnitude
        wp += abs(mag)
    # Use an asymptotic series? The smallest value of n!/x^n
    # occurs for n ~ x, where the magnitude is ~ exp(-x).
    asymptotic = mag-1 > math.log(wp, 2)
    # Case 1: convergent series near 0
    if not asymptotic:
        if which != 0:
            si = mpf_pos(mpf_ci_si_taylor(x, wp, 1), prec, rnd)
        if which != 1:
            ci = mpf_ci_si_taylor(x, wp, 0)
            ci = mpf_add(ci, mpf_euler(wp), wp)
            ci = mpf_add(ci, mpf_log(mpf_abs(x), wp), prec, rnd)
        return ci, si
    # Work with |x|; the sign is restored for Si at the end
    x = mpf_abs(x)
    # Case 2: asymptotic series for x >> 1
    xf = to_fixed(x, wp)
    xr = (MPZ_ONE<<(2*wp)) // xf   # 1/x
    # s1 collects the even-order terms, s2 the odd-order terms
    s1 = (MPZ_ONE << wp)
    s2 = xr
    t = xr
    k = 2
    while t:
        # Sign alternates once per pair of terms
        t = -t
        t = (t*xr*k)>>wp
        k += 1
        s1 += t
        t = (t*xr*k)>>wp
        k += 1
        s2 += t
    s1 = from_man_exp(s1, -wp)
    s2 = from_man_exp(s2, -wp)
    s1 = mpf_div(s1, x, wp)
    s2 = mpf_div(s2, x, wp)
    cos, sin = mpf_cos_sin(x, wp)
    # Ci(x) = sin(x)*s1-cos(x)*s2
    # Si(x) = pi/2-cos(x)*s1-sin(x)*s2
    if which != 0:
        si = mpf_add(mpf_mul(cos, s1), mpf_mul(sin, s2), wp)
        si = mpf_sub(mpf_shift(mpf_pi(wp), -1), si, wp)
        # Si is odd: restore the sign of the original argument
        if sign:
            si = mpf_neg(si)
        si = mpf_pos(si, prec, rnd)
    if which != 1:
        ci = mpf_sub(mpf_mul(sin, s1), mpf_mul(cos, s2), prec, rnd)
    return ci, si
|
| 855 |
+
|
| 856 |
+
def mpf_ci(x, prec, rnd=round_fast):
    """
    Cosine integral Ci(x) for real x.

    Raises ComplexResult when x is negative.
    """
    if mpf_sign(x) < 0:
        raise ComplexResult
    ci, _ = mpf_ci_si(x, prec, rnd, which=0)
    return ci
|
| 860 |
+
|
| 861 |
+
def mpf_si(x, prec, rnd=round_fast):
    """Sine integral Si(x) for real x."""
    _, si = mpf_ci_si(x, prec, rnd, which=1)
    return si
|
| 863 |
+
|
| 864 |
+
def mpc_ci(z, prec, rnd=round_fast):
    """
    Cosine integral Ci(z) for complex z = (re, im); returns a pair of
    mpf values.  A real argument is handled by the real routine, adding
    an imaginary part of pi when the real part is negative.
    """
    re, im = z
    if im == fzero:
        real_part, _ = mpf_ci_si(re, prec, rnd, which=0)
        if mpf_sign(re) < 0:
            imag_part = mpf_pi(prec, rnd)
        else:
            imag_part = fzero
        return (real_part, imag_part)
    wp = prec + 20
    # Ci(z) = euler + log(z) + Taylor series
    series_re, series_im = mpc_ci_si_taylor(re, im, wp, 0)
    series_re = mpf_add(series_re, mpf_euler(wp), wp)
    return mpc_add((series_re, series_im), mpc_log(z, wp), prec, rnd)
|
| 876 |
+
|
| 877 |
+
def mpc_si(z, prec, rnd=round_fast):
    """
    Sine integral Si(z) for a complex value z = (re, im).

    Returns a (real, imag) pair. A purely real z is delegated to
    mpf_ci_si; otherwise mpc_ci_si_taylor is evaluated with 20 extra
    bits and the result is rounded to prec.
    """
    re, im = z
    if im != fzero:
        wp = prec + 20
        series = mpc_ci_si_taylor(re, im, wp, 1)
        return mpc_pos(series, prec, rnd)
    real_part = mpf_ci_si(re, prec, rnd, 1)[1]
    return (real_part, fzero)
|
| 884 |
+
|
| 885 |
+
|
| 886 |
+
#-----------------------------------------------------------------------#
|
| 887 |
+
# #
|
| 888 |
+
# Bessel functions #
|
| 889 |
+
# #
|
| 890 |
+
#-----------------------------------------------------------------------#
|
| 891 |
+
|
| 892 |
+
# A Bessel function of the first kind of integer order, J_n(x), is
|
| 893 |
+
# given by the power series
|
| 894 |
+
|
| 895 |
+
# oo
|
| 896 |
+
# ___ k 2 k + n
|
| 897 |
+
# \ (-1) / x \
|
| 898 |
+
# J_n(x) = ) ----------- | - |
|
| 899 |
+
# /___ k! (k + n)! \ 2 /
|
| 900 |
+
# k = 0
|
| 901 |
+
|
| 902 |
+
# Simplifying the quotient between two successive terms gives the
|
| 903 |
+
# ratio x^2 / (-4*k*(k+n)). Hence, we only need one full-precision
|
| 904 |
+
# multiplication and one division by a small integer per term.
|
| 905 |
+
# The complex version is very similar, the only difference being
|
| 906 |
+
# that the multiplication is actually 4 multiplies.
|
| 907 |
+
|
| 908 |
+
# In the general case, we have
|
| 909 |
+
# J_v(x) = (x/2)**v / v! * 0F1(v+1, (-1/4)*x**2)
|
| 910 |
+
|
| 911 |
+
# TODO: for extremely large x, we could use an asymptotic
|
| 912 |
+
# trigonometric approximation.
|
| 913 |
+
|
| 914 |
+
# TODO: recompute at higher precision if the fixed-point mantissa
|
| 915 |
+
# is very small
|
| 916 |
+
|
| 917 |
+
def mpf_besseljn(n, x, prec, rounding=round_fast):
    """
    Bessel function J_n(x) of integer order n for a real mpf x,
    evaluated by summing the power series in fixed-point arithmetic.

    Note that prec is increased by 50 before use, and that enlarged
    value is also the precision passed to the final from_man_exp.
    """
    prec += 50
    # J_{-n} = (-1)**n * J_n, so only odd negative orders flip the sign.
    flip_sign = n < 0 and n & 1
    magnitude = x[2] + x[3]
    n = abs(n)
    wp = prec + 20 + n*bitcount(n)
    if magnitude < 0:
        # Small |x|: the leading term (x/2)**n / n! is tiny, so widen wp.
        wp -= n * magnitude
    xfix = to_fixed(x, wp)
    xsq = (xfix**2) >> wp
    if n:
        term = (xfix**n // ifac(n)) >> ((n-1)*wp + n)
    else:
        term = MPZ_ONE << wp
    total = term
    k = 1
    while term:
        # Ratio of successive terms is x**2 / (-4*k*(k+n)).
        term = ((term * xsq) // (-4*k*(k+n))) >> wp
        total += term
        k += 1
    if flip_sign:
        total = -total
    return from_man_exp(total, -wp, prec, rounding)
|
| 939 |
+
|
| 940 |
+
def mpc_besseljn(n, z, prec, rounding=round_fast):
    """
    Bessel function J_n(z) of integer order n for a complex z given as
    a (real, imag) pair of mpf values.

    The power series is summed in fixed-point arithmetic at an enlarged
    working precision; both components of the result are rounded back
    to the precision originally requested.
    """
    # J_{-n} = (-1)**n * J_n, so only odd negative orders flip the sign.
    flip_sign = n < 0 and n & 1
    n = abs(n)
    target_prec = prec
    zre, zim = z
    magnitude = max(zre[2]+zre[3], zim[2]+zim[3])
    prec += 20 + n*bitcount(n) + abs(magnitude)
    if magnitude < 0:
        prec -= n * magnitude
    zre = to_fixed(zre, prec)
    zim = to_fixed(zim, prec)
    # z**2 in fixed point; the imaginary part is 2*zre*zim, hence prec-1.
    z2re = (zre**2 - zim**2) >> prec
    z2im = (zre*zim) >> (prec-1)
    if n:
        pow_re, pow_im = complex_int_pow(zre, zim, n)
        nfac = ifac(n)
        shift = (n-1)*prec + n
        tre = (pow_re // nfac) >> shift
        tim = (pow_im // nfac) >> shift
    else:
        tre = MPZ_ONE << prec
        tim = MPZ_ZERO
    sre, sim = tre, tim
    k = 1
    while abs(tre) + abs(tim) > 3:
        divisor = -4*k*(k+n)
        tre, tim = tre*z2re - tim*z2im, tim*z2re + tre*z2im
        tre = (tre // divisor) >> prec
        tim = (tim // divisor) >> prec
        sre += tre
        sim += tim
        k += 1
    if flip_sign:
        sre = -sre
        sim = -sim
    real_part = from_man_exp(sre, -prec, target_prec, rounding)
    imag_part = from_man_exp(sim, -prec, target_prec, rounding)
    return (real_part, imag_part)
|
| 975 |
+
|
| 976 |
+
def mpf_agm(a, b, prec, rnd=round_fast):
    """
    Arithmetic-geometric mean agm(a, b) of two nonnegative mpf values.

    Raises ComplexResult("agm of a negative number") if either operand
    has its sign field set. If either mantissa is zero, the special
    cases are: nan if an operand is nan, nan for agm(inf, 0), inf if an
    operand is inf, and zero otherwise.
    """
    asign, aman, aexp, abc = a
    bsign, bman, bexp, bbc = b
    if asign or bsign:
        raise ComplexResult("agm of a negative number")
    # A zero mantissa means inf, nan or zero in at least one operand.
    if not (aman and bman):
        if a == fnan or b == fnan:
            return fnan
        if a == finf:
            return fnan if b == fzero else finf
        if b == finf:
            return fnan if a == fzero else finf
        # agm(0,x) = agm(x,0) = 0
        return fzero
    wp = prec + 20
    amag = aexp + abc
    bmag = bexp + bbc
    # Bring the operands to roughly the same magnitude with a few
    # floating-point AGM steps before switching to fixed point.
    gap = abs(amag - bmag)
    if gap > 10:
        while gap > 10:
            a, b = (mpf_shift(mpf_add(a, b, wp), -1),
                    mpf_sqrt(mpf_mul(a, b, wp), wp))
            gap //= 2
        amag = a[2] + a[3]
        bmag = b[2] + b[3]
    # Use agm(a,b) = agm(x*a,x*b)/x with x = 2**scale to obtain a, b ~= 1
    smallest = min(amag, bmag)
    largest = max(amag, bmag)
    scale = 0
    if smallest < -8:
        # Too small: precision would be lost going to fixed point.
        scale = -smallest
    elif largest > 20:
        # Too large: fixed point would carry needlessly big integers.
        scale = -largest
    if scale:
        a = mpf_shift(a, scale)
        b = mpf_shift(b, scale)
    mean = agm_fixed(to_fixed(a, wp), to_fixed(b, wp), wp)
    return from_man_exp(mean, -wp-scale, prec, rnd)
|
| 1034 |
+
|
| 1035 |
+
def mpf_agm1(a, prec, rnd=round_fast):
    """
    Arithmetic-geometric mean agm(1, a) of a nonnegative mpf value a;
    a thin wrapper that calls mpf_agm with fone as the first operand.
    """
    result = mpf_agm(fone, a, prec, rnd)
    return result
|
| 1041 |
+
|
| 1042 |
+
def mpc_agm(a, b, prec, rnd=round_fast):
    """
    Complex arithmetic-geometric mean of a and b, each a (real, imag)
    pair of mpf values. Returns a (real, imag) pair rounded to prec
    bits in the direction rnd.

    Special cases: (nan, nan) if either operand is infinite or nan;
    zero if either operand is zero or if a == -b.

    TODO:
    * check that convergence works as intended
    * optimize
    * select a nonarbitrary branch
    """
    if mpc_is_infnan(a) or mpc_is_infnan(b):
        return fnan, fnan
    if mpc_zero in (a, b):
        return fzero, fzero
    if mpc_neg(a) == b:
        return fzero, fzero
    wp = prec+20
    # Relative tolerance: stop once |a - b| < 2**(10 - wp) * max(|a|, |b|).
    eps = mpf_shift(fone, -wp+10)
    while 1:
        a1 = mpc_shift(mpc_add(a, b, wp), -1)
        b1 = mpc_sqrt(mpc_mul(a, b, wp), wp)
        a, b = a1, b1
        # 10 bits are enough for the size and error estimates.
        size = mpf_min_max([mpc_abs(a,10), mpc_abs(b,10)])[1]
        err = mpc_abs(mpc_sub(a, b, 10), 10)
        if size == fzero or mpf_lt(err, mpf_mul(eps, size)):
            # Round to the requested precision; previously the value was
            # returned at working precision and prec/rnd were ignored.
            return mpc_pos(a, prec, rnd)
|
| 1067 |
+
|
| 1068 |
+
def mpc_agm1(a, prec, rnd=round_fast):
    """
    Complex arithmetic-geometric mean agm(1, a); a thin wrapper that
    calls mpc_agm with mpc_one as the first operand.
    """
    result = mpc_agm(mpc_one, a, prec, rnd)
    return result
|
| 1070 |
+
|
| 1071 |
+
def mpf_ellipk(x, prec, rnd=round_fast):
    """
    Complete elliptic integral of the first kind K(x) for a real mpf x,
    computed as K(x) = pi/2 / agm(1, sqrt(1-x)).

    Special values: K(0) = pi/2, K(-inf) = 0, K(nan) = nan, K(1) = +inf.
    """
    mantissa = x[1]
    if not mantissa:
        if x == fzero:
            return mpf_shift(mpf_pi(prec, rnd), -1)
        if x == fninf:
            return fzero
        if x == fnan:
            return x
    if x == fone:
        return finf
    # TODO: for |x| << 1/2, one could fall back to
    # pi/2 * hyp2f1_rat((1,2),(1,2),(1,1), x)
    wp = prec + 15
    # 1-x is negative for x > 1, in which case the square root is
    # expected to raise ComplexResult.
    one_minus_x = mpf_sub(fone, x, wp)
    mean = mpf_agm1(mpf_sqrt(one_minus_x, wp), wp)
    quotient = mpf_div(mpf_pi(wp), mean, prec, rnd)
    return mpf_shift(quotient, -1)
|
| 1090 |
+
|
| 1091 |
+
def mpc_ellipk(z, prec, rnd=round_fast):
    """
    Complete elliptic integral of the first kind K(z) for a complex
    z = (re, im). Returns a (real, imag) pair.

    A real argument <= 1 is delegated to mpf_ellipk; real +inf gives
    zero. Everything else uses pi/2 / agm(1, sqrt(1-z)).
    """
    re, im = z
    on_real_axis = (im == fzero)
    if on_real_axis:
        if re == finf:
            return mpc_zero
        if mpf_le(re, fone):
            return mpf_ellipk(re, prec, rnd), fzero
    wp = prec + 15
    root = mpc_sqrt(mpc_sub(mpc_one, z, wp), wp)
    mean = mpc_agm1(root, wp)
    quotient = mpc_mpf_div(mpf_pi(wp), mean, prec, rnd)
    return mpc_shift(quotient, -1)
|
| 1103 |
+
|
| 1104 |
+
def mpf_ellipe(x, prec, rnd=round_fast):
    """
    Complete elliptic integral of the second kind E(x) for a real mpf x.

    Uses E = (1-m)*(K'(m)*2*m + K(m)), see
    http://functions.wolfram.com/EllipticIntegrals/EllipticK/20/01/0001/
    with K' approximated by a backward finite difference.

    Special values: E(0) = pi/2, E(-inf) = +inf, E(nan) = nan,
    E(1) = 1; E(+inf) raises ComplexResult.
    """
    sign, man, exp, bc = x
    if not man:
        if x == fzero:
            return mpf_shift(mpf_pi(prec, rnd), -1)
        if x == fninf:
            return finf
        if x == fnan:
            return x
        if x == finf:
            raise ComplexResult
    if x == fone:
        return fone
    wp = prec+20
    mag = exp+bc
    if mag < -wp:
        # |x| is below working precision, so E(x) rounds to E(0) = pi/2.
        return mpf_shift(mpf_pi(prec, rnd), -1)
    # Finite difference for K' with step h = 2**step_exp
    step_exp = max(mag, 0) - wp
    step = mpf_shift(fone, step_exp)
    k_here = mpf_ellipk(x, 2*wp)
    k_back = mpf_ellipk(mpf_sub(x, step), 2*wp)
    k_deriv = mpf_shift(mpf_sub(k_here, k_back), -step_exp)
    one_minus_x = mpf_sub(fone, x)
    deriv_term = mpf_mul(k_deriv, mpf_shift(x,1), wp)
    return mpf_mul(one_minus_x, mpf_add(k_here, deriv_term), prec, rnd)
|
| 1133 |
+
|
| 1134 |
+
def mpc_ellipe(z, prec, rnd=round_fast):
    """
    Complete elliptic integral of the second kind E(z) for a complex
    z = (re, im). Returns a (real, imag) pair.

    A real argument <= 1 is delegated to mpf_ellipe; real +inf gives
    (0, +inf). Otherwise E = (1-z)*(K'(z)*2*z + K(z)) is evaluated with
    K' approximated by a forward finite difference along the real axis.
    """
    re, im = z
    if im == fzero:
        if re == finf:
            return (fzero, finf)
        if mpf_le(re, fone):
            return mpf_ellipe(re, prec, rnd), fzero
    wp = prec + 15
    # A 1-bit estimate of |z| is enough to choose the step size.
    size = mpc_abs(z, 1)
    step_exp = max(size[2]+size[3], 0) - wp
    step = mpf_shift(fone, step_exp)
    k_here = mpc_ellipk(z, 2*wp)
    k_ahead = mpc_ellipk(mpc_add_mpf(z, step, 2*wp), 2*wp)
    k_deriv = mpc_shift(mpc_sub(k_ahead, k_here, wp), -step_exp)
    one_minus_z = mpc_sub(mpc_one, z, wp)
    deriv_term = mpc_mul(k_deriv, mpc_shift(z,1), wp)
    return mpc_mul(one_minus_z, mpc_add(k_here, deriv_term, wp), prec, rnd)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libintmath.py
ADDED
|
@@ -0,0 +1,584 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Utility functions for integer math.
|
| 3 |
+
|
| 4 |
+
TODO: rename, cleanup, perhaps move the gmpy wrapper code
|
| 5 |
+
here from settings.py
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import math
|
| 10 |
+
from bisect import bisect
|
| 11 |
+
|
| 12 |
+
from .backend import xrange
|
| 13 |
+
from .backend import BACKEND, gmpy, sage, sage_utils, MPZ, MPZ_ONE, MPZ_ZERO
|
| 14 |
+
|
| 15 |
+
# small_trailing[b] is the number of trailing zero bits of the byte b
# (0 for b == 0 and for odd b).
small_trailing = [0] * 256
for j in range(1, 8):
    # Bytes whose lowest set bit is bit j: 2**j, 3*2**j, 5*2**j, ...
    small_trailing[(1 << j)::(1 << (j + 1))] = [j] * (1 << (7 - j))
|
| 18 |
+
|
| 19 |
+
def giant_steps(start, target, n=2):
    """
    Return a list of integers ~=

    [start, n*start, ..., target/n^2, target/n, target]

    but conservatively rounded so that the quotient between two
    successive elements is actually slightly less than n.

    With n = 2, this describes suitable precision steps for a
    quadratically convergent algorithm such as Newton's method;
    with n = 3 steps for cubic convergence (Halley's method), etc.

    The list is built downwards from target using x -> x//n + 2. For
    very small values that map stops decreasing (e.g. 4//2 + 2 == 4),
    so generation also stops as soon as a step makes no progress,
    instead of looping forever.

    >>> giant_steps(50,1000)
    [66, 128, 253, 502, 1000]
    >>> giant_steps(50,1000,4)
    [65, 252, 1000]

    """
    steps = [target]
    while steps[-1] > start*n:
        smaller = steps[-1]//n + 2
        if smaller >= steps[-1]:
            # No further reduction is possible; avoid an endless loop.
            break
        steps.append(smaller)
    return steps[::-1]
|
| 42 |
+
|
| 43 |
+
def rshift(x, n):
    """Shift the integer x right by n bits with floor rounding.

    In contrast to the plain expression (x >> n), a negative n is
    accepted and results in a left shift by -n bits."""
    return x >> n if n >= 0 else x << (-n)
|
| 49 |
+
|
| 50 |
+
def lshift(x, n):
    """Shift the integer x left by n bits.

    In contrast to the plain expression (x << n), a negative n is
    accepted and results in a right shift by -n bits with the default
    (floor) rounding."""
    return x << n if n >= 0 else x >> (-n)
|
| 56 |
+
|
| 57 |
+
if BACKEND == 'sage':
    # With the sage backend the plain operators replace the Python
    # helpers defined above.
    import operator
    rshift, lshift = operator.rshift, operator.lshift
|
| 61 |
+
|
| 62 |
+
def python_trailing(n):
    """Count the number of trailing zero bits in abs(n).

    Returns 0 for n == 0. Works one byte at a time, using the
    small_trailing table for the first nonzero byte."""
    if not n:
        return 0
    zero_bits = 0
    byte = n & 0xff
    # Skip whole zero bytes, eight bits at a time.
    while not byte:
        n >>= 8
        zero_bits += 8
        byte = n & 0xff
    return zero_bits + small_trailing[byte]
|
| 75 |
+
|
| 76 |
+
if BACKEND == 'gmpy':
    # Compare the major version numerically. Comparing the version
    # strings directly is lexicographic and would classify a major
    # version of 10 or more as older than '2'.
    if int(gmpy.version().split('.')[0]) >= 2:
        def gmpy_trailing(n):
            """Count the number of trailing zero bits in abs(n) using gmpy."""
            if n: return MPZ(n).bit_scan1()
            else: return 0
    else:
        def gmpy_trailing(n):
            """Count the number of trailing zero bits in abs(n) using gmpy."""
            if n: return MPZ(n).scan1()
            else: return 0
|
| 87 |
+
|
| 88 |
+
# Small powers of 2
|
| 89 |
+
powers = [1<<_ for _ in range(300)]
|
| 90 |
+
|
| 91 |
+
def python_bitcount(n):
    """Calculate bit size of the nonnegative integer n.

    For n below 2**299 the answer is found by bisecting the table of
    powers of two; larger n use a floating-point logarithm to strip
    most bits and the bctable lookup for the remaining ones."""
    position = bisect(powers, n)
    if position == 300:
        # n is beyond the table: drop all but the top few bits.
        shift = int(math.log(n, 2)) - 4
        return shift + bctable[n >> shift]
    return position
|
| 98 |
+
|
| 99 |
+
def gmpy_bitcount(n):
    """Calculate bit size of the nonnegative integer n (0 for n == 0),
    via the number of base-2 digits reported by the MPZ type."""
    if not n:
        return 0
    return MPZ(n).numdigits(2)
|
| 103 |
+
|
| 104 |
+
#def sage_bitcount(n):
|
| 105 |
+
# if n: return MPZ(n).nbits()
|
| 106 |
+
# else: return 0
|
| 107 |
+
|
| 108 |
+
def sage_trailing(n):
    """Count trailing zero bits of n using the MPZ type's
    trailing_zero_bits method."""
    as_mpz = MPZ(n)
    return as_mpz.trailing_zero_bits()
|
| 110 |
+
|
| 111 |
+
# Pick the bitcount/trailing implementations matching the backend.
if BACKEND == 'gmpy':
    bitcount, trailing = gmpy_bitcount, gmpy_trailing
elif BACKEND == 'sage':
    sage_bitcount = sage_utils.bitcount
    bitcount, trailing = sage_bitcount, sage_trailing
else:
    bitcount, trailing = python_bitcount, python_trailing

# Use gmpy's bit_length directly when the installed gmpy provides it.
if BACKEND == 'gmpy' and 'bit_length' in dir(gmpy):
    bitcount = gmpy.bit_length

# Lookup tables used to avoid slow function calls as far as possible
trailtable = [trailing(n) for n in range(256)]
bctable = [bitcount(n) for n in range(1024)]
|
| 128 |
+
|
| 129 |
+
# TODO: speed up for bases 2, 4, 8, 16, ...
|
| 130 |
+
|
| 131 |
+
def bin_to_radix(x, xbits, base, bdigits):
    """Changes radix of a fixed-point number; i.e., converts the value
    x * 2**(-xbits) to the integer floor(value * base**bdigits)."""
    scale = MPZ(base)**bdigits
    return (x * scale) >> xbits
|
| 135 |
+
|
| 136 |
+
stddigits = '0123456789abcdefghijklmnopqrstuvwxyz'
|
| 137 |
+
|
| 138 |
+
def small_numeral(n, base=10, digits=stddigits):
    """Return the string numeral of an integer in an arbitrary base.
    Most efficient for small input.

    Base 10 is delegated to str(). For other bases, zero yields the
    zero digit and negative n yields "-" followed by the numeral of
    -n. Previously zero produced an empty string, and a negative n
    never terminated because divmod floors towards -1."""
    if base == 10:
        return str(n)
    if n < 0:
        return "-" + small_numeral(-n, base, digits)
    if not n:
        return digits[0]
    digs = []
    while n:
        n, digit = divmod(n, base)
        digs.append(digits[digit])
    # Digits were produced least-significant first.
    return "".join(digs[::-1])
|
| 148 |
+
|
| 149 |
+
def numeral_python(n, base=10, size=0, digits=stddigits):
    """Represent the integer n as a string of digits in the given base.
    Recursive division is used to make this function about 3x faster
    than Python's str() for converting integers to decimal strings.

    The 'size' parameter specifies the number of digits in n; this
    number is only used to determine splitting points and need not be
    exact."""
    if not n:
        return "0"
    if n < 0:
        return "-" + numeral(-n, base, size, digits)
    # Fast enough to do directly
    if size < 250:
        return small_numeral(n, base, digits)
    # Split into a high and a low part of about half the digits each;
    # the low part is zero-padded to exactly `half` digits.
    half = (size // 2) + (size & 1)
    high, low = divmod(n, base**half)
    high_digits = numeral(high, base, half, digits)
    low_digits = numeral(low, base, half, digits).rjust(half, "0")
    return high_digits + low_digits
|
| 170 |
+
|
| 171 |
+
def numeral_gmpy(n, base=10, size=0, digits=stddigits):
    """Represent the integer n as a string of digits in the given base,
    using gmpy.digits for all but extremely large inputs.

    The 'size' parameter specifies the number of digits in n; this
    number is only used to determine splitting points and need not be
    exact."""
    if n < 0:
        return "-" + numeral(-n, base, size, digits)
    # gmpy.digits() may cause a segmentation fault when trying to convert
    # extremely large values to a string. The size limit may need to be
    # adjusted on some platforms, but 1500000 works on Windows and Linux.
    if size >= 1500000:
        # Split in half and convert the two parts recursively; the
        # low part is zero-padded to exactly `half` digits.
        half = (size // 2) + (size & 1)
        high, low = divmod(n, MPZ(base)**half)
        high_digits = numeral(high, base, half, digits)
        low_digits = numeral(low, base, half, digits).rjust(half, "0")
        return high_digits + low_digits
    return gmpy.digits(n, base)
|
| 192 |
+
|
| 193 |
+
# Integer-to-string conversion routine matching the active backend.
numeral = numeral_gmpy if BACKEND == "gmpy" else numeral_python
|
| 197 |
+
|
| 198 |
+
_1_800 = 1<<800
|
| 199 |
+
_1_600 = 1<<600
|
| 200 |
+
_1_400 = 1<<400
|
| 201 |
+
_1_200 = 1<<200
|
| 202 |
+
_1_100 = 1<<100
|
| 203 |
+
_1_50 = 1<<50
|
| 204 |
+
|
| 205 |
+
def isqrt_small_python(x):
    """
    Correctly (floor) rounded integer square root, using
    division. Fast up to ~200 digits.
    """
    if not x:
        return x
    if x >= _1_800:
        half_bits = bitcount(x)//2
        # Float square root of the top ~100 bits; +2 is to round up
        r = int((x >> (2*half_bits - 100))**0.5 + 2) << (half_bits - 50)
    elif x >= _1_50:
        # Initial estimate can be any integer >= the true root; round up
        r = int(x**0.5 * 1.00000000000001) + 1
    else:
        # Exact with IEEE double precision arithmetic
        return int(x**0.5)
    # Starting from an overestimate, the following iteration precisely
    # computes floor(sqrt(x)). See e.g. Crandall & Pomerance,
    # "Prime Numbers: A Computational Perspective"
    while True:
        candidate = (r + x//r) >> 1
        if candidate >= r:
            return r
        r = candidate
|
| 230 |
+
|
| 231 |
+
def isqrt_fast_python(x):
    """
    Fast approximate integer square root, computed using division-free
    Newton iteration for large x. For random integers the result is almost
    always correct (floor(sqrt(x))), but is 1 ulp too small with a roughly
    0.1% probability. If x is very close to an exact square, the answer is
    1 ulp wrong with high probability.

    With 0 guard bits, the largest error over a set of 10^5 random
    inputs of size 1-10^5 bits was 3 ulp. The use of 10 guard bits
    almost certainly guarantees a max 1 ulp error.
    """
    # Use direct division-based iteration if sqrt(x) < 2^400
    # Assume floating-point square root accurate to within 1 ulp, then:
    # 0 Newton iterations good to 52 bits
    # 1 Newton iterations good to 104 bits
    # 2 Newton iterations good to 208 bits
    # 3 Newton iterations good to 416 bits
    if x < _1_800:
        root = int(x**0.5)
        for threshold in (_1_100, _1_200, _1_400):
            if x < threshold:
                break
            root = (root + x//root) >> 1
        return root
    bits = bitcount(x)
    guard_bits = 10
    x <<= 2*guard_bits
    bits += 2*guard_bits
    bits += (bits & 1)          # make the bit count even
    half_bits = bits//2
    startprec = min(50, half_bits)
    # Newton iteration for 1/sqrt(x), with floating-point starting value
    r = int(2.0**(2*startprec) * (x >> (bits-2*startprec)) ** -0.5)
    prev = startprec
    for p in giant_steps(startprec, half_bits):
        # r**2, scaled from real size 2**(-bits) to 2**p
        r_squared = (r*r) >> (2*prev - p)
        # x*r**2, scaled from real size ~1.0 to 2**p
        x_r_squared = ((x >> (bits-p)) * r_squared) >> p
        # New value of r, scaled from real size 2**(-bits/2) to 2**p
        r = (r * ((3<<p) - x_r_squared)) >> (prev+1)
        prev = p
    # (1/sqrt(x))*x = sqrt(x)
    return (r*(x>>half_bits)) >> (p+guard_bits)
|
| 278 |
+
|
| 279 |
+
def sqrtrem_python(x):
    """Correctly rounded integer (floor) square root with remainder.

    Returns (y, rem) with y = floor(sqrt(x)) and rem = x - y*y, so that
    0 <= rem <= 2*y."""
    # to check cutoff:
    # plot(lambda x: timing(isqrt, 2**int(x)), [0,2000])
    if x < _1_600:
        y = isqrt_small_python(x)
        return y, x - y*y
    # isqrt_fast_python may be slightly too small, so start one above it.
    y = isqrt_fast_python(x) + 1
    rem = x - y*y
    # Both loops maintain the invariant rem == x - y*y.
    # Estimate too large: (y-1)**2 = y**2 - (2*(y-1) + 1)
    while rem < 0:
        y -= 1
        rem += 2*y + 1
    # Estimate too small: y is the floor root exactly when rem <= 2*y.
    # (The previous version tested rem > 2*(1+y) and subtracted 1+2*y
    # after incrementing y, which broke the invariant.)
    while rem > 2*y:
        rem -= 2*y + 1
        y += 1
    return y, rem
|
| 298 |
+
|
| 299 |
+
def isqrt_python(x):
    """Integer square root with correct (floor) rounding; the root
    component of sqrtrem_python(x)."""
    root_and_rem = sqrtrem_python(x)
    return root_and_rem[0]
|
| 302 |
+
|
| 303 |
+
def sqrt_fixed(x, prec):
    """Square root of the fixed-point number x with prec fractional
    bits, via isqrt_fast; the result is again fixed-point with prec
    fractional bits."""
    scaled = x << prec
    return isqrt_fast(scaled)
|
| 305 |
+
|
| 306 |
+
sqrt_fixed2 = sqrt_fixed
|
| 307 |
+
|
| 308 |
+
# Select the integer square root routines matching the backend.
if BACKEND == 'gmpy':
    # Compare the major version numerically. Comparing the version
    # strings directly is lexicographic and would classify a major
    # version of 10 or more as older than '2'.
    if int(gmpy.version().split('.')[0]) >= 2:
        isqrt_small = isqrt_fast = isqrt = gmpy.isqrt
        sqrtrem = gmpy.isqrt_rem
    else:
        isqrt_small = isqrt_fast = isqrt = gmpy.sqrt
        sqrtrem = gmpy.sqrtrem
elif BACKEND == 'sage':
    # Prefer sage_utils.isqrt when it exists, else use the MPZ method.
    isqrt_small = isqrt_fast = isqrt = \
        getattr(sage_utils, "isqrt", lambda n: MPZ(n).isqrt())
    sqrtrem = lambda n: MPZ(n).sqrtrem()
else:
    isqrt_small = isqrt_small_python
    isqrt_fast = isqrt_fast_python
    isqrt = isqrt_python
    sqrtrem = sqrtrem_python
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
def ifib(n, _cache={}):
    """Computes the nth Fibonacci number as an integer, for
    integer n.

    Negative indices use F(-n) = (-1)**(n+1) * F(n). Results for
    indices below 250 are memoized in _cache."""
    if n < 0:
        return (-1)**(-n+1) * ifib(-n)
    try:
        return _cache[n]
    except KeyError:
        pass
    index = n
    # Use Dijkstra's logarithmic algorithm
    # The following implementation is basically equivalent to
    # http://en.literateprograms.org/Fibonacci_numbers_(Scheme)
    a, b, p, q = MPZ_ONE, MPZ_ZERO, MPZ_ZERO, MPZ_ONE
    while n:
        if n & 1:
            cross = a*q
            a, b = b*q + cross + a*p, b*p + cross
            n -= 1
        else:
            q_squared = q*q
            p, q = p*p + q_squared, q_squared + 2*p*q
            n >>= 1
    if index < 250:
        _cache[index] = b
    return b
|
| 350 |
+
|
| 351 |
+
MAX_FACTORIAL_CACHE = 1000
|
| 352 |
+
|
| 353 |
+
def ifac(n, memo={0:1, 1:1}):
    """Return n factorial (for integers n >= 0 only).

    Values are memoized in memo for arguments up to
    MAX_FACTORIAL_CACHE; larger factorials are continued from the
    largest cached entry."""
    cached = memo.get(n)
    if cached:
        return cached
    # The memo holds consecutive keys 0..len(memo)-1, so continue from
    # the last cached factorial.
    k = len(memo)
    product = memo[k-1]
    cache_limit = MAX_FACTORIAL_CACHE
    while k <= n:
        product *= k
        if k <= cache_limit:
            memo[k] = product
        k += 1
    return product
|
| 367 |
+
|
| 368 |
+
def ifac2(n, memo_pair=[{0:1}, {1:1}]):
    """Return n!! (double factorial), integers n >= 0 only.

    Even and odd arguments are memoized in separate dictionaries, for
    arguments up to MAX_FACTORIAL_CACHE."""
    memo = memo_pair[n&1]
    cached = memo.get(n)
    if cached:
        return cached
    # Continue from the largest cached argument of the same parity.
    k = max(memo)
    product = memo[k]
    cache_limit = MAX_FACTORIAL_CACHE
    while k < n:
        k += 2
        product *= k
        if k <= cache_limit:
            memo[k] = product
    return product
|
| 383 |
+
|
| 384 |
+
# Replace the pure-Python routines by backend-native ones.
if BACKEND == 'gmpy':
    ifac = gmpy.fac
elif BACKEND == 'sage':
    # The sage factorial is converted back to a Python int.
    ifac = lambda n: int(sage.factorial(n))
    ifib = sage.fibonacci
|
| 389 |
+
|
| 390 |
+
def list_primes(n):
    """Return the list of all primes <= n, in increasing order, using
    a sieve of Eratosthenes."""
    bound = n + 1
    # sieve[i] starts as i; composites (and 0, 1) are overwritten by 0.
    sieve = list(xrange(bound))
    sieve[:2] = [0, 0]
    for i in xrange(2, int(bound**0.5)+1):
        if not sieve[i]:
            continue
        for multiple in xrange(i**2, bound, i):
            sieve[multiple] = 0
    return [value for value in sieve if value]
|
| 399 |
+
|
| 400 |
+
if BACKEND == 'sage':
    # Note: it is *VERY* important for performance that we convert
    # the list to Python ints.
    def list_primes(n):
        """Return the primes produced by sage.primes(n+1) as a list
        of Python ints."""
        return [int(prime) for prime in sage.primes(n+1)]
|
| 405 |
+
|
| 406 |
+
small_odd_primes = (3,5,7,11,13,17,19,23,29,31,37,41,43,47)
|
| 407 |
+
small_odd_primes_set = set(small_odd_primes)
|
| 408 |
+
|
| 409 |
+
def isprime(n):
    """
    Determines whether n is a prime number. A probabilistic test is
    performed if n is very large. No special trick is used for detecting
    perfect powers.

    After trial division by the small odd primes, a strong-pseudoprime
    test is run for a set of witness bases chosen by the size of n.

        >>> sum(list_primes(100000))
        454396537
        >>> sum(n*isprime(n) for n in range(100000))
        454396537

    """
    n = int(n)
    if not n & 1:
        return n == 2
    if n < 50:
        return n in small_odd_primes_set
    for p in small_odd_primes:
        if not n % p:
            return False
    # Write n-1 = d * 2**s with d odd.
    m = n-1
    s = trailing(m)
    d = m >> s

    def passes(base):
        # Strong pseudoprime test of n to the given base.
        x = pow(base, d, n)
        if x == 1 or x == m:
            return True
        for _ in xrange(1, s):
            x = x**2 % n
            if x == m:
                return True
        return False

    # See http://primes.utm.edu/prove/prove2_3.html
    if n < 1373653:
        witnesses = [2,3]
    elif n < 341550071728321:
        witnesses = [2,3,5,7,11,13,17]
    else:
        witnesses = small_odd_primes
    return all(passes(a) for a in witnesses)
|
| 452 |
+
|
| 453 |
+
def moebius(n):
    """
    Evaluates the Moebius function which is `mu(n) = (-1)^k` if `n`
    is a product of `k` distinct primes and `mu(n) = 0` otherwise.

    The sign of n is ignored, and mu(0) = 0, mu(1) = 1. The value is
    found by trial division up to sqrt(n), returning 0 as soon as a
    repeated prime factor is seen.
    """
    n = abs(int(n))
    if n < 2:
        return n
    result = 1
    p = 2
    while p*p <= n:
        if not (n % p):
            n //= p
            if not (n % p):
                # p**2 divides the original n
                return 0
            result = -result
        # After 2, only odd candidates need to be tried.
        p += 1 if p == 2 else 2
    if n > 1:
        # Whatever remains is a single prime factor above sqrt(n).
        result = -result
    return result
|
| 471 |
+
|
| 472 |
+
def gcd(*args):
    """Greatest common divisor of all arguments, computed pairwise
    with the Euclidean algorithm. Returns 0 when called without
    arguments. The sign of the result follows from Python's % operator
    and is not normalized."""
    result = 0
    for value in args:
        if not result:
            # Nothing nonzero accumulated so far: adopt this value.
            result = value
            continue
        while value:
            result, value = value, result % value
    return result
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
# Comment by Juan Arias de Reyna:
|
| 484 |
+
#
|
| 485 |
+
# I learned this method of computing EulerE[2n] from van de Lune.
|
| 486 |
+
#
|
| 487 |
+
# We apply the formula EulerE[2n] = (-1)^n 2**(-2n) sum_{j=0}^n a(2n,2j+1)
|
| 488 |
+
#
|
| 489 |
+
# where the numbers a(n,j) vanish for j > n+1 or j <= -1 and satisfy
|
| 490 |
+
#
|
| 491 |
+
# a(0,-1) = a(0,0) = 0; a(0,1)= 1; a(0,2) = a(0,3) = 0
|
| 492 |
+
#
|
| 493 |
+
# a(n,j) = a(n-1,j) when n+j is even
|
| 494 |
+
# a(n,j) = (j-1) a(n-1,j-1) + (j+1) a(n-1,j+1) when n+j is odd
|
| 495 |
+
#
|
| 496 |
+
#
|
| 497 |
+
# But we can use a single one-dimensional array a(j), since to compute
|
| 498 |
+
# a(n,j) we only need to know a(n-1,k) where k and j are of different parity
|
| 499 |
+
# and we do not have to keep the used values.
|
| 500 |
+
#
|
| 501 |
+
# We cached up the values of Euler numbers to sufficiently high order.
|
| 502 |
+
#
|
| 503 |
+
# Important Observation: If we intend to use the numbers
|
| 504 |
+
# EulerE[1], EulerE[2], ... , EulerE[n]
|
| 505 |
+
# it is convenient to compute first EulerE[n], since the algorithm
|
| 506 |
+
# computes first all
|
| 507 |
+
# the previous ones, and keeps them in the CACHE
|
| 508 |
+
|
| 509 |
+
MAX_EULER_CACHE = 500
|
| 510 |
+
|
| 511 |
+
def eulernum(m, _cache={0:MPZ_ONE}):
    r"""
    Computes the Euler numbers `E(n)`, which can be defined as
    coefficients of the Taylor expansion of `1/cosh x`:

    .. math ::

        \frac{1}{\cosh x} = \sum_{n=0}^\infty \frac{E_n}{n!} x^n

    Values of even order up to ``MAX_EULER_CACHE`` are memoized in
    ``_cache`` (a mutable default argument used deliberately as a
    function-level cache). Computing `E(m)` also computes and caches all
    lower even orders on the way.

    Raises ``ValueError`` for negative even `m`.

    Example::

        >>> [int(eulernum(n)) for n in range(11)]
        [1, 0, -1, 0, 5, 0, -61, 0, 1385, 0, -50521]
        >>> [int(eulernum(n)) for n in range(11)]   # test cache
        [1, 0, -1, 0, 5, 0, -61, 0, 1385, 0, -50521]

    """
    # for odd m, the Euler numbers are zero
    if m & 1:
        return MPZ_ZERO
    if m < 0:
        # The recurrence below would not run at all and None would be
        # returned silently; fail loudly instead.
        raise ValueError("eulernum is only defined for m >= 0")
    f = _cache.get(m)
    if f is not None:
        return f
    a = [MPZ(_) for _ in [0,0,1,0,0,0]]
    for n in range(1, m+1):
        # One step of the recurrence for a(n,j); only entries whose parity
        # differs from the ones read are overwritten, so a single array works.
        for j in range(n+1, -1, -2):
            a[j+1] = (j-1)*a[j] + (j+1)*a[j+2]
        a.append(0)
        if n & 1:
            # Odd orders only advance the recurrence; their sum is not an
            # Euler number and must not be cached.
            continue
        suma = 0
        for k in range(n+1, -1, -2):
            suma += a[k+1]
        value = ((-1)**(n//2))*(suma // 2**n)
        if n <= MAX_EULER_CACHE:
            _cache[n] = value
        if n == m:
            return value
|
| 548 |
+
|
| 549 |
+
def stirling1(n, k):
    """
    Stirling number of the first kind (signed), returned as an MPZ.

    Raises ValueError if n or k is negative.
    """
    if n < 0 or k < 0:
        raise ValueError
    if k >= n:
        return MPZ(n == k)
    if k < 1:
        return MPZ_ZERO
    # row[j] holds the unsigned value for (size, j); updated in place,
    # from high j to low j so that row[j-1] is still the previous row.
    row = [MPZ_ZERO] * (k+1)
    row[1] = MPZ_ONE
    for size in xrange(2, n+1):
        factor = size - 1
        top = min(k, size)
        for idx in xrange(top, 0, -1):
            row[idx] = factor * row[idx] + row[idx-1]
    sign = (-1)**(n+k)
    return sign * row[k]
|
| 565 |
+
|
| 566 |
+
def stirling2(n, k):
    """
    Stirling number of the second kind, returned as an MPZ.

    Evaluated from the alternating sum over j = 0..k of
    binomial(k, j) * j**n, divided by k!.

    Raises ValueError if n or k is negative.
    """
    if n < 0 or k < 0:
        raise ValueError
    if k >= n:
        return MPZ(n == k)
    if k <= 1:
        return MPZ(k == 1)
    total = MPZ_ZERO
    binom = MPZ_ONE      # binomial(k, j), updated incrementally
    for j in xrange(k+1):
        term = binom * MPZ(j)**n
        if (k + j) & 1:
            total -= term
        else:
            total += term
        binom = binom * (k - j) // (j + 1)
    return total // ifac(k)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libmpc.py
ADDED
|
@@ -0,0 +1,835 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Low-level functions for complex arithmetic.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
|
| 7 |
+
from .backend import MPZ, MPZ_ZERO, MPZ_ONE, MPZ_TWO, BACKEND
|
| 8 |
+
|
| 9 |
+
from .libmpf import (\
|
| 10 |
+
round_floor, round_ceiling, round_down, round_up,
|
| 11 |
+
round_nearest, round_fast, bitcount,
|
| 12 |
+
bctable, normalize, normalize1, reciprocal_rnd, rshift, lshift, giant_steps,
|
| 13 |
+
negative_rnd,
|
| 14 |
+
to_str, to_fixed, from_man_exp, from_float, to_float, from_int, to_int,
|
| 15 |
+
fzero, fone, ftwo, fhalf, finf, fninf, fnan, fnone,
|
| 16 |
+
mpf_abs, mpf_pos, mpf_neg, mpf_add, mpf_sub, mpf_mul,
|
| 17 |
+
mpf_div, mpf_mul_int, mpf_shift, mpf_sqrt, mpf_hypot,
|
| 18 |
+
mpf_rdiv_int, mpf_floor, mpf_ceil, mpf_nint, mpf_frac,
|
| 19 |
+
mpf_sign, mpf_hash,
|
| 20 |
+
ComplexResult
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
from .libelefun import (\
|
| 24 |
+
mpf_pi, mpf_exp, mpf_log, mpf_cos_sin, mpf_cosh_sinh, mpf_tan, mpf_pow_int,
|
| 25 |
+
mpf_log_hypot,
|
| 26 |
+
mpf_cos_sin_pi, mpf_phi,
|
| 27 |
+
mpf_cos, mpf_sin, mpf_cos_pi, mpf_sin_pi,
|
| 28 |
+
mpf_atan, mpf_atan2, mpf_cosh, mpf_sinh, mpf_tanh,
|
| 29 |
+
mpf_asin, mpf_acos, mpf_acosh, mpf_nthroot, mpf_fibonacci
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
# An mpc value is a (real, imag) tuple
# Frequently used complex constants, built from the mpf constants
# imported from libmpf.
mpc_one = fone, fzero
mpc_zero = fzero, fzero
mpc_two = ftwo, fzero
mpc_half = (fhalf, fzero)

# Lookup tuples used by mpc_is_inf() and mpc_is_infnan() below.
_infs = (finf, fninf)
_infs_nan = (finf, fninf, fnan)
|
| 40 |
+
|
| 41 |
+
def mpc_is_inf(z):
    """Return True if the real or the imaginary part of z is infinite."""
    real, imag = z
    return real in _infs or imag in _infs
|
| 47 |
+
|
| 48 |
+
def mpc_is_infnan(z):
    """Return True if the real or the imaginary part of z is infinite or nan."""
    real, imag = z
    return real in _infs_nan or imag in _infs_nan
|
| 54 |
+
|
| 55 |
+
def mpc_to_str(z, dps, **kwargs):
    """
    Format the mpc value z as a string of the form "re + imj" or
    "re - imj", with each part rendered by to_str() at dps digits.

    Any extra keyword arguments are forwarded to to_str() for BOTH the
    real and the imaginary part, so that formatting options apply
    consistently to the whole number.
    """
    re, im = z
    rs = to_str(re, dps, **kwargs)
    if im[0]:
        # Sign field set: print the magnitude after an explicit " - ".
        return rs + " - " + to_str(mpf_neg(im), dps, **kwargs) + "j"
    return rs + " + " + to_str(im, dps, **kwargs) + "j"
|
| 62 |
+
|
| 63 |
+
def mpc_to_complex(z, strict=False, rnd=round_fast):
    """Convert the mpc value z to a Python complex; each part is converted
    with to_float(part, strict, rnd)."""
    real, imag = z
    real_float = to_float(real, strict, rnd)
    imag_float = to_float(imag, strict, rnd)
    return complex(real_float, imag_float)
|
| 66 |
+
|
| 67 |
+
def mpc_hash(z):
    """
    Hash of the mpc value z.

    On Python >= 3.2 the hashes of the two parts are combined using
    sys.hash_info.imag and reduced modulo 2**sys.hash_info.width.
    On older interpreters the hash of the equivalent Python complex is
    used, falling back to the hash of the raw tuple on overflow.
    """
    if sys.version_info < (3, 2):
        try:
            return hash(mpc_to_complex(z, strict=True))
        except OverflowError:
            return hash(z)
    real, imag = z
    combined = mpf_hash(real) + sys.hash_info.imag * mpf_hash(imag)
    # Need to reduce modulo either 2^32 or 2^64
    modulus = 2**sys.hash_info.width
    return int(combined % modulus)
|
| 79 |
+
|
| 80 |
+
def mpc_conjugate(z, prec, rnd=round_fast):
    """Complex conjugate of z: the real part is returned unchanged and the
    imaginary part is negated with mpf_neg(im, prec, rnd)."""
    real, imag = z
    negated = mpf_neg(imag, prec, rnd)
    return real, negated
|
| 83 |
+
|
| 84 |
+
def mpc_is_nonzero(z):
    """Return True unless z compares equal to mpc_zero."""
    differs = z != mpc_zero
    return differs
|
| 86 |
+
|
| 87 |
+
def mpc_add(z, w, prec, rnd=round_fast):
    """Complex addition z + w; the real and imaginary parts are added
    (and rounded) separately with mpf_add."""
    zre, zim = z
    wre, wim = w
    re_sum = mpf_add(zre, wre, prec, rnd)
    im_sum = mpf_add(zim, wim, prec, rnd)
    return re_sum, im_sum
|
| 91 |
+
|
| 92 |
+
def mpc_add_mpf(z, x, prec, rnd=round_fast):
    """Add the real mpf value x to the mpc value z. Only the real part is
    touched; the imaginary part is passed through unchanged."""
    real, imag = z
    shifted = mpf_add(real, x, prec, rnd)
    return shifted, imag
|
| 95 |
+
|
| 96 |
+
def mpc_sub(z, w, prec=0, rnd=round_fast):
    """Complex subtraction z - w; the real and imaginary parts are
    subtracted (and rounded) separately with mpf_sub."""
    zre, zim = z
    wre, wim = w
    re_diff = mpf_sub(zre, wre, prec, rnd)
    im_diff = mpf_sub(zim, wim, prec, rnd)
    return re_diff, im_diff
|
| 100 |
+
|
| 101 |
+
def mpc_sub_mpf(z, p, prec=0, rnd=round_fast):
    """Subtract the real mpf value p from the mpc value z. Only the real
    part is touched; the imaginary part is passed through unchanged."""
    real, imag = z
    shifted = mpf_sub(real, p, prec, rnd)
    return shifted, imag
|
| 104 |
+
|
| 105 |
+
def mpc_pos(z, prec, rnd=round_fast):
    """Apply mpf_pos(part, prec, rnd) to the real and imaginary parts of z."""
    real, imag = z
    new_real = mpf_pos(real, prec, rnd)
    new_imag = mpf_pos(imag, prec, rnd)
    return new_real, new_imag
|
| 108 |
+
|
| 109 |
+
def mpc_neg(z, prec=None, rnd=round_fast):
    """Complex negation -z: both parts are negated with
    mpf_neg(part, prec, rnd)."""
    real, imag = z
    new_real = mpf_neg(real, prec, rnd)
    new_imag = mpf_neg(imag, prec, rnd)
    return new_real, new_imag
|
| 112 |
+
|
| 113 |
+
def mpc_shift(z, n):
    """Apply mpf_shift(part, n) to the real and imaginary parts of z."""
    real, imag = z
    new_real = mpf_shift(real, n)
    new_imag = mpf_shift(imag, n)
    return new_real, new_imag
|
| 116 |
+
|
| 117 |
+
def mpc_abs(z, prec, rnd=round_fast):
    """Modulus |a+bi| of the complex number z, evaluated with mpf_hypot.

    The result is a single mpf value, not an mpc pair."""
    real, imag = z
    modulus = mpf_hypot(real, imag, prec, rnd)
    return modulus
|
| 122 |
+
|
| 123 |
+
def mpc_arg(z, prec, rnd=round_fast):
    """Argument (phase angle) of the complex number z, computed as
    mpf_atan2(imag, real). The result is a single mpf value."""
    real, imag = z
    angle = mpf_atan2(imag, real, prec, rnd)
    return angle
|
| 127 |
+
|
| 128 |
+
def mpc_floor(z, prec, rnd=round_fast):
    """Apply mpf_floor separately to the real and imaginary parts of z."""
    real, imag = z
    new_real = mpf_floor(real, prec, rnd)
    new_imag = mpf_floor(imag, prec, rnd)
    return new_real, new_imag
|
| 131 |
+
|
| 132 |
+
def mpc_ceil(z, prec, rnd=round_fast):
    """Apply mpf_ceil separately to the real and imaginary parts of z."""
    real, imag = z
    new_real = mpf_ceil(real, prec, rnd)
    new_imag = mpf_ceil(imag, prec, rnd)
    return new_real, new_imag
|
| 135 |
+
|
| 136 |
+
def mpc_nint(z, prec, rnd=round_fast):
    """Apply mpf_nint separately to the real and imaginary parts of z."""
    real, imag = z
    new_real = mpf_nint(real, prec, rnd)
    new_imag = mpf_nint(imag, prec, rnd)
    return new_real, new_imag
|
| 139 |
+
|
| 140 |
+
def mpc_frac(z, prec, rnd=round_fast):
    """Apply mpf_frac separately to the real and imaginary parts of z."""
    real, imag = z
    new_real = mpf_frac(real, prec, rnd)
    new_imag = mpf_frac(imag, prec, rnd)
    return new_real, new_imag
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def mpc_mul(z, w, prec, rnd=round_fast):
    """
    Complex multiplication.

    Computes (a+bi)*(c+di) = (ac - bd) + (ad + bc)i and returns the
    (real, imag) pair rounded to the specified precision. The rounding
    mode is applied to the real and imaginary parts independently.
    """
    zre, zim = z
    wre, wim = w
    # The four partial products are formed without a precision argument;
    # rounding to prec happens only in the final subtraction/addition.
    re = mpf_sub(mpf_mul(zre, wre), mpf_mul(zim, wim), prec, rnd)
    im = mpf_add(mpf_mul(zre, wim), mpf_mul(zim, wre), prec, rnd)
    return re, im
|
| 162 |
+
|
| 163 |
+
def mpc_square(z, prec, rnd=round_fast):
    """Complex square, using (a+b*I)**2 == a**2 - b**2 + 2*I*a*b."""
    real, imag = z
    real_sq = mpf_mul(real, real)
    imag_sq = mpf_mul(imag, imag)
    cross = mpf_mul(real, imag, prec, rnd)
    # Doubling the cross term is a shift of the rounded product.
    return mpf_sub(real_sq, imag_sq, prec, rnd), mpf_shift(cross, 1)
|
| 172 |
+
|
| 173 |
+
def mpc_mul_mpf(z, p, prec, rnd=round_fast):
    """Multiply the mpc value z by the real mpf value p, scaling both
    parts with mpf_mul."""
    real, imag = z
    scaled_real = mpf_mul(real, p, prec, rnd)
    scaled_imag = mpf_mul(imag, p, prec, rnd)
    return scaled_real, scaled_imag
|
| 178 |
+
|
| 179 |
+
def mpc_mul_imag_mpf(z, x, prec, rnd=round_fast):
    """
    Multiply the mpc value z by I*x where x is an mpf value:
    (a+bi)*(x*i) = -b*x + a*x*i.
    """
    real, imag = z
    new_real = mpf_neg(mpf_mul(imag, x, prec, rnd))
    new_imag = mpf_mul(real, x, prec, rnd)
    return new_real, new_imag
|
| 187 |
+
|
| 188 |
+
def mpc_mul_int(z, n, prec, rnd=round_fast):
    """Multiply the mpc value z by the integer n, scaling both parts
    with mpf_mul_int."""
    real, imag = z
    scaled_real = mpf_mul_int(real, n, prec, rnd)
    scaled_imag = mpf_mul_int(imag, n, prec, rnd)
    return scaled_real, scaled_imag
|
| 193 |
+
|
| 194 |
+
def mpc_div(z, w, prec, rnd=round_fast):
    """
    Complex division z / w.

    With z = a+bi and w = c+di the quotient is
    ((a*c + b*d) + (b*c - a*d)*i) / (c*c + d*d). Numerators and the
    denominator are accumulated at prec+10 bits; only the two final
    divisions are rounded to prec with rnd.
    """
    a, b = z
    c, d = w
    wp = prec + 10
    denom = mpf_add(mpf_mul(c, c), mpf_mul(d, d), wp)
    num_re = mpf_add(mpf_mul(a, c), mpf_mul(b, d), wp)
    num_im = mpf_sub(mpf_mul(b, c), mpf_mul(a, d), wp)
    quot_re = mpf_div(num_re, denom, prec, rnd)
    quot_im = mpf_div(num_im, denom, prec, rnd)
    return quot_re, quot_im
|
| 204 |
+
|
| 205 |
+
def mpc_div_mpf(z, p, prec, rnd=round_fast):
    """Compute z/p for a real mpf divisor p by dividing both parts."""
    real, imag = z
    quot_real = mpf_div(real, p, prec, rnd)
    quot_imag = mpf_div(imag, p, prec, rnd)
    return quot_real, quot_imag
|
| 211 |
+
|
| 212 |
+
def mpc_reciprocal(z, prec, rnd=round_fast):
    """Compute 1/z as conj(z) / |z|**2, with the squared modulus
    evaluated at prec+10 bits."""
    real, imag = z
    wp = prec + 10
    norm_sq = mpf_add(mpf_mul(real, real), mpf_mul(imag, imag), wp)
    inv_real = mpf_div(real, norm_sq, prec, rnd)
    inv_imag = mpf_neg(mpf_div(imag, norm_sq, prec, rnd))
    return inv_real, inv_imag
|
| 219 |
+
|
| 220 |
+
def mpc_mpf_div(p, z, prec, rnd=round_fast):
    """Compute p/z for a real mpf numerator p, as p*conj(z) / |z|**2,
    with the squared modulus evaluated at prec+10 bits."""
    real, imag = z
    wp = prec + 10
    norm_sq = mpf_add(mpf_mul(real, real), mpf_mul(imag, imag), wp)
    num_real = mpf_mul(real, p)
    quot_real = mpf_div(num_real, norm_sq, prec, rnd)
    num_imag = mpf_neg(mpf_mul(imag, p))
    quot_imag = mpf_div(num_imag, norm_sq, prec, rnd)
    return quot_real, quot_imag
|
| 227 |
+
|
| 228 |
+
def complex_int_pow(a, b, n):
    """Complex integer power: computes (a+b*I)**n exactly for
    nonnegative n (a and b must be Python ints).

    Returns the pair (real, imag) of the result, using binary
    exponentiation (square-and-multiply).

    Raises ValueError if n is negative; the halving loop below would
    otherwise never reach zero (-1 // 2 == -1) and spin forever.
    """
    if n < 0:
        raise ValueError("complex_int_pow requires a nonnegative exponent")
    wre = 1
    wim = 0
    while n:
        if n & 1:
            # Multiply the accumulated result by the current power of the base.
            wre, wim = wre*a - wim*b, wim*a + wre*b
        n //= 2
        if n:
            # Square the base only if another bit remains to be processed.
            a, b = a*a - b*b, 2*a*b
    return wre, wim
|
| 240 |
+
|
| 241 |
+
def mpc_pow(z, w, prec, rnd=round_fast):
    """Complex power z**w. A real exponent (imaginary part equal to fzero)
    is delegated to mpc_pow_mpf; otherwise exp(w*log(z)) is evaluated with
    10 extra bits for the intermediate log and product."""
    if w[1] != fzero:
        wp = prec + 10
        return mpc_exp(mpc_mul(mpc_log(z, wp), w, wp), prec, rnd)
    return mpc_pow_mpf(z, w[0], prec, rnd)
|
| 245 |
+
|
| 246 |
+
def mpc_pow_mpf(z, p, prec, rnd=round_fast):
    """
    Complex number z raised to the real mpf power p.

    An exponent with nonnegative binary exponent field is an integer and
    goes to mpc_pow_int; an exponent field of -1 means a half-integer,
    handled as an integer power of sqrt(z). Everything else is computed
    as exp(p*log(z)) with 10 guard bits.
    """
    psign, pman, pexp, pbc = p
    wp = prec + 10
    if pexp >= 0:
        exponent = (-1)**psign * (pman<<pexp)
        return mpc_pow_int(z, exponent, prec, rnd)
    if pexp == -1:
        root = mpc_sqrt(z, wp)
        return mpc_pow_int(root, (-1)**psign * pman, prec, rnd)
    return mpc_exp(mpc_mul_mpf(mpc_log(z, wp), p, wp), prec, rnd)
|
| 254 |
+
|
| 255 |
+
def mpc_pow_int(z, n, prec, rnd=round_fast):
    """
    Raise the mpc value z to the integer power n.

    Special cases are tried in order: purely real base, purely imaginary
    base, the small exponents 0, 1, 2 and -1, and negative exponents
    (reciprocal of the positive power). For the general case the power is
    computed exactly on integer mantissas when the estimated size of the
    exact result is small, and as exp(n*log(z)) otherwise.
    """
    a, b = z
    # Purely real base: use the real integer power.
    if b == fzero:
        return mpf_pow_int(a, n, prec, rnd), fzero
    # Purely imaginary base: (b*i)**n = b**n * i**n, and i**n depends only
    # on n mod 4 (Python's % gives a nonnegative result for negative n too).
    if a == fzero:
        v = mpf_pow_int(b, n, prec, rnd)
        n %= 4
        if n == 0:
            return v, fzero
        elif n == 1:
            return fzero, v
        elif n == 2:
            return mpf_neg(v), fzero
        elif n == 3:
            return fzero, mpf_neg(v)
    if n == 0: return mpc_one
    if n == 1: return mpc_pos(z, prec, rnd)
    if n == 2: return mpc_square(z, prec, rnd)
    if n == -1: return mpc_reciprocal(z, prec, rnd)
    # Negative exponent: invert the positive power, computed with 4 extra bits.
    if n < 0: return mpc_reciprocal(mpc_pow_int(z, -n, prec+4), prec, rnd)
    asign, aman, aexp, abc = a
    bsign, bman, bexp, bbc = b
    # Work with signed integer mantissas.
    if asign: aman = -aman
    if bsign: bman = -bman
    de = aexp - bexp
    abs_de = abs(de)
    # Estimate of the size of the exact result; presumably a bit count,
    # since abc/bbc look like mantissa bit counts -- TODO confirm.
    exact_size = n*(abs_de + max(abc, bbc))
    if exact_size < 10000:
        # Bring both mantissas to the smaller of the two exponents so that
        # a and b are integers on a common scale.
        if de > 0:
            aman <<= de
            aexp = bexp
        else:
            bman <<= (-de)
            bexp = aexp
        re, im = complex_int_pow(aman, bman, n)
        # The common exponent scales by n under the n-th power.
        re = from_man_exp(re, int(n*aexp), prec, rnd)
        im = from_man_exp(im, int(n*bexp), prec, rnd)
        return re, im
    # Exact power too large: fall back to exp(n*log(z)) with 10 guard bits.
    return mpc_exp(mpc_mul_int(mpc_log(z, prec+10), n, prec+10), prec, rnd)
|
| 294 |
+
|
| 295 |
+
def mpc_sqrt(z, prec, rnd=round_fast):
    """Complex square root (principal branch).

    For a+bi that is not a negative real number,
    sqrt(a+bi) = sqrt((r+a)/2) + b/sqrt(2*(r+a))*i with r = abs(a+bi).
    When a is negative the roles of the two parts are exchanged (using
    r-a instead of r+a) and both parts are negated if b is negative.
    """
    a, b = z
    if b == fzero:
        if a == fzero:
            return (a, b)
        # A negative real number gives a real sqrt times i
        if a[0]:
            return (fzero, mpf_sqrt(mpf_neg(a), prec, rnd))
        return (mpf_sqrt(a, prec, rnd), fzero)
    wp = prec+20
    modulus = mpc_abs((a, b), wp)
    if a[0]:
        # case a negative
        t = mpf_sub(modulus, a, wp)                              # t = abs(a+bi) - a
        im = mpf_sqrt(mpf_shift(t, -1), prec, rnd)               # im = sqrt(t/2)
        re = mpf_div(b, mpf_sqrt(mpf_shift(t, 1), wp), prec, rnd)  # re = b/sqrt(2*t)
        if b[0]:
            re = mpf_neg(re)
            im = mpf_neg(im)
    else:
        # case a positive
        t = mpf_add(modulus, a, wp)                              # t = abs(a+bi) + a
        re = mpf_sqrt(mpf_shift(t, -1), prec, rnd)               # re = sqrt(t/2)
        im = mpf_div(b, mpf_sqrt(mpf_shift(t, 1), wp), prec, rnd)  # im = b/sqrt(2*t)
    return re, im
|
| 330 |
+
|
| 331 |
+
def mpc_nthroot_fixed(a, b, n, prec):
    """
    n-th root of the fixed-point complex number a + b*i.

    a, b are signed integers at fixed precision prec. A starting value is
    obtained at `start` bits and then refined over the precisions produced
    by giant_steps(). Returns the pair (re, im) of integers left by the
    final iteration; NOTE(review): the last step runs at about
    prec+extra bits (the caller rescales by -prec-10) -- confirm against
    giant_steps().
    """
    # a, b signed integers at fixed precision prec
    start = 50
    a1 = int(rshift(a, prec - n*start))
    b1 = int(rshift(b, prec - n*start))
    try:
        # Initial approximation with Python's built-in complex power.
        r = (a1 + 1j * b1)**(1.0/n)
        re = r.real
        im = r.imag
        re = MPZ(int(re))
        im = MPZ(int(im))
    except OverflowError:
        # Too large for Python floats: compute the starting value with
        # mpc_pow at `start` bits instead.
        a1 = from_int(a1, start)
        b1 = from_int(b1, start)
        fn = from_int(n)
        nth = mpf_rdiv_int(1, fn, start)
        re, im = mpc_pow((a1, b1), (nth, fzero), start)
        re = to_int(re)
        im = to_int(im)
    extra = 10
    prevp = start
    extra1 = n
    for p in giant_steps(start, prec+extra):
        # this is slow for large n, unlike int_pow_fixed
        # (re2, im2) = current estimate raised to the power n-1, rescaled.
        re2, im2 = complex_int_pow(re, im, n-1)
        re2 = rshift(re2, (n-1)*prevp - p - extra1)
        im2 = rshift(im2, (n-1)*prevp - p - extra1)
        # Squared modulus of (re2, im2), used as the divisor below.
        r4 = (re2*re2 + im2*im2) >> (p + extra1)
        ap = rshift(a, prec - p)
        bp = rshift(b, prec - p)
        # (ap + bp*i) * conj(re2 + im2*i)
        rec = (ap * re2 + bp * im2) >> p
        imc = (-ap * im2 + bp * re2) >> p
        reb = (rec << p) // r4
        imb = (imc << p) // r4
        # New estimate: (quotient + (n-1)*old estimate) / n
        re = (reb + (n-1)*lshift(re, p-prevp))//n
        im = (imb + (n-1)*lshift(im, p-prevp))//n
        prevp = p
    return re, im
|
| 369 |
+
|
| 370 |
+
def mpc_nthroot(z, n, prec, rnd=round_fast):
    """
    Complex n-th root.

    Use Newton method as in the real case when it is faster,
    otherwise use z**(1/n)

    Special cases handled first: a base with zero imaginary part and sign
    field 0 goes to mpf_nthroot; n = 0, 1, -1 are answered directly; other
    n < 2 are computed as the reciprocal of the (-n)-th root.
    """
    a, b = z
    # Real base whose sign field is 0: use the real n-th root.
    if a[0] == 0 and b == fzero:
        re = mpf_nthroot(a, n, prec, rnd)
        return (re, fzero)
    if n < 2:
        if n == 0:
            return mpc_one
        if n == 1:
            return mpc_pos((a, b), prec, rnd)
        if n == -1:
            return mpc_div(mpc_one, (a, b), prec, rnd)
        # Negative n: take the (-n)-th root with 5 extra bits and the
        # rounding mode looked up in reciprocal_rnd, then invert.
        inverse = mpc_nthroot((a, b), -n, prec+5, reciprocal_rnd[rnd])
        return mpc_div(mpc_one, inverse, prec, rnd)
    if n <= 20:
        prec2 = int(1.2 * (prec + 10))
        asign, aman, aexp, abc = a
        bsign, bman, bexp, bbc = b
        pf = mpc_abs((a,b), prec)
        # The fixed-point path is used only when pf[-2] + pf[-1] lies
        # between -10 and prec (presumably exponent + bitcount, i.e. the
        # magnitude of |z| -- TODO confirm the mpf tuple layout).
        if pf[-2] + pf[-1] > -10 and  pf[-2] + pf[-1] < prec:
            af = to_fixed(a, prec2)
            bf = to_fixed(b, prec2)
            re, im = mpc_nthroot_fixed(af, bf, n, prec2)
            # Must match the `extra` guard bits used in mpc_nthroot_fixed.
            extra = 10
            re = from_man_exp(re, -prec2-extra, prec2, rnd)
            im = from_man_exp(im, -prec2-extra, prec2, rnd)
            return re, im
    # General case: z**(1/n) at prec+20 bits, then round both parts to prec.
    fn = from_int(n)
    prec2 = prec+10 + 10
    nth = mpf_rdiv_int(1, fn, prec2)
    re, im = mpc_pow((a, b), (nth, fzero), prec2, rnd)
    re = normalize(re[0], re[1], re[2], re[3], prec, rnd)
    im = normalize(im[0], im[1], im[2], im[3], prec, rnd)
    return re, im
|
| 410 |
+
|
| 411 |
+
def mpc_cbrt(z, prec, rnd=round_fast):
    """
    Complex cube root, computed as the 3rd root via mpc_nthroot.
    """
    cube_root = mpc_nthroot(z, 3, prec, rnd)
    return cube_root
|
| 416 |
+
|
| 417 |
+
def mpc_exp(z, prec, rnd=round_fast):
    """
    Complex exponential function.

    We use the direct formula exp(a+bi) = exp(a) * (cos(b) + sin(b)*i)
    for the computation. This formula is very nice because it is
    perfectly stable; since we just do real multiplications, the only
    numerical errors that can creep in are single-ulp rounding errors.

    The formula is efficient since mpmath's real exp is quite fast and
    since we can compute cos and sin simultaneously.

    It is no problem if a and b are large; if the implementations of
    exp/cos/sin are accurate and efficient for all real numbers, then
    so is this function for all complex numbers.
    """
    a, b = z
    # Purely imaginary argument: exp(bi) = cos(b) + sin(b)*i
    if a == fzero:
        return mpf_cos_sin(b, prec, rnd)
    # Purely real argument
    if b == fzero:
        return mpf_exp(a, prec, rnd), fzero
    wp = prec+4
    magnitude = mpf_exp(a, wp, rnd)
    cos_b, sin_b = mpf_cos_sin(b, wp, rnd)
    return mpf_mul(magnitude, cos_b, prec, rnd), mpf_mul(magnitude, sin_b, prec, rnd)
|
| 443 |
+
|
| 444 |
+
def mpc_log(z, prec, rnd=round_fast):
    """Complex logarithm: the real part comes from mpf_log_hypot applied
    to the two parts of z, the imaginary part is the argument of z."""
    log_modulus = mpf_log_hypot(z[0], z[1], prec, rnd)
    angle = mpc_arg(z, prec, rnd)
    return log_modulus, angle
|
| 448 |
+
|
| 449 |
+
def mpc_cos(z, prec, rnd=round_fast):
    """Complex cosine. The formula used is cos(a+bi) = cos(a)*cosh(b) -
    sin(a)*sinh(b)*i.

    The same comments apply as for the complex exp: only real
    multiplications are performed, so no cancellation errors are
    possible. The formula is also efficient since we can compute both
    pairs (cos, sin) and (cosh, sinh) in single steps."""
    a, b = z
    if b == fzero:
        return mpf_cos(a, prec, rnd), fzero
    if a == fzero:
        return mpf_cosh(b, prec, rnd), fzero
    wp = prec + 6
    cos_a, sin_a = mpf_cos_sin(a, wp)
    cosh_b, sinh_b = mpf_cosh_sinh(b, wp)
    real = mpf_mul(cos_a, cosh_b, prec, rnd)
    imag = mpf_mul(sin_a, sinh_b, prec, rnd)
    return real, mpf_neg(imag)
|
| 468 |
+
|
| 469 |
+
def mpc_sin(z, prec, rnd=round_fast):
    """Complex sine, from sin(a+bi) = sin(a)*cosh(b) + cos(a)*sinh(b)*i.

    As in mpc_cos, only real multiplications of simultaneously computed
    (cos, sin) and (cosh, sinh) pairs are needed."""
    a, b = z
    if b == fzero:
        return mpf_sin(a, prec, rnd), fzero
    if a == fzero:
        return fzero, mpf_sinh(b, prec, rnd)
    wp = prec + 6
    cos_a, sin_a = mpf_cos_sin(a, wp)
    cosh_b, sinh_b = mpf_cosh_sinh(b, wp)
    real = mpf_mul(sin_a, cosh_b, prec, rnd)
    imag = mpf_mul(cos_a, sinh_b, prec, rnd)
    return real, imag
|
| 484 |
+
|
| 485 |
+
def mpc_tan(z, prec, rnd=round_fast):
    """Complex tangent. Computed as tan(a+bi) = sin(2a)/M + sinh(2b)/M*i
    where M = cos(2a) + cosh(2b).

    Arguments with zero imaginary (resp. real) part are delegated to the
    real mpf_tan (resp. mpf_tanh). The general case works with 15 guard
    bits for the intermediate trigonometric/hyperbolic values."""
    a, b = z
    if b == fzero: return mpf_tan(a, prec, rnd), fzero
    if a == fzero: return fzero, mpf_tanh(b, prec, rnd)
    wp = prec + 15
    # Double both parts: the formula is expressed in terms of 2a and 2b.
    a = mpf_shift(a, 1)
    b = mpf_shift(b, 1)
    c, s = mpf_cos_sin(a, wp)
    ch, sh = mpf_cosh_sinh(b, wp)
    # TODO: handle cancellation when c ~= -1 and ch ~= 1
    mag = mpf_add(c, ch, wp)
    re = mpf_div(s, mag, prec, rnd)
    im = mpf_div(sh, mag, prec, rnd)
    return re, im
|
| 503 |
+
|
| 504 |
+
def mpc_cos_pi(z, prec, rnd=round_fast):
    """Compute cos(pi*z) for complex z. The real part of z is passed to
    the *_pi real functions unscaled; the imaginary part is multiplied by
    pi explicitly (at prec+5 bits) before the hyperbolic functions."""
    a, b = z
    if b == fzero:
        return mpf_cos_pi(a, prec, rnd), fzero
    pi_b = mpf_mul(b, mpf_pi(prec+5), prec+5)
    if a == fzero:
        return mpf_cosh(pi_b, prec, rnd), fzero
    wp = prec + 6
    cos_a, sin_a = mpf_cos_sin_pi(a, wp)
    cosh_b, sinh_b = mpf_cosh_sinh(pi_b, wp)
    real = mpf_mul(cos_a, cosh_b, prec, rnd)
    imag = mpf_mul(sin_a, sinh_b, prec, rnd)
    return real, mpf_neg(imag)
|
| 517 |
+
|
| 518 |
+
def mpc_sin_pi(z, prec, rnd=round_fast):
    """Compute sin(pi*z) for complex z. The real part of z is passed to
    the *_pi real functions unscaled; the imaginary part is multiplied by
    pi explicitly (at prec+5 bits) before the hyperbolic functions."""
    a, b = z
    if b == fzero:
        return mpf_sin_pi(a, prec, rnd), fzero
    pi_b = mpf_mul(b, mpf_pi(prec+5), prec+5)
    if a == fzero:
        return fzero, mpf_sinh(pi_b, prec, rnd)
    wp = prec + 6
    cos_a, sin_a = mpf_cos_sin_pi(a, wp)
    cosh_b, sinh_b = mpf_cosh_sinh(pi_b, wp)
    real = mpf_mul(sin_a, cosh_b, prec, rnd)
    imag = mpf_mul(cos_a, sinh_b, prec, rnd)
    return real, imag
|
| 531 |
+
|
| 532 |
+
def mpc_cos_sin(z, prec, rnd=round_fast):
    """Return the pair (cos(z), sin(z)) of mpc values, sharing the real
    (cos, sin) and (cosh, sinh) evaluations between the two results."""
    a, b = z
    if a == fzero:
        # Purely imaginary argument: cos is real, sin is purely imaginary.
        cosh_b, sinh_b = mpf_cosh_sinh(b, prec, rnd)
        return (cosh_b, fzero), (fzero, sinh_b)
    if b == fzero:
        # Purely real argument.
        cos_a, sin_a = mpf_cos_sin(a, prec, rnd)
        return (cos_a, fzero), (sin_a, fzero)
    wp = prec + 6
    cos_a, sin_a = mpf_cos_sin(a, wp)
    cosh_b, sinh_b = mpf_cosh_sinh(b, wp)
    cos_re = mpf_mul(cos_a, cosh_b, prec, rnd)
    cos_im = mpf_mul(sin_a, sinh_b, prec, rnd)
    sin_re = mpf_mul(sin_a, cosh_b, prec, rnd)
    sin_im = mpf_mul(cos_a, sinh_b, prec, rnd)
    return (cos_re, mpf_neg(cos_im)), (sin_re, sin_im)
|
| 548 |
+
|
| 549 |
+
def mpc_cos_sin_pi(z, prec, rnd=round_fast):
    """Return the pair (cos(pi*z), sin(pi*z)) of mpc values. The imaginary
    part of z is multiplied by pi explicitly (at prec+5 bits); the real
    part is handled by mpf_cos_sin_pi."""
    a, b = z
    if b == fzero:
        cos_a, sin_a = mpf_cos_sin_pi(a, prec, rnd)
        return (cos_a, fzero), (sin_a, fzero)
    pi_b = mpf_mul(b, mpf_pi(prec+5), prec+5)
    if a == fzero:
        cosh_b, sinh_b = mpf_cosh_sinh(pi_b, prec, rnd)
        return (cosh_b, fzero), (fzero, sinh_b)
    wp = prec + 6
    cos_a, sin_a = mpf_cos_sin_pi(a, wp)
    cosh_b, sinh_b = mpf_cosh_sinh(pi_b, wp)
    cos_re = mpf_mul(cos_a, cosh_b, prec, rnd)
    cos_im = mpf_mul(sin_a, sinh_b, prec, rnd)
    sin_re = mpf_mul(sin_a, cosh_b, prec, rnd)
    sin_im = mpf_mul(cos_a, sinh_b, prec, rnd)
    return (cos_re, mpf_neg(cos_im)), (sin_re, sin_im)
|
| 566 |
+
|
| 567 |
+
def mpc_cosh(z, prec, rnd=round_fast):
    """Complex hyperbolic cosine, obtained from the complex cosine of the
    rotated argument (b, -a), using cosh(z) = cos(z*i) and the evenness
    of cos."""
    a, b = z
    rotated = (b, mpf_neg(a))
    return mpc_cos(rotated, prec, rnd)
|
| 571 |
+
|
| 572 |
+
def mpc_sinh(z, prec, rnd=round_fast):
    """Complex hyperbolic sine. Computed as sinh(z) = -i*sin(z*i); in
    practice the parts of z are swapped, mpc_sin is applied, and the parts
    of the result are swapped back."""
    a, b = z
    sin_re, sin_im = mpc_sin((b, a), prec, rnd)
    return sin_im, sin_re
|
| 577 |
+
|
| 578 |
+
def mpc_tanh(z, prec, rnd=round_fast):
    """Complex hyperbolic tangent. Computed as tanh(z) = -i*tan(z*i); in
    practice the parts of z are swapped, mpc_tan is applied, and the parts
    of the result are swapped back."""
    a, b = z
    tan_re, tan_im = mpc_tan((b, a), prec, rnd)
    return tan_im, tan_re
|
| 583 |
+
|
| 584 |
+
# TODO: avoid loss of accuracy
|
| 585 |
+
def mpc_atan(z, prec, rnd=round_fast):
    """
    Complex inverse tangent, from the logarithmic formula

        atan(z) = (I/2)*(log(1-I*z) - log(1+I*z))

    where, for z = a + b*I, 1-I*z = (1+b) - a*I and 1+I*z = (1-b) + a*I.
    The logarithms are evaluated with 15 guard bits.
    """
    a, b = z
    wp = prec + 15
    one_minus_iz = mpf_add(fone, b, wp), mpf_neg(a)
    one_plus_iz = mpf_sub(fone, b, wp), a
    log_minus = mpc_log(one_minus_iz, wp)
    log_plus = mpc_log(one_plus_iz, wp)
    diff_re, diff_im = mpc_sub(log_minus, log_plus, prec, rnd)
    # (I/2) * (p+q*I) = (-q/2 + p/2*I)
    result = mpf_neg(mpf_shift(diff_im, -1)), mpf_shift(diff_re, -1)
    # Subtraction at infinity gives correct real part but
    # wrong imaginary part (should be zero)
    if result[1] == fnan and mpc_is_inf(z):
        result = (result[0], fzero)
    return result
|
| 603 |
+
|
| 604 |
+
# Crossover thresholds (as mpf values) delimiting the regions in which
# acos_asin() below rewrites its formulas for better numerical accuracy.
beta_crossover = from_float(0.6417)
alpha_crossover = from_float(1.5)
|
| 606 |
+
|
| 607 |
+
def acos_asin(z, prec, rnd, n):
|
| 608 |
+
""" complex acos for n = 0, asin for n = 1
|
| 609 |
+
The algorithm is described in
|
| 610 |
+
T.E. Hull, T.F. Fairgrieve and P.T.P. Tang
|
| 611 |
+
'Implementing the Complex Arcsine and Arcosine Functions
|
| 612 |
+
using Exception Handling',
|
| 613 |
+
ACM Trans. on Math. Software Vol. 23 (1997), p299
|
| 614 |
+
The complex acos and asin can be defined as
|
| 615 |
+
acos(z) = acos(beta) - I*sign(a)* log(alpha + sqrt(alpha**2 -1))
|
| 616 |
+
asin(z) = asin(beta) + I*sign(a)* log(alpha + sqrt(alpha**2 -1))
|
| 617 |
+
where z = a + I*b
|
| 618 |
+
alpha = (1/2)*(r + s); beta = (1/2)*(r - s) = a/alpha
|
| 619 |
+
r = sqrt((a+1)**2 + y**2); s = sqrt((a-1)**2 + y**2)
|
| 620 |
+
These expressions are rewritten in different ways in different
|
| 621 |
+
regions, delimited by two crossovers alpha_crossover and beta_crossover,
|
| 622 |
+
and by abs(a) <= 1, in order to improve the numerical accuracy.
|
| 623 |
+
"""
|
| 624 |
+
a, b = z
|
| 625 |
+
wp = prec + 10
|
| 626 |
+
# special cases with real argument
|
| 627 |
+
if b == fzero:
|
| 628 |
+
am = mpf_sub(fone, mpf_abs(a), wp)
|
| 629 |
+
# case abs(a) <= 1
|
| 630 |
+
if not am[0]:
|
| 631 |
+
if n == 0:
|
| 632 |
+
return mpf_acos(a, prec, rnd), fzero
|
| 633 |
+
else:
|
| 634 |
+
return mpf_asin(a, prec, rnd), fzero
|
| 635 |
+
# cases abs(a) > 1
|
| 636 |
+
else:
|
| 637 |
+
# case a < -1
|
| 638 |
+
if a[0]:
|
| 639 |
+
pi = mpf_pi(prec, rnd)
|
| 640 |
+
c = mpf_acosh(mpf_neg(a), prec, rnd)
|
| 641 |
+
if n == 0:
|
| 642 |
+
return pi, mpf_neg(c)
|
| 643 |
+
else:
|
| 644 |
+
return mpf_neg(mpf_shift(pi, -1)), c
|
| 645 |
+
# case a > 1
|
| 646 |
+
else:
|
| 647 |
+
c = mpf_acosh(a, prec, rnd)
|
| 648 |
+
if n == 0:
|
| 649 |
+
return fzero, c
|
| 650 |
+
else:
|
| 651 |
+
pi = mpf_pi(prec, rnd)
|
| 652 |
+
return mpf_shift(pi, -1), mpf_neg(c)
|
| 653 |
+
asign = bsign = 0
|
| 654 |
+
if a[0]:
|
| 655 |
+
a = mpf_neg(a)
|
| 656 |
+
asign = 1
|
| 657 |
+
if b[0]:
|
| 658 |
+
b = mpf_neg(b)
|
| 659 |
+
bsign = 1
|
| 660 |
+
am = mpf_sub(fone, a, wp)
|
| 661 |
+
ap = mpf_add(fone, a, wp)
|
| 662 |
+
r = mpf_hypot(ap, b, wp)
|
| 663 |
+
s = mpf_hypot(am, b, wp)
|
| 664 |
+
alpha = mpf_shift(mpf_add(r, s, wp), -1)
|
| 665 |
+
beta = mpf_div(a, alpha, wp)
|
| 666 |
+
b2 = mpf_mul(b,b, wp)
|
| 667 |
+
# case beta <= beta_crossover
|
| 668 |
+
if not mpf_sub(beta_crossover, beta, wp)[0]:
|
| 669 |
+
if n == 0:
|
| 670 |
+
re = mpf_acos(beta, wp)
|
| 671 |
+
else:
|
| 672 |
+
re = mpf_asin(beta, wp)
|
| 673 |
+
else:
|
| 674 |
+
# to compute the real part in this region use the identity
|
| 675 |
+
# asin(beta) = atan(beta/sqrt(1-beta**2))
|
| 676 |
+
# beta/sqrt(1-beta**2) = (alpha + a) * (alpha - a)
|
| 677 |
+
# alpha + a is numerically accurate; alpha - a can have
|
| 678 |
+
# cancellations leading to numerical inaccuracies, so rewrite
|
| 679 |
+
# it in differente ways according to the region
|
| 680 |
+
Ax = mpf_add(alpha, a, wp)
|
| 681 |
+
# case a <= 1
|
| 682 |
+
if not am[0]:
|
| 683 |
+
# c = b*b/(r + (a+1)); d = (s + (1-a))
|
| 684 |
+
# alpha - a = (1/2)*(c + d)
|
| 685 |
+
# case n=0: re = atan(sqrt((1/2) * Ax * (c + d))/a)
|
| 686 |
+
# case n=1: re = atan(a/sqrt((1/2) * Ax * (c + d)))
|
| 687 |
+
c = mpf_div(b2, mpf_add(r, ap, wp), wp)
|
| 688 |
+
d = mpf_add(s, am, wp)
|
| 689 |
+
re = mpf_shift(mpf_mul(Ax, mpf_add(c, d, wp), wp), -1)
|
| 690 |
+
if n == 0:
|
| 691 |
+
re = mpf_atan(mpf_div(mpf_sqrt(re, wp), a, wp), wp)
|
| 692 |
+
else:
|
| 693 |
+
re = mpf_atan(mpf_div(a, mpf_sqrt(re, wp), wp), wp)
|
| 694 |
+
else:
|
| 695 |
+
# c = Ax/(r + (a+1)); d = Ax/(s - (1-a))
|
| 696 |
+
# alpha - a = (1/2)*(c + d)
|
| 697 |
+
# case n = 0: re = atan(b*sqrt(c + d)/2/a)
|
| 698 |
+
# case n = 1: re = atan(a/(b*sqrt(c + d)/2)
|
| 699 |
+
c = mpf_div(Ax, mpf_add(r, ap, wp), wp)
|
| 700 |
+
d = mpf_div(Ax, mpf_sub(s, am, wp), wp)
|
| 701 |
+
re = mpf_shift(mpf_add(c, d, wp), -1)
|
| 702 |
+
re = mpf_mul(b, mpf_sqrt(re, wp), wp)
|
| 703 |
+
if n == 0:
|
| 704 |
+
re = mpf_atan(mpf_div(re, a, wp), wp)
|
| 705 |
+
else:
|
| 706 |
+
re = mpf_atan(mpf_div(a, re, wp), wp)
|
| 707 |
+
# to compute alpha + sqrt(alpha**2 - 1), if alpha <= alpha_crossover
|
| 708 |
+
# replace it with 1 + Am1 + sqrt(Am1*(alpha+1)))
|
| 709 |
+
# where Am1 = alpha -1
|
| 710 |
+
# if alpha <= alpha_crossover:
|
| 711 |
+
if not mpf_sub(alpha_crossover, alpha, wp)[0]:
|
| 712 |
+
c1 = mpf_div(b2, mpf_add(r, ap, wp), wp)
|
| 713 |
+
# case a < 1
|
| 714 |
+
if mpf_neg(am)[0]:
|
| 715 |
+
# Am1 = (1/2) * (b*b/(r + (a+1)) + b*b/(s + (1-a))
|
| 716 |
+
c2 = mpf_add(s, am, wp)
|
| 717 |
+
c2 = mpf_div(b2, c2, wp)
|
| 718 |
+
Am1 = mpf_shift(mpf_add(c1, c2, wp), -1)
|
| 719 |
+
else:
|
| 720 |
+
# Am1 = (1/2) * (b*b/(r + (a+1)) + (s - (1-a)))
|
| 721 |
+
c2 = mpf_sub(s, am, wp)
|
| 722 |
+
Am1 = mpf_shift(mpf_add(c1, c2, wp), -1)
|
| 723 |
+
# im = log(1 + Am1 + sqrt(Am1*(alpha+1)))
|
| 724 |
+
im = mpf_mul(Am1, mpf_add(alpha, fone, wp), wp)
|
| 725 |
+
im = mpf_log(mpf_add(fone, mpf_add(Am1, mpf_sqrt(im, wp), wp), wp), wp)
|
| 726 |
+
else:
|
| 727 |
+
# im = log(alpha + sqrt(alpha*alpha - 1))
|
| 728 |
+
im = mpf_sqrt(mpf_sub(mpf_mul(alpha, alpha, wp), fone, wp), wp)
|
| 729 |
+
im = mpf_log(mpf_add(alpha, im, wp), wp)
|
| 730 |
+
if asign:
|
| 731 |
+
if n == 0:
|
| 732 |
+
re = mpf_sub(mpf_pi(wp), re, wp)
|
| 733 |
+
else:
|
| 734 |
+
re = mpf_neg(re)
|
| 735 |
+
if not bsign and n == 0:
|
| 736 |
+
im = mpf_neg(im)
|
| 737 |
+
if bsign and n == 1:
|
| 738 |
+
im = mpf_neg(im)
|
| 739 |
+
re = normalize(re[0], re[1], re[2], re[3], prec, rnd)
|
| 740 |
+
im = normalize(im[0], im[1], im[2], im[3], prec, rnd)
|
| 741 |
+
return re, im
|
| 742 |
+
|
| 743 |
+
def mpc_acos(z, prec, rnd=round_fast):
    """Complex inverse cosine; delegates to acos_asin with n = 0."""
    return acos_asin(z, prec, rnd, n=0)
|
| 745 |
+
|
| 746 |
+
def mpc_asin(z, prec, rnd=round_fast):
    """Complex inverse sine; delegates to acos_asin with n = 1."""
    return acos_asin(z, prec, rnd, n=1)
|
| 748 |
+
|
| 749 |
+
def mpc_asinh(z, prec, rnd=round_fast):
    """Inverse hyperbolic sine of the complex number z = (re, im).

    Uses asinh(z) = I * asin(-I*z), where -I*z = (im, -re).
    """
    re, im = z
    asin_re, asin_im = mpc_asin((im, mpf_neg(re)), prec, rnd)
    # I * (p + q*I) = -q + p*I
    return mpf_neg(asin_im), asin_re
|
| 754 |
+
|
| 755 |
+
def mpc_acosh(z, prec, rnd=round_fast):
    """Inverse hyperbolic cosine of the complex number z.

    acosh(z) = -I * acos(z) when Im(acos(z)) > 0,
               +I * acos(z) otherwise (negative or zero imaginary part).
    """
    acos_re, acos_im = mpc_acos(z, prec, rnd)
    if not acos_im[0] and acos_im != fzero:
        # strictly positive imaginary part: -I*(p + q*I) = q - p*I
        return acos_im, mpf_neg(acos_re)
    # I*(p + q*I) = -q + p*I
    return mpf_neg(acos_im), acos_re
|
| 763 |
+
|
| 764 |
+
def mpc_atanh(z, prec, rnd=round_fast):
    """Inverse hyperbolic tangent of the complex number z.

    Uses atanh(z) = (log(1+z) - log(1-z))/2. The logarithms are
    evaluated with 15 guard bits; the final subtraction rounds the
    result to ``prec`` using ``rnd``.
    """
    wp = prec + 15
    a = mpc_add(z, mpc_one, wp)
    b = mpc_sub(mpc_one, z, wp)
    a = mpc_log(a, wp)
    b = mpc_log(b, wp)
    # Round to the requested precision and rounding mode here (the same
    # way mpc_atan does) so that the guard bits do not leak into the
    # returned value and ``rnd`` is actually honoured.
    v = mpc_shift(mpc_sub(a, b, prec, rnd), -1)
    # Subtraction at infinity gives correct imaginary part but
    # wrong real part (should be zero)
    if v[0] == fnan and mpc_is_inf(z):
        v = (fzero, v[1])
    return v
|
| 777 |
+
|
| 778 |
+
def mpc_fibonacci(z, prec, rnd=round_fast):
    """Fibonacci function of the complex number z = (re, im).

    A purely real argument is delegated to mpf_fibonacci. Otherwise the
    value is computed as (phi**z - cos(pi*z)/phi**z) / (2*phi - 1),
    where phi is the constant returned by mpf_phi.
    """
    re, im = z
    if im == fzero:
        return (mpf_fibonacci(re, prec, rnd), fzero)
    # Guard bits scale with the magnitude (exponent + bit count) of the
    # larger of the two components, so both re and im must be measured.
    size = max(abs(re[2]+re[3]), abs(im[2]+im[3]))
    wp = prec + size + 20
    a = mpf_phi(wp)
    # b = 2*phi - 1
    b = mpf_add(mpf_shift(a, 1), fnone, wp)
    u = mpc_pow((a, fzero), z, wp)
    v = mpc_cos_pi(z, wp)
    v = mpc_div(v, u, wp)
    u = mpc_sub(u, v, wp)
    # Only the last operation rounds to the caller's precision.
    u = mpc_div_mpf(u, b, prec, rnd)
    return u
|
| 792 |
+
|
| 793 |
+
def mpf_expj(x, prec, rnd='f'):
    """Real-valued variant of expj: always raises ComplexResult."""
    raise ComplexResult
|
| 795 |
+
|
| 796 |
+
def mpc_expj(z, prec, rnd='f'):
    """Compute exp(I*z) for the complex number z = (re, im).

    exp(I*z) = exp(-im) * (cos(re) + I*sin(re)). Purely real and purely
    imaginary arguments are handled directly; otherwise the two factors
    are computed with 10 guard bits and the products are rounded to
    ``prec`` with ``rnd``.
    """
    x, y = z
    if y == fzero:
        # real argument: result is (cos(x), sin(x))
        return mpf_cos_sin(x, prec, rnd)
    if x == fzero:
        # imaginary argument: result is the real number exp(-y)
        return mpf_exp(mpf_neg(y), prec, rnd), fzero
    guard_prec = prec + 10
    magnitude = mpf_exp(mpf_neg(y), guard_prec)
    cos_x, sin_x = mpf_cos_sin(x, guard_prec)
    return (mpf_mul(magnitude, cos_x, prec, rnd),
            mpf_mul(magnitude, sin_x, prec, rnd))
|
| 807 |
+
|
| 808 |
+
def mpf_expjpi(x, prec, rnd='f'):
    """Real-valued variant of expjpi: always raises ComplexResult."""
    raise ComplexResult
|
| 810 |
+
|
| 811 |
+
def mpc_expjpi(z, prec, rnd='f'):
    """Compute exp(I*pi*z) for the complex number z = (re, im).

    exp(I*pi*z) = exp(-pi*im) * (cos(pi*re) + I*sin(pi*re)). The
    product pi*im is formed with extra bits that grow with the magnitude
    of im; the remaining steps use 10 guard bits and the final products
    are rounded to ``prec`` with ``rnd``.
    """
    x, y = z
    if y == fzero:
        # real argument: result is (cos(pi*x), sin(pi*x))
        return mpf_cos_sin_pi(x, prec, rnd)
    _, y_man, y_exp, y_bc = y
    mul_prec = prec + 10
    if y_man:
        # widen by the number of integer bits of y (if any)
        mul_prec += max(0, y_exp + y_bc)
    neg_pi_y = mpf_neg(mpf_mul(mpf_pi(mul_prec), y, mul_prec))
    if x == fzero:
        # imaginary argument: result is the real number exp(-pi*y)
        return mpf_exp(neg_pi_y, prec, rnd), fzero
    guard_prec = prec + 10
    magnitude = mpf_exp(neg_pi_y, guard_prec)
    cos_px, sin_px = mpf_cos_sin_pi(x, guard_prec)
    return (mpf_mul(magnitude, cos_px, prec, rnd),
            mpf_mul(magnitude, sin_px, prec, rnd))
|
| 827 |
+
|
| 828 |
+
|
| 829 |
+
# When the Sage backend is selected, rebind mpc_exp and mpc_sqrt to the
# implementations exposed by sage.libs.mpmath.ext_libmp. If the module
# cannot be imported or lacks one of the attributes, a warning is printed.
# The rebinding is sequential: a missing mpc_sqrt leaves an already
# rebound mpc_exp in place.
if BACKEND == 'sage':
|
| 830 |
+
try:
|
| 831 |
+
import sage.libs.mpmath.ext_libmp as _lbmp
|
| 832 |
+
mpc_exp = _lbmp.mpc_exp
|
| 833 |
+
mpc_sqrt = _lbmp.mpc_sqrt
|
| 834 |
+
except (ImportError, AttributeError):
|
| 835 |
+
print("Warning: Sage imports in libmpc failed")
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_functions2.cpython-311.pyc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c96a8c60ccaff3dbe94603afb496582f94300b3dd5c8ec016ff0c7e71f975baf
|
| 3 |
+
size 172649
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/__init__.py
ADDED
|
File without changes
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/Openacc/cupti_openacc.h
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2017 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#include <cuda_stdint.h>
|
| 51 |
+
|
| 52 |
+
#if !defined(_CUPTI_OPENACC_H_)
|
| 53 |
+
#define _CUPTI_OPENACC_H_
|
| 54 |
+
|
| 55 |
+
#ifndef CUPTIAPI
|
| 56 |
+
#ifdef _WIN32
|
| 57 |
+
#define CUPTIAPI __stdcall
|
| 58 |
+
#else
|
| 59 |
+
#define CUPTIAPI
|
| 60 |
+
#endif
|
| 61 |
+
#endif
|
| 62 |
+
|
| 63 |
+
#if defined(__LP64__)
|
| 64 |
+
#define CUPTILP64 1
|
| 65 |
+
#elif defined(_WIN64)
|
| 66 |
+
#define CUPTILP64 1
|
| 67 |
+
#else
|
| 68 |
+
#undef CUPTILP64
|
| 69 |
+
#endif
|
| 70 |
+
|
| 71 |
+
#if defined(__cplusplus)
|
| 72 |
+
extern "C" {
|
| 73 |
+
#endif
|
| 74 |
+
|
| 75 |
+
#if defined(__GNUC__) && defined(CUPTI_LIB)
|
| 76 |
+
#pragma GCC visibility push(default)
|
| 77 |
+
#endif
|
| 78 |
+
|
| 79 |
+
/**
 * \brief Initialize OpenACC support
 *
 * All three arguments are passed as untyped \c void* pointers; the
 * expected function types are listed per parameter below.
 *
 * \param profRegister function of type acc_prof_reg as obtained from acc_register_library
 * \param profUnregister function of type acc_prof_reg as obtained from acc_register_library
 * \param profLookup function of type acc_prof_lookup as obtained from acc_register_library
 *
 * \return A CUptiResult value. NOTE(review): the individual result codes
 * are not declared in this header - confirm against the CUptiResult
 * definition.
 */
|
| 86 |
+
CUptiResult CUPTIAPI
|
| 87 |
+
cuptiOpenACCInitialize(void *profRegister, void *profUnregister, void *profLookup);
|
| 88 |
+
|
| 89 |
+
#if defined(__GNUC__) && defined(CUPTI_LIB)
|
| 90 |
+
#pragma GCC visibility pop
|
| 91 |
+
#endif
|
| 92 |
+
|
| 93 |
+
#if defined(__cplusplus)
|
| 94 |
+
}
|
| 95 |
+
#endif
|
| 96 |
+
|
| 97 |
+
#endif /*_CUPTI_OPENACC_H_*/
|
| 98 |
+
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_activity.h
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_events.h
ADDED
|
@@ -0,0 +1,1371 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2010-2021 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#if !defined(_CUPTI_EVENTS_H_)
|
| 51 |
+
#define _CUPTI_EVENTS_H_
|
| 52 |
+
|
| 53 |
+
#include <cuda.h>
|
| 54 |
+
#include <string.h>
|
| 55 |
+
#include <cuda_stdint.h>
|
| 56 |
+
#include <cupti_result.h>
|
| 57 |
+
|
| 58 |
+
#ifndef CUPTIAPI
|
| 59 |
+
#ifdef _WIN32
|
| 60 |
+
#define CUPTIAPI __stdcall
|
| 61 |
+
#else
|
| 62 |
+
#define CUPTIAPI
|
| 63 |
+
#endif
|
| 64 |
+
#endif
|
| 65 |
+
|
| 66 |
+
#if defined(__cplusplus)
|
| 67 |
+
extern "C" {
|
| 68 |
+
#endif
|
| 69 |
+
|
| 70 |
+
#if defined(__GNUC__) && defined(CUPTI_LIB)
|
| 71 |
+
#pragma GCC visibility push(default)
|
| 72 |
+
#endif
|
| 73 |
+
|
| 74 |
+
/**
|
| 75 |
+
* \defgroup CUPTI_EVENT_API CUPTI Event API
|
| 76 |
+
* Functions, types, and enums that implement the CUPTI Event API.
|
| 77 |
+
*
|
| 78 |
+
* \note The CUPTI event APIs from the header cupti_events.h are not supported on devices
|
| 79 |
+
* with compute capability 7.5 and higher (i.e. Turing and later GPU architectures).
|
| 80 |
+
* These APIs will be deprecated in a future CUDA release. They are replaced by
|
| 81 |
+
* Profiling API in the header cupti_profiler_target.h and Perfworks metrics API
|
| 82 |
+
* in the headers nvperf_host.h and nvperf_target.h which are supported on
|
| 83 |
+
* devices with compute capability 7.0 and higher (i.e. Volta and later GPU
|
| 84 |
+
* architectures).
|
| 85 |
+
*
|
| 86 |
+
* @{
|
| 87 |
+
*/
|
| 88 |
+
|
| 89 |
+
/**
|
| 90 |
+
* \brief ID for an event.
|
| 91 |
+
*
|
| 92 |
+
* An event represents a countable activity, action, or occurrence on
|
| 93 |
+
* the device.
|
| 94 |
+
*/
|
| 95 |
+
typedef uint32_t CUpti_EventID;
|
| 96 |
+
|
| 97 |
+
/**
|
| 98 |
+
* \brief ID for an event domain.
|
| 99 |
+
*
|
| 100 |
+
* ID for an event domain. An event domain represents a group of
|
| 101 |
+
* related events. A device may have multiple instances of a domain,
|
| 102 |
+
* indicating that the device can simultaneously record multiple
|
| 103 |
+
* instances of each event within that domain.
|
| 104 |
+
*/
|
| 105 |
+
typedef uint32_t CUpti_EventDomainID;
|
| 106 |
+
|
| 107 |
+
/**
|
| 108 |
+
* \brief A group of events.
|
| 109 |
+
*
|
| 110 |
+
* An event group is a collection of events that are managed
|
| 111 |
+
* together. All events in an event group must belong to the same
|
| 112 |
+
* domain.
|
| 113 |
+
*/
|
| 114 |
+
typedef void *CUpti_EventGroup;
|
| 115 |
+
|
| 116 |
+
/**
|
| 117 |
+
* \brief Device class.
|
| 118 |
+
*
|
| 119 |
+
* Enumeration of device classes for device attribute
|
| 120 |
+
* CUPTI_DEVICE_ATTR_DEVICE_CLASS.
|
| 121 |
+
*/
|
| 122 |
+
typedef enum {
|
| 123 |
+
CUPTI_DEVICE_ATTR_DEVICE_CLASS_TESLA = 0,
|
| 124 |
+
CUPTI_DEVICE_ATTR_DEVICE_CLASS_QUADRO = 1,
|
| 125 |
+
CUPTI_DEVICE_ATTR_DEVICE_CLASS_GEFORCE = 2,
|
| 126 |
+
CUPTI_DEVICE_ATTR_DEVICE_CLASS_TEGRA = 3,
|
| 127 |
+
} CUpti_DeviceAttributeDeviceClass;
|
| 128 |
+
|
| 129 |
+
/**
|
| 130 |
+
* \brief Device attributes.
|
| 131 |
+
*
|
| 132 |
+
* CUPTI device attributes. These attributes can be read using \ref
|
| 133 |
+
* cuptiDeviceGetAttribute.
|
| 134 |
+
*/
|
| 135 |
+
typedef enum {
|
| 136 |
+
/**
|
| 137 |
+
* Number of event IDs for a device. Value is a uint32_t.
|
| 138 |
+
*/
|
| 139 |
+
CUPTI_DEVICE_ATTR_MAX_EVENT_ID = 1,
|
| 140 |
+
/**
|
| 141 |
+
* Number of event domain IDs for a device. Value is a uint32_t.
|
| 142 |
+
*/
|
| 143 |
+
CUPTI_DEVICE_ATTR_MAX_EVENT_DOMAIN_ID = 2,
|
| 144 |
+
/**
|
| 145 |
+
* Get global memory bandwidth in Kbytes/sec. Value is a uint64_t.
|
| 146 |
+
*/
|
| 147 |
+
CUPTI_DEVICE_ATTR_GLOBAL_MEMORY_BANDWIDTH = 3,
|
| 148 |
+
/**
|
| 149 |
+
* Get theoretical maximum number of instructions per cycle. Value
|
| 150 |
+
* is a uint32_t.
|
| 151 |
+
*/
|
| 152 |
+
CUPTI_DEVICE_ATTR_INSTRUCTION_PER_CYCLE = 4,
|
| 153 |
+
/**
|
| 154 |
+
* Get theoretical maximum number of single precision instructions
|
| 155 |
+
* that can be executed per second. Value is a uint64_t.
|
| 156 |
+
*/
|
| 157 |
+
CUPTI_DEVICE_ATTR_INSTRUCTION_THROUGHPUT_SINGLE_PRECISION = 5,
|
| 158 |
+
/**
|
| 159 |
+
* Get number of frame buffers for device. Value is a uint64_t.
|
| 160 |
+
*/
|
| 161 |
+
CUPTI_DEVICE_ATTR_MAX_FRAME_BUFFERS = 6,
|
| 162 |
+
/**
|
| 163 |
+
* Get PCIE link rate in Mega bits/sec for device. Return 0 if bus-type
|
| 164 |
+
* is non-PCIE. Value is a uint64_t.
|
| 165 |
+
*/
|
| 166 |
+
CUPTI_DEVICE_ATTR_PCIE_LINK_RATE = 7,
|
| 167 |
+
/**
|
| 168 |
+
* Get PCIE link width for device. Return 0 if bus-type
|
| 169 |
+
* is non-PCIE. Value is a uint64_t.
|
| 170 |
+
*/
|
| 171 |
+
CUPTI_DEVICE_ATTR_PCIE_LINK_WIDTH = 8,
|
| 172 |
+
/**
|
| 173 |
+
* Get PCIE generation for device. Return 0 if bus-type
|
| 174 |
+
* is non-PCIE. Value is a uint64_t.
|
| 175 |
+
*/
|
| 176 |
+
CUPTI_DEVICE_ATTR_PCIE_GEN = 9,
|
| 177 |
+
/**
|
| 178 |
+
* Get the class for the device. Value is a
|
| 179 |
+
* CUpti_DeviceAttributeDeviceClass.
|
| 180 |
+
*/
|
| 181 |
+
CUPTI_DEVICE_ATTR_DEVICE_CLASS = 10,
|
| 182 |
+
/**
|
| 183 |
+
* Get the peak single precision flop per cycle. Value is a uint64_t.
|
| 184 |
+
*/
|
| 185 |
+
CUPTI_DEVICE_ATTR_FLOP_SP_PER_CYCLE = 11,
|
| 186 |
+
/**
|
| 187 |
+
* Get the peak double precision flop per cycle. Value is a uint64_t.
|
| 188 |
+
*/
|
| 189 |
+
CUPTI_DEVICE_ATTR_FLOP_DP_PER_CYCLE = 12,
|
| 190 |
+
/**
|
| 191 |
+
* Get number of L2 units. Value is a uint64_t.
|
| 192 |
+
*/
|
| 193 |
+
CUPTI_DEVICE_ATTR_MAX_L2_UNITS = 13,
|
| 194 |
+
/**
|
| 195 |
+
* Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_SHARED
|
| 196 |
+
* preference. Value is a uint64_t.
|
| 197 |
+
*/
|
| 198 |
+
CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_SHARED = 14,
|
| 199 |
+
/**
|
| 200 |
+
* Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_L1
|
| 201 |
+
* preference. Value is a uint64_t.
|
| 202 |
+
*/
|
| 203 |
+
CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_L1 = 15,
|
| 204 |
+
/**
|
| 205 |
+
* Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_EQUAL
|
| 206 |
+
* preference. Value is a uint64_t.
|
| 207 |
+
*/
|
| 208 |
+
CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_EQUAL = 16,
|
| 209 |
+
/**
|
| 210 |
+
* Get the peak half precision flop per cycle. Value is a uint64_t.
|
| 211 |
+
*/
|
| 212 |
+
CUPTI_DEVICE_ATTR_FLOP_HP_PER_CYCLE = 17,
|
| 213 |
+
/**
|
| 214 |
+
* Check if Nvlink is connected to device. Returns 1, if at least one
|
| 215 |
+
* Nvlink is connected to the device, returns 0 otherwise.
|
| 216 |
+
* Value is a uint32_t.
|
| 217 |
+
*/
|
| 218 |
+
CUPTI_DEVICE_ATTR_NVLINK_PRESENT = 18,
|
| 219 |
+
/**
|
| 220 |
+
* Check if Nvlink is present between GPU and CPU. Returns Bandwidth,
|
| 221 |
+
* in Bytes/sec, if Nvlink is present, returns 0 otherwise.
|
| 222 |
+
* Value is a uint64_t.
|
| 223 |
+
*/
|
| 224 |
+
CUPTI_DEVICE_ATTR_GPU_CPU_NVLINK_BW = 19,
|
| 225 |
+
/**
|
| 226 |
+
* Check if NVSwitch is present in the underlying topology.
|
| 227 |
+
* Returns 1, if present, returns 0 otherwise.
|
| 228 |
+
* Value is a uint32_t.
|
| 229 |
+
*/
|
| 230 |
+
CUPTI_DEVICE_ATTR_NVSWITCH_PRESENT = 20,
|
| 231 |
+
CUPTI_DEVICE_ATTR_FORCE_INT = 0x7fffffff,
|
| 232 |
+
} CUpti_DeviceAttribute;
|
| 233 |
+
|
| 234 |
+
/**
|
| 235 |
+
* \brief Event domain attributes.
|
| 236 |
+
*
|
| 237 |
+
* Event domain attributes. Except where noted, all the attributes can
|
| 238 |
+
* be read using either \ref cuptiDeviceGetEventDomainAttribute or
|
| 239 |
+
* \ref cuptiEventDomainGetAttribute.
|
| 240 |
+
*/
|
| 241 |
+
typedef enum {
|
| 242 |
+
/**
|
| 243 |
+
* Event domain name. Value is a null terminated const c-string.
|
| 244 |
+
*/
|
| 245 |
+
CUPTI_EVENT_DOMAIN_ATTR_NAME = 0,
|
| 246 |
+
/**
|
| 247 |
+
* Number of instances of the domain for which event counts will be
|
| 248 |
+
* collected. The domain may have additional instances that cannot
|
| 249 |
+
* be profiled (see CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT).
|
| 250 |
+
* Can be read only with \ref
|
| 251 |
+
* cuptiDeviceGetEventDomainAttribute. Value is a uint32_t.
|
| 252 |
+
*/
|
| 253 |
+
CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT = 1,
|
| 254 |
+
/**
|
| 255 |
+
* Total number of instances of the domain, including instances that
|
| 256 |
+
* cannot be profiled. Use CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT
|
| 257 |
+
* to get the number of instances that can be profiled. Can be read
|
| 258 |
+
* only with \ref cuptiDeviceGetEventDomainAttribute. Value is a
|
| 259 |
+
* uint32_t.
|
| 260 |
+
*/
|
| 261 |
+
CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT = 3,
|
| 262 |
+
/**
|
| 263 |
+
* Collection method used for events contained in the event domain.
|
| 264 |
+
* Value is a \ref CUpti_EventCollectionMethod.
|
| 265 |
+
*/
|
| 266 |
+
CUPTI_EVENT_DOMAIN_ATTR_COLLECTION_METHOD = 4,
|
| 267 |
+
|
| 268 |
+
CUPTI_EVENT_DOMAIN_ATTR_FORCE_INT = 0x7fffffff,
|
| 269 |
+
} CUpti_EventDomainAttribute;
|
| 270 |
+
|
| 271 |
+
/**
|
| 272 |
+
* \brief The collection method used for an event.
|
| 273 |
+
*
|
| 274 |
+
* The collection method indicates how an event is collected.
|
| 275 |
+
*/
|
| 276 |
+
typedef enum {
|
| 277 |
+
/**
|
| 278 |
+
* Event is collected using a hardware global performance monitor.
|
| 279 |
+
*/
|
| 280 |
+
CUPTI_EVENT_COLLECTION_METHOD_PM = 0,
|
| 281 |
+
/**
|
| 282 |
+
* Event is collected using a hardware SM performance monitor.
|
| 283 |
+
*/
|
| 284 |
+
CUPTI_EVENT_COLLECTION_METHOD_SM = 1,
|
| 285 |
+
/**
|
| 286 |
+
* Event is collected using software instrumentation.
|
| 287 |
+
*/
|
| 288 |
+
CUPTI_EVENT_COLLECTION_METHOD_INSTRUMENTED = 2,
|
| 289 |
+
/**
|
| 290 |
+
* Event is collected using NvLink throughput counter method.
|
| 291 |
+
*/
|
| 292 |
+
CUPTI_EVENT_COLLECTION_METHOD_NVLINK_TC = 3,
|
| 293 |
+
CUPTI_EVENT_COLLECTION_METHOD_FORCE_INT = 0x7fffffff
|
| 294 |
+
} CUpti_EventCollectionMethod;
|
| 295 |
+
|
| 296 |
+
/**
|
| 297 |
+
* \brief Event group attributes.
|
| 298 |
+
*
|
| 299 |
+
* Event group attributes. These attributes can be read using \ref
|
| 300 |
+
* cuptiEventGroupGetAttribute. Attributes marked [rw] can also be
|
| 301 |
+
* written using \ref cuptiEventGroupSetAttribute.
|
| 302 |
+
*/
|
| 303 |
+
typedef enum {
|
| 304 |
+
/**
|
| 305 |
+
* The domain to which the event group is bound. This attribute is
|
| 306 |
+
* set when the first event is added to the group. Value is a
|
| 307 |
+
* CUpti_EventDomainID.
|
| 308 |
+
*/
|
| 309 |
+
CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID = 0,
|
| 310 |
+
/**
|
| 311 |
+
* [rw] Profile all the instances of the domain for this
|
| 312 |
+
* eventgroup. This feature can be used to get load balancing
|
| 313 |
+
* across all instances of a domain. Value is an integer.
|
| 314 |
+
*/
|
| 315 |
+
CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES = 1,
|
| 316 |
+
/**
|
| 317 |
+
* [rw] Reserved for user data.
|
| 318 |
+
*/
|
| 319 |
+
CUPTI_EVENT_GROUP_ATTR_USER_DATA = 2,
|
| 320 |
+
/**
|
| 321 |
+
* Number of events in the group. Value is a uint32_t.
|
| 322 |
+
*/
|
| 323 |
+
CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS = 3,
|
| 324 |
+
/**
|
| 325 |
+
* Enumerates events in the group. Value is a pointer to buffer of
|
| 326 |
+
* size sizeof(CUpti_EventID) * num_of_events in the eventgroup.
|
| 327 |
+
* num_of_events can be queried using
|
| 328 |
+
* CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS.
|
| 329 |
+
*/
|
| 330 |
+
CUPTI_EVENT_GROUP_ATTR_EVENTS = 4,
|
| 331 |
+
/**
|
| 332 |
+
* Number of instances of the domain bound to this event group that
|
| 333 |
+
* will be counted. Value is a uint32_t.
|
| 334 |
+
*/
|
| 335 |
+
CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT = 5,
|
| 336 |
+
/**
|
| 337 |
+
* Event group scope can be set to CUPTI_EVENT_PROFILING_SCOPE_DEVICE or
|
| 338 |
+
* CUPTI_EVENT_PROFILING_SCOPE_CONTEXT for an eventGroup, before
|
| 339 |
+
* adding any event.
|
| 340 |
+
* Sets the scope of eventgroup as CUPTI_EVENT_PROFILING_SCOPE_DEVICE or
|
| 341 |
+
* CUPTI_EVENT_PROFILING_SCOPE_CONTEXT when the scope of the events
|
| 342 |
+
* that will be added is CUPTI_EVENT_PROFILING_SCOPE_BOTH.
|
| 343 |
+
* If profiling scope of event is either
|
| 344 |
+
* CUPTI_EVENT_PROFILING_SCOPE_DEVICE or CUPTI_EVENT_PROFILING_SCOPE_CONTEXT
|
| 345 |
+
* then setting this attribute will not affect the default scope.
|
| 346 |
+
* It is not allowed to add events of different scope to same eventgroup.
|
| 347 |
+
* Value is a uint32_t.
|
| 348 |
+
*/
|
| 349 |
+
CUPTI_EVENT_GROUP_ATTR_PROFILING_SCOPE = 6,
|
| 350 |
+
CUPTI_EVENT_GROUP_ATTR_FORCE_INT = 0x7fffffff,
|
| 351 |
+
} CUpti_EventGroupAttribute;
|
| 352 |
+
|
| 353 |
+
/**
|
| 354 |
+
* \brief Profiling scope for event.
|
| 355 |
+
*
|
| 356 |
+
* Profiling scope of event indicates if the event can be collected at context
|
| 357 |
+
* scope or device scope or both i.e. it can be collected at any of context or
|
| 358 |
+
* device scope.
|
| 359 |
+
*/
|
| 360 |
+
typedef enum {
|
| 361 |
+
/**
|
| 362 |
+
* Event is collected at context scope.
|
| 363 |
+
*/
|
| 364 |
+
CUPTI_EVENT_PROFILING_SCOPE_CONTEXT = 0,
|
| 365 |
+
/**
|
| 366 |
+
* Event is collected at device scope.
|
| 367 |
+
*/
|
| 368 |
+
CUPTI_EVENT_PROFILING_SCOPE_DEVICE = 1,
|
| 369 |
+
/**
|
| 370 |
+
* Event can be collected at device or context scope.
|
| 371 |
+
* The scope can be set using \ref cuptiEventGroupSetAttribute API.
|
| 372 |
+
*/
|
| 373 |
+
CUPTI_EVENT_PROFILING_SCOPE_BOTH = 2,
|
| 374 |
+
CUPTI_EVENT_PROFILING_SCOPE_FORCE_INT = 0x7fffffff
|
| 375 |
+
} CUpti_EventProfilingScope;
|
| 376 |
+
|
| 377 |
+
/**
|
| 378 |
+
* \brief Event attributes.
|
| 379 |
+
*
|
| 380 |
+
* Event attributes. These attributes can be read using \ref
|
| 381 |
+
* cuptiEventGetAttribute.
|
| 382 |
+
*/
|
| 383 |
+
typedef enum {
|
| 384 |
+
/**
|
| 385 |
+
* Event name. Value is a null terminated const c-string.
|
| 386 |
+
*/
|
| 387 |
+
CUPTI_EVENT_ATTR_NAME = 0,
|
| 388 |
+
/**
|
| 389 |
+
* Short description of event. Value is a null terminated const
|
| 390 |
+
* c-string.
|
| 391 |
+
*/
|
| 392 |
+
CUPTI_EVENT_ATTR_SHORT_DESCRIPTION = 1,
|
| 393 |
+
/**
|
| 394 |
+
* Long description of event. Value is a null terminated const
|
| 395 |
+
* c-string.
|
| 396 |
+
*/
|
| 397 |
+
CUPTI_EVENT_ATTR_LONG_DESCRIPTION = 2,
|
| 398 |
+
/**
|
| 399 |
+
* Category of event. Value is CUpti_EventCategory.
|
| 400 |
+
*/
|
| 401 |
+
CUPTI_EVENT_ATTR_CATEGORY = 3,
|
| 402 |
+
/**
|
| 403 |
+
* Profiling scope of the events. It can be either device or context or both.
|
| 404 |
+
* Value is a \ref CUpti_EventProfilingScope.
|
| 405 |
+
*/
|
| 406 |
+
CUPTI_EVENT_ATTR_PROFILING_SCOPE = 5,
|
| 407 |
+
|
| 408 |
+
CUPTI_EVENT_ATTR_FORCE_INT = 0x7fffffff,
|
| 409 |
+
} CUpti_EventAttribute;
|
| 410 |
+
|
| 411 |
+
/**
|
| 412 |
+
* \brief Event collection modes.
|
| 413 |
+
*
|
| 414 |
+
* The event collection mode determines the period over which the
|
| 415 |
+
* events within the enabled event groups will be collected.
|
| 416 |
+
*/
|
| 417 |
+
typedef enum {
|
| 418 |
+
/**
|
| 419 |
+
* Events are collected for the entire duration between the
|
| 420 |
+
* cuptiEventGroupEnable and cuptiEventGroupDisable calls.
|
| 421 |
+
* Event values are reset when the events are read.
|
| 422 |
+
* For CUDA toolkit v6.0 and older this was the default mode.
|
| 423 |
+
*/
|
| 424 |
+
CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS = 0,
|
| 425 |
+
/**
|
| 426 |
+
* Events are collected only for the durations of kernel executions
|
| 427 |
+
* that occur between the cuptiEventGroupEnable and
|
| 428 |
+
* cuptiEventGroupDisable calls. Event collection begins when a
|
| 429 |
+
* kernel execution begins, and stops when kernel execution
|
| 430 |
+
* completes. Event values are reset to zero when each kernel
|
| 431 |
+
* execution begins. If multiple kernel executions occur between the
|
| 432 |
+
* cuptiEventGroupEnable and cuptiEventGroupDisable calls then the
|
| 433 |
+
* event values must be read after each kernel launch if those
|
| 434 |
+
* events need to be associated with the specific kernel launch.
|
| 435 |
+
* Note that collection in this mode may significantly change the
|
| 436 |
+
* overall performance characteristics of the application because
|
| 437 |
+
* kernel executions that occur between the cuptiEventGroupEnable and
|
| 438 |
+
* cuptiEventGroupDisable calls are serialized on the GPU.
|
| 439 |
+
* This is the default mode from CUDA toolkit v6.5
|
| 440 |
+
*/
|
| 441 |
+
CUPTI_EVENT_COLLECTION_MODE_KERNEL = 1,
|
| 442 |
+
CUPTI_EVENT_COLLECTION_MODE_FORCE_INT = 0x7fffffff
|
| 443 |
+
} CUpti_EventCollectionMode;
|
| 444 |
+
|
| 445 |
+
/**
|
| 446 |
+
* \brief An event category.
|
| 447 |
+
*
|
| 448 |
+
* Each event is assigned to a category that represents the general
|
| 449 |
+
* type of the event. An event's category is accessed using \ref
|
| 450 |
+
* cuptiEventGetAttribute and the CUPTI_EVENT_ATTR_CATEGORY attribute.
|
| 451 |
+
*/
|
| 452 |
+
typedef enum {
|
| 453 |
+
/**
|
| 454 |
+
* An instruction related event.
|
| 455 |
+
*/
|
| 456 |
+
CUPTI_EVENT_CATEGORY_INSTRUCTION = 0,
|
| 457 |
+
/**
|
| 458 |
+
* A memory related event.
|
| 459 |
+
*/
|
| 460 |
+
CUPTI_EVENT_CATEGORY_MEMORY = 1,
|
| 461 |
+
/**
|
| 462 |
+
* A cache related event.
|
| 463 |
+
*/
|
| 464 |
+
CUPTI_EVENT_CATEGORY_CACHE = 2,
|
| 465 |
+
/**
|
| 466 |
+
* A profile-trigger event.
|
| 467 |
+
*/
|
| 468 |
+
CUPTI_EVENT_CATEGORY_PROFILE_TRIGGER = 3,
|
| 469 |
+
/**
|
| 470 |
+
* A system event.
|
| 471 |
+
*/
|
| 472 |
+
CUPTI_EVENT_CATEGORY_SYSTEM = 4,
|
| 473 |
+
CUPTI_EVENT_CATEGORY_FORCE_INT = 0x7fffffff
|
| 474 |
+
} CUpti_EventCategory;
|
| 475 |
+
|
| 476 |
+
/**
|
| 477 |
+
* \brief The overflow value for a CUPTI event.
|
| 478 |
+
*
|
| 479 |
+
* The CUPTI event value that indicates an overflow.
|
| 480 |
+
*/
|
| 481 |
+
#define CUPTI_EVENT_OVERFLOW ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
|
| 482 |
+
|
| 483 |
+
/**
|
| 484 |
+
* \brief The value that indicates the event value is invalid
|
| 485 |
+
*/
|
| 486 |
+
#define CUPTI_EVENT_INVALID ((uint64_t)0xFFFFFFFFFFFFFFFEULL)
|
| 487 |
+
|
| 488 |
+
/**
|
| 489 |
+
* \brief Flags for cuptiEventGroupReadEvent and
|
| 490 |
+
* cuptiEventGroupReadAllEvents.
|
| 491 |
+
*
|
| 492 |
+
* Flags for \ref cuptiEventGroupReadEvent and \ref
|
| 493 |
+
* cuptiEventGroupReadAllEvents.
|
| 494 |
+
*/
|
| 495 |
+
typedef enum {
|
| 496 |
+
/**
|
| 497 |
+
* No flags.
|
| 498 |
+
*/
|
| 499 |
+
CUPTI_EVENT_READ_FLAG_NONE = 0,
|
| 500 |
+
CUPTI_EVENT_READ_FLAG_FORCE_INT = 0x7fffffff,
|
| 501 |
+
} CUpti_ReadEventFlags;
|
| 502 |
+
|
| 503 |
+
|
| 504 |
+
/**
|
| 505 |
+
* \brief A set of event groups.
|
| 506 |
+
*
|
| 507 |
+
* A set of event groups. When returned by \ref
|
| 508 |
+
* cuptiEventGroupSetsCreate and \ref cuptiMetricCreateEventGroupSets
|
| 509 |
+
* a set indicates the event groups that can be enabled at the same
|
| 510 |
+
* time (i.e. all the events in the set can be collected
|
| 511 |
+
* simultaneously).
|
| 512 |
+
*/
|
| 513 |
+
typedef struct {
|
| 514 |
+
/**
|
| 515 |
+
* The number of event groups in the set.
|
| 516 |
+
*/
|
| 517 |
+
uint32_t numEventGroups;
|
| 518 |
+
/**
|
| 519 |
+
* An array of \p numEventGroups event groups.
|
| 520 |
+
*/
|
| 521 |
+
CUpti_EventGroup *eventGroups;
|
| 522 |
+
} CUpti_EventGroupSet;
|
| 523 |
+
|
| 524 |
+
/**
|
| 525 |
+
* \brief A set of event group sets.
|
| 526 |
+
*
|
| 527 |
+
* A set of event group sets. When returned by \ref
|
| 528 |
+
* cuptiEventGroupSetsCreate and \ref cuptiMetricCreateEventGroupSets
|
| 529 |
+
* a CUpti_EventGroupSets indicates the number of passes required to
|
| 530 |
+
* collect all the events, and the event groups that should be
|
| 531 |
+
* collected during each pass.
|
| 532 |
+
*/
|
| 533 |
+
typedef struct {
|
| 534 |
+
/**
|
| 535 |
+
* Number of event group sets.
|
| 536 |
+
*/
|
| 537 |
+
uint32_t numSets;
|
| 538 |
+
/**
|
| 539 |
+
* An array of \p numSets event group sets.
|
| 540 |
+
*/
|
| 541 |
+
CUpti_EventGroupSet *sets;
|
| 542 |
+
} CUpti_EventGroupSets;
|
| 543 |
+
|
| 544 |
+
/**
|
| 545 |
+
* \brief Set the event collection mode.
|
| 546 |
+
*
|
| 547 |
+
* Set the event collection mode for a \p context. The \p mode
|
| 548 |
+
* controls the event collection behavior of all events in event
|
| 549 |
+
* groups created in the \p context. This API is invalid in kernel
|
| 550 |
+
* replay mode.
|
| 551 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 552 |
+
*
|
| 553 |
+
* \param context The context
|
| 554 |
+
* \param mode The event collection mode
|
| 555 |
+
*
|
| 556 |
+
* \retval CUPTI_SUCCESS
|
| 557 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 558 |
+
* \retval CUPTI_ERROR_INVALID_CONTEXT
|
| 559 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if called when replay mode is enabled
|
| 560 |
+
* \retval CUPTI_ERROR_NOT_SUPPORTED if mode is not supported on the device
|
| 561 |
+
*/
|
| 562 |
+
|
| 563 |
+
CUptiResult CUPTIAPI cuptiSetEventCollectionMode(CUcontext context,
|
| 564 |
+
CUpti_EventCollectionMode mode);
|
| 565 |
+
|
| 566 |
+
/**
|
| 567 |
+
* \brief Read a device attribute.
|
| 568 |
+
*
|
| 569 |
+
* Read a device attribute and return it in \p *value.
|
| 570 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 571 |
+
*
|
| 572 |
+
* \param device The CUDA device
|
| 573 |
+
* \param attrib The attribute to read
|
| 574 |
+
* \param valueSize Size of the buffer pointed to by \p value, and
|
| 575 |
+
* returns the number of bytes written to \p value
|
| 576 |
+
* \param value Returns the value of the attribute
|
| 577 |
+
*
|
| 578 |
+
* \retval CUPTI_SUCCESS
|
| 579 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 580 |
+
* \retval CUPTI_ERROR_INVALID_DEVICE
|
| 581 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
|
| 582 |
+
* is NULL, or if \p attrib is not a device attribute
|
| 583 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
|
| 584 |
+
* attribute values, indicates that the \p value buffer is too small
|
| 585 |
+
* to hold the attribute value.
|
| 586 |
+
*/
|
| 587 |
+
CUptiResult CUPTIAPI cuptiDeviceGetAttribute(CUdevice device,
|
| 588 |
+
CUpti_DeviceAttribute attrib,
|
| 589 |
+
size_t *valueSize,
|
| 590 |
+
void *value);
|
| 591 |
+
|
| 592 |
+
/**
|
| 593 |
+
* \brief Read a device timestamp.
|
| 594 |
+
*
|
| 595 |
+
* Returns the device timestamp in \p *timestamp. The timestamp is
|
| 596 |
+
* reported in nanoseconds and indicates the time since the device was
|
| 597 |
+
* last reset.
|
| 598 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 599 |
+
*
|
| 600 |
+
* \param context A context on the device from which to get the timestamp
|
| 601 |
+
* \param timestamp Returns the device timestamp
|
| 602 |
+
*
|
| 603 |
+
* \retval CUPTI_SUCCESS
|
| 604 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 605 |
+
* \retval CUPTI_ERROR_INVALID_CONTEXT
|
| 606 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p timestamp is NULL
|
| 607 |
+
|
| 608 |
+
* **DEPRECATED** This API is deprecated as of CUDA 11.3
|
| 609 |
+
*/
|
| 610 |
+
CUptiResult CUPTIAPI cuptiDeviceGetTimestamp(CUcontext context,
|
| 611 |
+
uint64_t *timestamp);
|
| 612 |
+
|
| 613 |
+
/**
|
| 614 |
+
* \brief Get the number of domains for a device.
|
| 615 |
+
*
|
| 616 |
+
* Returns the number of domains in \p numDomains for a device.
|
| 617 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 618 |
+
*
|
| 619 |
+
* \param device The CUDA device
|
| 620 |
+
* \param numDomains Returns the number of domains
|
| 621 |
+
*
|
| 622 |
+
* \retval CUPTI_SUCCESS
|
| 623 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 624 |
+
* \retval CUPTI_ERROR_INVALID_DEVICE
|
| 625 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p numDomains is NULL
|
| 626 |
+
*/
|
| 627 |
+
CUptiResult CUPTIAPI cuptiDeviceGetNumEventDomains(CUdevice device,
|
| 628 |
+
uint32_t *numDomains);
|
| 629 |
+
|
| 630 |
+
/**
|
| 631 |
+
* \brief Get the event domains for a device.
|
| 632 |
+
*
|
| 633 |
+
* Returns the event domains IDs in \p domainArray for a device. The
|
| 634 |
+
* size of the \p domainArray buffer is given by \p
|
| 635 |
+
* *arraySizeBytes. The size of the \p domainArray buffer must be at
|
| 636 |
+
* least \p numDomains * sizeof(CUpti_EventDomainID) or else not all
|
| 637 |
+
* domains will be returned. The value returned in \p
|
| 638 |
+
* *arraySizeBytes contains the number of bytes returned in \p
|
| 639 |
+
* domainArray.
|
| 640 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 641 |
+
*
|
| 642 |
+
* \param device The CUDA device
|
| 643 |
+
* \param arraySizeBytes The size of \p domainArray in bytes, and
|
| 644 |
+
* returns the number of bytes written to \p domainArray
|
| 645 |
+
* \param domainArray Returns the IDs of the event domains for the device
|
| 646 |
+
*
|
| 647 |
+
* \retval CUPTI_SUCCESS
|
| 648 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 649 |
+
* \retval CUPTI_ERROR_INVALID_DEVICE
|
| 650 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or
|
| 651 |
+
* \p domainArray are NULL
|
| 652 |
+
*/
|
| 653 |
+
CUptiResult CUPTIAPI cuptiDeviceEnumEventDomains(CUdevice device,
|
| 654 |
+
size_t *arraySizeBytes,
|
| 655 |
+
CUpti_EventDomainID *domainArray);
|
| 656 |
+
|
| 657 |
+
/**
|
| 658 |
+
* \brief Read an event domain attribute.
|
| 659 |
+
*
|
| 660 |
+
* Returns an event domain attribute in \p *value. The size of the \p
|
| 661 |
+
* value buffer is given by \p *valueSize. The value returned in \p
|
| 662 |
+
* *valueSize contains the number of bytes returned in \p value.
|
| 663 |
+
*
|
| 664 |
+
* If the attribute value is a c-string that is longer than \p
|
| 665 |
+
* *valueSize, then only the first \p *valueSize characters will be
|
| 666 |
+
* returned and there will be no terminating null byte.
|
| 667 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 668 |
+
*
|
| 669 |
+
* \param device The CUDA device
|
| 670 |
+
* \param eventDomain ID of the event domain
|
| 671 |
+
* \param attrib The event domain attribute to read
|
| 672 |
+
* \param valueSize The size of the \p value buffer in bytes, and
|
| 673 |
+
* returns the number of bytes written to \p value
|
| 674 |
+
* \param value Returns the attribute's value
|
| 675 |
+
*
|
| 676 |
+
* \retval CUPTI_SUCCESS
|
| 677 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 678 |
+
* \retval CUPTI_ERROR_INVALID_DEVICE
|
| 679 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
|
| 680 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
|
| 681 |
+
* is NULL, or if \p attrib is not an event domain attribute
|
| 682 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
|
| 683 |
+
* attribute values, indicates that the \p value buffer is too small
|
| 684 |
+
* to hold the attribute value.
|
| 685 |
+
*/
|
| 686 |
+
CUptiResult CUPTIAPI cuptiDeviceGetEventDomainAttribute(CUdevice device,
|
| 687 |
+
CUpti_EventDomainID eventDomain,
|
| 688 |
+
CUpti_EventDomainAttribute attrib,
|
| 689 |
+
size_t *valueSize,
|
| 690 |
+
void *value);
|
| 691 |
+
|
| 692 |
+
/**
|
| 693 |
+
* \brief Get the number of event domains available on any device.
|
| 694 |
+
*
|
| 695 |
+
* Returns the total number of event domains available on any
|
| 696 |
+
* CUDA-capable device.
|
| 697 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 698 |
+
*
|
| 699 |
+
* \param numDomains Returns the number of domains
|
| 700 |
+
*
|
| 701 |
+
* \retval CUPTI_SUCCESS
|
| 702 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p numDomains is NULL
|
| 703 |
+
*/
|
| 704 |
+
CUptiResult CUPTIAPI cuptiGetNumEventDomains(uint32_t *numDomains);
|
| 705 |
+
|
| 706 |
+
/**
|
| 707 |
+
* \brief Get the event domains available on any device.
|
| 708 |
+
*
|
| 709 |
+
* Returns all the event domains available on any CUDA-capable device.
|
| 710 |
+
* Event domain IDs are returned in \p domainArray. The size of the \p
|
| 711 |
+
* domainArray buffer is given by \p *arraySizeBytes. The size of the
|
| 712 |
+
* \p domainArray buffer must be at least \p numDomains *
|
| 713 |
+
* sizeof(CUpti_EventDomainID) or else not all domains will be
|
| 714 |
+
* returned. The value returned in \p *arraySizeBytes contains the
|
| 715 |
+
* number of bytes returned in \p domainArray.
|
| 716 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 717 |
+
*
|
| 718 |
+
* \param arraySizeBytes The size of \p domainArray in bytes, and
|
| 719 |
+
* returns the number of bytes written to \p domainArray
|
| 720 |
+
* \param domainArray Returns all the event domains
|
| 721 |
+
*
|
| 722 |
+
* \retval CUPTI_SUCCESS
|
| 723 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or
|
| 724 |
+
* \p domainArray are NULL
|
| 725 |
+
*/
|
| 726 |
+
CUptiResult CUPTIAPI cuptiEnumEventDomains(size_t *arraySizeBytes,
|
| 727 |
+
CUpti_EventDomainID *domainArray);
|
| 728 |
+
|
| 729 |
+
/**
|
| 730 |
+
* \brief Read an event domain attribute.
|
| 731 |
+
*
|
| 732 |
+
* Returns an event domain attribute in \p *value. The size of the \p
|
| 733 |
+
* value buffer is given by \p *valueSize. The value returned in \p
|
| 734 |
+
* *valueSize contains the number of bytes returned in \p value.
|
| 735 |
+
*
|
| 736 |
+
* If the attribute value is a c-string that is longer than \p
|
| 737 |
+
* *valueSize, then only the first \p *valueSize characters will be
|
| 738 |
+
* returned and there will be no terminating null byte.
|
| 739 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 740 |
+
*
|
| 741 |
+
* \param eventDomain ID of the event domain
|
| 742 |
+
* \param attrib The event domain attribute to read
|
| 743 |
+
* \param valueSize The size of the \p value buffer in bytes, and
|
| 744 |
+
* returns the number of bytes written to \p value
|
| 745 |
+
* \param value Returns the attribute's value
|
| 746 |
+
*
|
| 747 |
+
* \retval CUPTI_SUCCESS
|
| 748 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 749 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
|
| 750 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
|
| 751 |
+
* is NULL, or if \p attrib is not an event domain attribute
|
| 752 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
|
| 753 |
+
* attribute values, indicates that the \p value buffer is too small
|
| 754 |
+
* to hold the attribute value.
|
| 755 |
+
*/
|
| 756 |
+
CUptiResult CUPTIAPI cuptiEventDomainGetAttribute(CUpti_EventDomainID eventDomain,
|
| 757 |
+
CUpti_EventDomainAttribute attrib,
|
| 758 |
+
size_t *valueSize,
|
| 759 |
+
void *value);
|
| 760 |
+
|
| 761 |
+
/**
|
| 762 |
+
* \brief Get number of events in a domain.
|
| 763 |
+
*
|
| 764 |
+
* Returns the number of events in \p numEvents for a domain.
|
| 765 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 766 |
+
*
|
| 767 |
+
* \param eventDomain ID of the event domain
|
| 768 |
+
* \param numEvents Returns the number of events in the domain
|
| 769 |
+
*
|
| 770 |
+
* \retval CUPTI_SUCCESS
|
| 771 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 772 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
|
| 773 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p numEvents is NULL
|
| 774 |
+
*/
|
| 775 |
+
CUptiResult CUPTIAPI cuptiEventDomainGetNumEvents(CUpti_EventDomainID eventDomain,
|
| 776 |
+
uint32_t *numEvents);
|
| 777 |
+
|
| 778 |
+
/**
|
| 779 |
+
* \brief Get the events in a domain.
|
| 780 |
+
*
|
| 781 |
+
* Returns the event IDs in \p eventArray for a domain. The size of
|
| 782 |
+
* the \p eventArray buffer is given by \p *arraySizeBytes. The size
|
| 783 |
+
* of the \p eventArray buffer must be at least \p numdomainevents *
|
| 784 |
+
* sizeof(CUpti_EventID) or else not all events will be returned. The
|
| 785 |
+
* value returned in \p *arraySizeBytes contains the number of bytes
|
| 786 |
+
* returned in \p eventArray.
|
| 787 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 788 |
+
*
|
| 789 |
+
* \param eventDomain ID of the event domain
|
| 790 |
+
* \param arraySizeBytes The size of \p eventArray in bytes, and
|
| 791 |
+
* returns the number of bytes written to \p eventArray
|
| 792 |
+
* \param eventArray Returns the IDs of the events in the domain
|
| 793 |
+
*
|
| 794 |
+
* \retval CUPTI_SUCCESS
|
| 795 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 796 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
|
| 797 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or \p
|
| 798 |
+
* eventArray are NULL
|
| 799 |
+
*/
|
| 800 |
+
CUptiResult CUPTIAPI cuptiEventDomainEnumEvents(CUpti_EventDomainID eventDomain,
|
| 801 |
+
size_t *arraySizeBytes,
|
| 802 |
+
CUpti_EventID *eventArray);
|
| 803 |
+
|
| 804 |
+
/**
|
| 805 |
+
* \brief Get an event attribute.
|
| 806 |
+
*
|
| 807 |
+
* Returns an event attribute in \p *value. The size of the \p
|
| 808 |
+
* value buffer is given by \p *valueSize. The value returned in \p
|
| 809 |
+
* *valueSize contains the number of bytes returned in \p value.
|
| 810 |
+
*
|
| 811 |
+
* If the attribute value is a c-string that is longer than \p
|
| 812 |
+
* *valueSize, then only the first \p *valueSize characters will be
|
| 813 |
+
* returned and there will be no terminating null byte.
|
| 814 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 815 |
+
*
|
| 816 |
+
* \param event ID of the event
|
| 817 |
+
* \param attrib The event attribute to read
|
| 818 |
+
* \param valueSize The size of the \p value buffer in bytes, and
|
| 819 |
+
* returns the number of bytes written to \p value
|
| 820 |
+
* \param value Returns the attribute's value
|
| 821 |
+
*
|
| 822 |
+
* \retval CUPTI_SUCCESS
|
| 823 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 824 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_ID
|
| 825 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
|
| 826 |
+
* is NULL, or if \p attrib is not an event attribute
|
| 827 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
|
| 828 |
+
* attribute values, indicates that the \p value buffer is too small
|
| 829 |
+
* to hold the attribute value.
|
| 830 |
+
*/
|
| 831 |
+
CUptiResult CUPTIAPI cuptiEventGetAttribute(CUpti_EventID event,
|
| 832 |
+
CUpti_EventAttribute attrib,
|
| 833 |
+
size_t *valueSize,
|
| 834 |
+
void *value);
|
| 835 |
+
|
| 836 |
+
/**
|
| 837 |
+
* \brief Find an event by name.
|
| 838 |
+
*
|
| 839 |
+
* Find an event by name and return the event ID in \p *event.
|
| 840 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 841 |
+
*
|
| 842 |
+
* \param device The CUDA device
|
| 843 |
+
* \param eventName The name of the event to find
|
| 844 |
+
* \param event Returns the ID of the found event or undefined if
|
| 845 |
+
* unable to find the event
|
| 846 |
+
*
|
| 847 |
+
* \retval CUPTI_SUCCESS
|
| 848 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 849 |
+
* \retval CUPTI_ERROR_INVALID_DEVICE
|
| 850 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_NAME if unable to find an event
|
| 851 |
+
* with name \p eventName. In this case \p *event is undefined
|
| 852 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventName or \p event is NULL
|
| 853 |
+
*/
|
| 854 |
+
CUptiResult CUPTIAPI cuptiEventGetIdFromName(CUdevice device,
|
| 855 |
+
const char *eventName,
|
| 856 |
+
CUpti_EventID *event);
|
| 857 |
+
|
| 858 |
+
/**
|
| 859 |
+
* \brief Create a new event group for a context.
|
| 860 |
+
*
|
| 861 |
+
* Creates a new event group for \p context and returns the new group
|
| 862 |
+
* in \p *eventGroup.
|
| 863 |
+
* \note \p flags are reserved for future use and should be set to zero.
|
| 864 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 865 |
+
*
|
| 866 |
+
* \param context The context for the event group
|
| 867 |
+
* \param eventGroup Returns the new event group
|
| 868 |
+
* \param flags Reserved - must be zero
|
| 869 |
+
*
|
| 870 |
+
* \retval CUPTI_SUCCESS
|
| 871 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 872 |
+
* \retval CUPTI_ERROR_INVALID_CONTEXT
|
| 873 |
+
* \retval CUPTI_ERROR_OUT_OF_MEMORY
|
| 874 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 875 |
+
*/
|
| 876 |
+
CUptiResult CUPTIAPI cuptiEventGroupCreate(CUcontext context,
|
| 877 |
+
CUpti_EventGroup *eventGroup,
|
| 878 |
+
uint32_t flags);
|
| 879 |
+
|
| 880 |
+
/**
|
| 881 |
+
* \brief Destroy an event group.
|
| 882 |
+
*
|
| 883 |
+
* Destroy an \p eventGroup and free its resources. An event group
|
| 884 |
+
* cannot be destroyed if it is enabled.
|
| 885 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 886 |
+
*
|
| 887 |
+
* \param eventGroup The event group to destroy
|
| 888 |
+
*
|
| 889 |
+
* \retval CUPTI_SUCCESS
|
| 890 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 891 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if the event group is enabled
|
| 892 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 893 |
+
*/
|
| 894 |
+
CUptiResult CUPTIAPI cuptiEventGroupDestroy(CUpti_EventGroup eventGroup);
|
| 895 |
+
|
| 896 |
+
/**
|
| 897 |
+
* \brief Read an event group attribute.
|
| 898 |
+
*
|
| 899 |
+
* Read an event group attribute and return it in \p *value.
|
| 900 |
+
* \note \b Thread-safety: this function is thread safe but client
|
| 901 |
+
* must guard against simultaneous destruction or modification of \p
|
| 902 |
+
* eventGroup (for example, client must guard against simultaneous
|
| 903 |
+
* calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
|
| 904 |
+
* etc.), and must guard against simultaneous destruction of the
|
| 905 |
+
* context in which \p eventGroup was created (for example, client
|
| 906 |
+
* must guard against simultaneous calls to cudaDeviceReset,
|
| 907 |
+
* cuCtxDestroy, etc.).
|
| 908 |
+
*
|
| 909 |
+
* \param eventGroup The event group
|
| 910 |
+
* \param attrib The attribute to read
|
| 911 |
+
* \param valueSize The size of the \p value buffer in bytes, and
|
| 912 |
+
* returns the number of bytes written to \p value
|
| 913 |
+
* \param value Returns the value of the attribute
|
| 914 |
+
*
|
| 915 |
+
* \retval CUPTI_SUCCESS
|
| 916 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 917 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
|
| 918 |
+
* is NULL, or if \p attrib is not an event group attribute
|
| 919 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
|
| 920 |
+
* attribute values, indicates that the \p value buffer is too small
|
| 921 |
+
* to hold the attribute value.
|
| 922 |
+
*/
|
| 923 |
+
CUptiResult CUPTIAPI cuptiEventGroupGetAttribute(CUpti_EventGroup eventGroup,
|
| 924 |
+
CUpti_EventGroupAttribute attrib,
|
| 925 |
+
size_t *valueSize,
|
| 926 |
+
void *value);
|
| 927 |
+
|
| 928 |
+
/**
|
| 929 |
+
* \brief Write an event group attribute.
|
| 930 |
+
*
|
| 931 |
+
* Write an event group attribute.
|
| 932 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 933 |
+
*
|
| 934 |
+
* \param eventGroup The event group
|
| 935 |
+
* \param attrib The attribute to write
|
| 936 |
+
* \param valueSize The size, in bytes, of the value
|
| 937 |
+
* \param value The attribute value to write
|
| 938 |
+
*
|
| 939 |
+
* \retval CUPTI_SUCCESS
|
| 940 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 941 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
|
| 942 |
+
* is NULL, or if \p attrib is not an event group attribute, or if
|
| 943 |
+
* \p attrib is not a writable attribute
|
| 944 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT Indicates that
|
| 945 |
+
* the \p value buffer is too small to hold the attribute value.
|
| 946 |
+
*/
|
| 947 |
+
CUptiResult CUPTIAPI cuptiEventGroupSetAttribute(CUpti_EventGroup eventGroup,
|
| 948 |
+
CUpti_EventGroupAttribute attrib,
|
| 949 |
+
size_t valueSize,
|
| 950 |
+
void *value);
|
| 951 |
+
|
| 952 |
+
/**
|
| 953 |
+
* \brief Add an event to an event group.
|
| 954 |
+
*
|
| 955 |
+
* Add an event to an event group. The event add can fail for a number of reasons:
|
| 956 |
+
* \li The event group is enabled
|
| 957 |
+
* \li The event does not belong to the same event domain as the
|
| 958 |
+
* events that are already in the event group
|
| 959 |
+
* \li Device limitations on the events that can belong to the same group
|
| 960 |
+
* \li The event group is full
|
| 961 |
+
*
|
| 962 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 963 |
+
*
|
| 964 |
+
* \param eventGroup The event group
|
| 965 |
+
* \param event The event to add to the group
|
| 966 |
+
*
|
| 967 |
+
* \retval CUPTI_SUCCESS
|
| 968 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 969 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_ID
|
| 970 |
+
* \retval CUPTI_ERROR_OUT_OF_MEMORY
|
| 971 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is enabled
|
| 972 |
+
* \retval CUPTI_ERROR_NOT_COMPATIBLE if \p event belongs to a
|
| 973 |
+
* different event domain than the events already in \p eventGroup, or
|
| 974 |
+
* if a device limitation prevents \p event from being collected at
|
| 975 |
+
* the same time as the events already in \p eventGroup
|
| 976 |
+
* \retval CUPTI_ERROR_MAX_LIMIT_REACHED if \p eventGroup is full
|
| 977 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 978 |
+
*/
|
| 979 |
+
CUptiResult CUPTIAPI cuptiEventGroupAddEvent(CUpti_EventGroup eventGroup,
|
| 980 |
+
CUpti_EventID event);
|
| 981 |
+
|
| 982 |
+
/**
|
| 983 |
+
* \brief Remove an event from an event group.
|
| 984 |
+
*
|
| 985 |
+
* Remove \p event from an event group. The event cannot be
|
| 986 |
+
* removed if the event group is enabled.
|
| 987 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 988 |
+
*
|
| 989 |
+
* \param eventGroup The event group
|
| 990 |
+
* \param event The event to remove from the group
|
| 991 |
+
*
|
| 992 |
+
* \retval CUPTI_SUCCESS
|
| 993 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 994 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_ID
|
| 995 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is enabled
|
| 996 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 997 |
+
*/
|
| 998 |
+
CUptiResult CUPTIAPI cuptiEventGroupRemoveEvent(CUpti_EventGroup eventGroup,
|
| 999 |
+
CUpti_EventID event);
|
| 1000 |
+
|
| 1001 |
+
/**
|
| 1002 |
+
* \brief Remove all events from an event group.
|
| 1003 |
+
*
|
| 1004 |
+
* Remove all events from an event group. Events cannot be removed if
|
| 1005 |
+
* the event group is enabled.
|
| 1006 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 1007 |
+
*
|
| 1008 |
+
* \param eventGroup The event group
|
| 1009 |
+
*
|
| 1010 |
+
* \retval CUPTI_SUCCESS
|
| 1011 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1012 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is enabled
|
| 1013 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 1014 |
+
*/
|
| 1015 |
+
CUptiResult CUPTIAPI cuptiEventGroupRemoveAllEvents(CUpti_EventGroup eventGroup);
|
| 1016 |
+
|
| 1017 |
+
/**
|
| 1018 |
+
* \brief Zero all the event counts in an event group.
|
| 1019 |
+
*
|
| 1020 |
+
* Zero all the event counts in an event group.
|
| 1021 |
+
* \note \b Thread-safety: this function is thread safe but client
|
| 1022 |
+
* must guard against simultaneous destruction or modification of \p
|
| 1023 |
+
* eventGroup (for example, client must guard against simultaneous
|
| 1024 |
+
* calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
|
| 1025 |
+
* etc.), and must guard against simultaneous destruction of the
|
| 1026 |
+
* context in which \p eventGroup was created (for example, client
|
| 1027 |
+
* must guard against simultaneous calls to cudaDeviceReset,
|
| 1028 |
+
* cuCtxDestroy, etc.).
|
| 1029 |
+
*
|
| 1030 |
+
* \param eventGroup The event group
|
| 1031 |
+
*
|
| 1032 |
+
* \retval CUPTI_SUCCESS
|
| 1033 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1034 |
+
* \retval CUPTI_ERROR_HARDWARE
|
| 1035 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 1036 |
+
*/
|
| 1037 |
+
CUptiResult CUPTIAPI cuptiEventGroupResetAllEvents(CUpti_EventGroup eventGroup);
|
| 1038 |
+
|
| 1039 |
+
/**
|
| 1040 |
+
* \brief Enable an event group.
|
| 1041 |
+
*
|
| 1042 |
+
* Enable an event group. Enabling an event group zeros the value of
|
| 1043 |
+
* all the events in the group and then starts collection of those
|
| 1044 |
+
* events.
|
| 1045 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 1046 |
+
*
|
| 1047 |
+
* \param eventGroup The event group
|
| 1048 |
+
*
|
| 1049 |
+
* \retval CUPTI_SUCCESS
|
| 1050 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1051 |
+
* \retval CUPTI_ERROR_HARDWARE
|
| 1052 |
+
* \retval CUPTI_ERROR_NOT_READY if \p eventGroup does not contain any events
|
| 1053 |
+
* \retval CUPTI_ERROR_NOT_COMPATIBLE if \p eventGroup cannot be
|
| 1054 |
+
* enabled due to other already enabled event groups
|
| 1055 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 1056 |
+
* \retval CUPTI_ERROR_HARDWARE_BUSY if another client is profiling
|
| 1057 |
+
* and hardware is busy
|
| 1058 |
+
*/
|
| 1059 |
+
CUptiResult CUPTIAPI cuptiEventGroupEnable(CUpti_EventGroup eventGroup);
|
| 1060 |
+
|
| 1061 |
+
/**
|
| 1062 |
+
* \brief Disable an event group.
|
| 1063 |
+
*
|
| 1064 |
+
* Disable an event group. Disabling an event group stops collection
|
| 1065 |
+
* of events contained in the group.
|
| 1066 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 1067 |
+
*
|
| 1068 |
+
* \param eventGroup The event group
|
| 1069 |
+
*
|
| 1070 |
+
* \retval CUPTI_SUCCESS
|
| 1071 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1072 |
+
* \retval CUPTI_ERROR_HARDWARE
|
| 1073 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
|
| 1074 |
+
*/
|
| 1075 |
+
CUptiResult CUPTIAPI cuptiEventGroupDisable(CUpti_EventGroup eventGroup);
|
| 1076 |
+
|
| 1077 |
+
/**
|
| 1078 |
+
* \brief Read the value for an event in an event group.
|
| 1079 |
+
*
|
| 1080 |
+
* Read the value for an event in an event group. The event value is
|
| 1081 |
+
* returned in the \p eventValueBuffer buffer. \p
|
| 1082 |
+
* eventValueBufferSizeBytes indicates the size of the \p
|
| 1083 |
+
* eventValueBuffer buffer. The buffer must be at least sizeof(uint64_t)
|
| 1084 |
+
* if ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is not set
|
| 1085 |
+
* on the group containing the event. The buffer must be at least
|
| 1086 |
+
* (sizeof(uint64_t) * number of domain instances) if
|
| 1087 |
+
* ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is set on the
|
| 1088 |
+
* group.
|
| 1089 |
+
*
|
| 1090 |
+
* If any instance of an event counter overflows, the value returned
|
| 1091 |
+
* for that event instance will be ::CUPTI_EVENT_OVERFLOW.
|
| 1092 |
+
*
|
| 1093 |
+
* The only allowed value for \p flags is ::CUPTI_EVENT_READ_FLAG_NONE.
|
| 1094 |
+
*
|
| 1095 |
+
* Reading an event from a disabled event group is not allowed. After
|
| 1096 |
+
* being read, an event's value is reset to zero.
|
| 1097 |
+
* \note \b Thread-safety: this function is thread safe but client
|
| 1098 |
+
* must guard against simultaneous destruction or modification of \p
|
| 1099 |
+
* eventGroup (for example, client must guard against simultaneous
|
| 1100 |
+
* calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
|
| 1101 |
+
* etc.), and must guard against simultaneous destruction of the
|
| 1102 |
+
* context in which \p eventGroup was created (for example, client
|
| 1103 |
+
* must guard against simultaneous calls to cudaDeviceReset,
|
| 1104 |
+
* cuCtxDestroy, etc.). If \ref cuptiEventGroupResetAllEvents is
|
| 1105 |
+
* called simultaneously with this function, then returned event
|
| 1106 |
+
* values are undefined.
|
| 1107 |
+
*
|
| 1108 |
+
* \param eventGroup The event group
|
| 1109 |
+
* \param flags Flags controlling the reading mode
|
| 1110 |
+
* \param event The event to read
|
| 1111 |
+
* \param eventValueBufferSizeBytes The size of \p eventValueBuffer
|
| 1112 |
+
* in bytes, and returns the number of bytes written to \p
|
| 1113 |
+
* eventValueBuffer
|
| 1114 |
+
* \param eventValueBuffer Returns the event value(s)
|
| 1115 |
+
*
|
| 1116 |
+
* \retval CUPTI_SUCCESS
|
| 1117 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1118 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_ID
|
| 1119 |
+
* \retval CUPTI_ERROR_HARDWARE
|
| 1120 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is disabled
|
| 1121 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup, \p
|
| 1122 |
+
* eventValueBufferSizeBytes or \p eventValueBuffer is NULL
|
| 1123 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if size of \p eventValueBuffer
|
| 1124 |
+
* is not sufficient
|
| 1125 |
+
*/
|
| 1126 |
+
CUptiResult CUPTIAPI cuptiEventGroupReadEvent(CUpti_EventGroup eventGroup,
|
| 1127 |
+
CUpti_ReadEventFlags flags,
|
| 1128 |
+
CUpti_EventID event,
|
| 1129 |
+
size_t *eventValueBufferSizeBytes,
|
| 1130 |
+
uint64_t *eventValueBuffer);
|
| 1131 |
+
|
| 1132 |
+
/**
|
| 1133 |
+
* \brief Read the values for all the events in an event group.
|
| 1134 |
+
*
|
| 1135 |
+
* Read the values for all the events in an event group. The event
|
| 1136 |
+
* values are returned in the \p eventValueBuffer buffer. \p
|
| 1137 |
+
* eventValueBufferSizeBytes indicates the size of \p
|
| 1138 |
+
* eventValueBuffer. The buffer must be at least (sizeof(uint64_t) *
|
| 1139 |
+
* number of events in group) if
|
| 1140 |
+
* ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is not set on
|
| 1141 |
+
* the group containing the events. The buffer must be at least
|
| 1142 |
+
* (sizeof(uint64_t) * number of domain instances * number of events in
|
| 1143 |
+
* group) if ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is
|
| 1144 |
+
* set on the group.
|
| 1145 |
+
*
|
| 1146 |
+
* The data format returned in \p eventValueBuffer is:
|
| 1147 |
+
* - domain instance 0: event0 event1 ... eventN
|
| 1148 |
+
* - domain instance 1: event0 event1 ... eventN
|
| 1149 |
+
* - ...
|
| 1150 |
+
* - domain instance M: event0 event1 ... eventN
|
| 1151 |
+
*
|
| 1152 |
+
* The event order in \p eventValueBuffer is returned in \p
|
| 1153 |
+
* eventIdArray. The size of \p eventIdArray is specified in \p
|
| 1154 |
+
* eventIdArraySizeBytes. The size should be at least
|
| 1155 |
+
* (sizeof(CUpti_EventID) * number of events in group).
|
| 1156 |
+
*
|
| 1157 |
+
* If any instance of any event counter overflows, the value returned
|
| 1158 |
+
* for that event instance will be ::CUPTI_EVENT_OVERFLOW.
|
| 1159 |
+
*
|
| 1160 |
+
* The only allowed value for \p flags is ::CUPTI_EVENT_READ_FLAG_NONE.
|
| 1161 |
+
*
|
| 1162 |
+
* Reading events from a disabled event group is not allowed. After
|
| 1163 |
+
* being read, an event's value is reset to zero.
|
| 1164 |
+
* \note \b Thread-safety: this function is thread safe but client
|
| 1165 |
+
* must guard against simultaneous destruction or modification of \p
|
| 1166 |
+
* eventGroup (for example, client must guard against simultaneous
|
| 1167 |
+
* calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
|
| 1168 |
+
* etc.), and must guard against simultaneous destruction of the
|
| 1169 |
+
* context in which \p eventGroup was created (for example, client
|
| 1170 |
+
* must guard against simultaneous calls to cudaDeviceReset,
|
| 1171 |
+
* cuCtxDestroy, etc.). If \ref cuptiEventGroupResetAllEvents is
|
| 1172 |
+
* called simultaneously with this function, then returned event
|
| 1173 |
+
* values are undefined.
|
| 1174 |
+
*
|
| 1175 |
+
* \param eventGroup The event group
|
| 1176 |
+
* \param flags Flags controlling the reading mode
|
| 1177 |
+
* \param eventValueBufferSizeBytes The size of \p eventValueBuffer in
|
| 1178 |
+
* bytes, and returns the number of bytes written to \p
|
| 1179 |
+
* eventValueBuffer
|
| 1180 |
+
* \param eventValueBuffer Returns the event values
|
| 1181 |
+
* \param eventIdArraySizeBytes The size of \p eventIdArray in bytes,
|
| 1182 |
+
* and returns the number of bytes written to \p eventIdArray
|
| 1183 |
+
* \param eventIdArray Returns the IDs of the events in the same order
|
| 1184 |
+
* as the values returned in \p eventValueBuffer.
|
| 1185 |
+
* \param numEventIdsRead Returns the number of event IDs returned
|
| 1186 |
+
* in \p eventIdArray
|
| 1187 |
+
*
|
| 1188 |
+
* \retval CUPTI_SUCCESS
|
| 1189 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1190 |
+
* \retval CUPTI_ERROR_HARDWARE
|
| 1191 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is disabled
|
| 1192 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup, \p
|
| 1193 |
+
* eventValueBufferSizeBytes, \p eventValueBuffer, \p
|
| 1194 |
+
* eventIdArraySizeBytes, \p eventIdArray or \p numEventIdsRead is
|
| 1195 |
+
* NULL
|
| 1196 |
+
* \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if size of \p eventValueBuffer
|
| 1197 |
+
* or \p eventIdArray is not sufficient
|
| 1198 |
+
*/
|
| 1199 |
+
CUptiResult CUPTIAPI cuptiEventGroupReadAllEvents(CUpti_EventGroup eventGroup,
|
| 1200 |
+
CUpti_ReadEventFlags flags,
|
| 1201 |
+
size_t *eventValueBufferSizeBytes,
|
| 1202 |
+
uint64_t *eventValueBuffer,
|
| 1203 |
+
size_t *eventIdArraySizeBytes,
|
| 1204 |
+
CUpti_EventID *eventIdArray,
|
| 1205 |
+
size_t *numEventIdsRead);
|
| 1206 |
+
|
| 1207 |
+
/**
|
| 1208 |
+
* \brief For a set of events, get the grouping that indicates the
|
| 1209 |
+
* number of passes and the event groups necessary to collect the
|
| 1210 |
+
* events.
|
| 1211 |
+
*
|
| 1212 |
+
* The number of events that can be collected simultaneously varies by
|
| 1213 |
+
* device and by the type of the events. When events can be collected
|
| 1214 |
+
* simultaneously, they may need to be grouped into multiple event
|
| 1215 |
+
* groups because they are from different event domains. This function
|
| 1216 |
+
* takes a set of events and determines how many passes are required
|
| 1217 |
+
* to collect all those events, and which events can be collected
|
| 1218 |
+
* simultaneously in each pass.
|
| 1219 |
+
*
|
| 1220 |
+
* The CUpti_EventGroupSets returned in \p eventGroupPasses indicates
|
| 1221 |
+
* how many passes are required to collect the events with the \p
|
| 1222 |
+
* numSets field. Within each event group set, the \p sets array
|
| 1223 |
+
* indicates the event groups that should be collected on each pass.
|
| 1224 |
+
* \note \b Thread-safety: this function is thread safe, but client
|
| 1225 |
+
* must guard against another thread simultaneously destroying \p
|
| 1226 |
+
* context.
|
| 1227 |
+
*
|
| 1228 |
+
* \param context The context for event collection
|
| 1229 |
+
* \param eventIdArraySizeBytes Size of \p eventIdArray in bytes
|
| 1230 |
+
* \param eventIdArray Array of event IDs that need to be grouped
|
| 1231 |
+
* \param eventGroupPasses Returns a CUpti_EventGroupSets object that
|
| 1232 |
+
* indicates the number of passes required to collect the events and
|
| 1233 |
+
* the events to collect on each pass
|
| 1234 |
+
*
|
| 1235 |
+
* \retval CUPTI_SUCCESS
|
| 1236 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1237 |
+
* \retval CUPTI_ERROR_INVALID_CONTEXT
|
| 1238 |
+
* \retval CUPTI_ERROR_INVALID_EVENT_ID
|
| 1239 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventIdArray or
|
| 1240 |
+
* \p eventGroupPasses is NULL
|
| 1241 |
+
*/
|
| 1242 |
+
CUptiResult CUPTIAPI cuptiEventGroupSetsCreate(CUcontext context,
|
| 1243 |
+
size_t eventIdArraySizeBytes,
|
| 1244 |
+
CUpti_EventID *eventIdArray,
|
| 1245 |
+
CUpti_EventGroupSets **eventGroupPasses);
|
| 1246 |
+
|
| 1247 |
+
/**
|
| 1248 |
+
* \brief Destroy an event group sets object.
|
| 1249 |
+
*
|
| 1250 |
+
* Destroy a CUpti_EventGroupSets object.
|
| 1251 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 1252 |
+
*
|
| 1253 |
+
* \param eventGroupSets The object to destroy
|
| 1254 |
+
*
|
| 1255 |
+
* \retval CUPTI_SUCCESS
|
| 1256 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1257 |
+
* \retval CUPTI_ERROR_INVALID_OPERATION if any of the event groups
|
| 1258 |
+
* contained in the sets is enabled
|
| 1259 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroupSets is NULL
|
| 1260 |
+
*/
|
| 1261 |
+
CUptiResult CUPTIAPI cuptiEventGroupSetsDestroy(CUpti_EventGroupSets *eventGroupSets);
|
| 1262 |
+
|
| 1263 |
+
|
| 1264 |
+
/**
|
| 1265 |
+
* \brief Enable an event group set.
|
| 1266 |
+
*
|
| 1267 |
+
* Enable a set of event groups. Enabling a set of event groups zeros the value of
|
| 1268 |
+
* all the events in all the groups and then starts collection of those events.
|
| 1269 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 1270 |
+
*
|
| 1271 |
+
* \param eventGroupSet The pointer to the event group set
|
| 1272 |
+
*
|
| 1273 |
+
* \retval CUPTI_SUCCESS
|
| 1274 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1275 |
+
* \retval CUPTI_ERROR_HARDWARE
|
| 1276 |
+
* \retval CUPTI_ERROR_NOT_READY if \p eventGroupSet does not contain any events
|
| 1277 |
+
* \retval CUPTI_ERROR_NOT_COMPATIBLE if \p eventGroupSet cannot be
|
| 1278 |
+
* enabled due to other already enabled event groups
|
| 1279 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroupSet is NULL
|
| 1280 |
+
* \retval CUPTI_ERROR_HARDWARE_BUSY if another client is profiling and hardware is
|
| 1281 |
+
* busy
|
| 1282 |
+
*/
|
| 1283 |
+
CUptiResult CUPTIAPI cuptiEventGroupSetEnable(CUpti_EventGroupSet *eventGroupSet);
|
| 1284 |
+
|
| 1285 |
+
/**
|
| 1286 |
+
* \brief Disable an event group set.
|
| 1287 |
+
*
|
| 1288 |
+
* Disable a set of event groups. Disabling a set of event groups
|
| 1289 |
+
* stops collection of events contained in the groups.
|
| 1290 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 1291 |
+
* \note \b If this call fails, some of the event groups in the set may be disabled
|
| 1292 |
+
* and other event groups may remain enabled.
|
| 1293 |
+
*
|
| 1294 |
+
* \param eventGroupSet The pointer to the event group set
|
| 1295 |
+
* \retval CUPTI_SUCCESS
|
| 1296 |
+
* \retval CUPTI_ERROR_NOT_INITIALIZED
|
| 1297 |
+
* \retval CUPTI_ERROR_HARDWARE
|
| 1298 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroupSet is NULL
|
| 1299 |
+
*/
|
| 1300 |
+
CUptiResult CUPTIAPI cuptiEventGroupSetDisable(CUpti_EventGroupSet *eventGroupSet);
|
| 1301 |
+
|
| 1302 |
+
/**
|
| 1303 |
+
* \brief Enable kernel replay mode.
|
| 1304 |
+
*
|
| 1305 |
+
* Set profiling mode for the context to replay mode. In this mode,
|
| 1306 |
+
* any number of events can be collected in one run of the kernel. The
|
| 1307 |
+
* event collection mode will automatically switch to
|
| 1308 |
+
* CUPTI_EVENT_COLLECTION_MODE_KERNEL. In this mode, \ref
|
| 1309 |
+
* cuptiSetEventCollectionMode will return
|
| 1310 |
+
* CUPTI_ERROR_INVALID_OPERATION.
|
| 1311 |
+
* \note \b Kernels might take longer to run if many events are enabled.
|
| 1312 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 1313 |
+
*
|
| 1314 |
+
* \param context The context
|
| 1315 |
+
* \retval CUPTI_SUCCESS
|
| 1316 |
+
*/
|
| 1317 |
+
CUptiResult CUPTIAPI cuptiEnableKernelReplayMode(CUcontext context);
|
| 1318 |
+
|
| 1319 |
+
/**
|
| 1320 |
+
* \brief Disable kernel replay mode.
|
| 1321 |
+
*
|
| 1322 |
+
* Set profiling mode for the context to non-replay (default)
|
| 1323 |
+
* mode. Event collection mode will be set to
|
| 1324 |
+
* CUPTI_EVENT_COLLECTION_MODE_KERNEL. All previously enabled
|
| 1325 |
+
* event groups and event group sets will be disabled.
|
| 1326 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 1327 |
+
*
|
| 1328 |
+
* \param context The context
|
| 1329 |
+
* \retval CUPTI_SUCCESS
|
| 1330 |
+
*/
|
| 1331 |
+
CUptiResult CUPTIAPI cuptiDisableKernelReplayMode(CUcontext context);
|
| 1332 |
+
|
| 1333 |
+
/**
|
| 1334 |
+
* \brief Function type for getting updates on kernel replay.
|
| 1335 |
+
*
|
| 1336 |
+
* \param kernelName The mangled kernel name
|
| 1337 |
+
* \param numReplaysDone Number of replays done so far
|
| 1338 |
+
* \param customData Pointer to any custom data passed in when subscribing
|
| 1339 |
+
*/
|
| 1340 |
+
typedef void (CUPTIAPI *CUpti_KernelReplayUpdateFunc)(
|
| 1341 |
+
const char *kernelName,
|
| 1342 |
+
int numReplaysDone,
|
| 1343 |
+
void *customData);
|
| 1344 |
+
|
| 1345 |
+
/**
|
| 1346 |
+
* \brief Subscribe to kernel replay updates.
|
| 1347 |
+
*
|
| 1348 |
+
* When subscribed, the function pointer passed in will be called each time a
|
| 1349 |
+
* kernel run is finished during kernel replay. Previously subscribed function
|
| 1350 |
+
* pointer will be replaced. Passing in NULL as the function pointer unsubscribes
|
| 1351 |
+
* the update.
|
| 1352 |
+
*
|
| 1353 |
+
* \param updateFunc The update function pointer
|
| 1354 |
+
* \param customData Pointer to any custom data
|
| 1355 |
+
* \retval CUPTI_SUCCESS
|
| 1356 |
+
*/
|
| 1357 |
+
CUptiResult CUPTIAPI cuptiKernelReplaySubscribeUpdate(CUpti_KernelReplayUpdateFunc updateFunc, void *customData);
|
| 1358 |
+
|
| 1359 |
+
/** @} */ /* END CUPTI_EVENT_API */
|
| 1360 |
+
|
| 1361 |
+
#if defined(__GNUC__) && defined(CUPTI_LIB)
|
| 1362 |
+
#pragma GCC visibility pop
|
| 1363 |
+
#endif
|
| 1364 |
+
|
| 1365 |
+
#if defined(__cplusplus)
|
| 1366 |
+
}
|
| 1367 |
+
#endif
|
| 1368 |
+
|
| 1369 |
+
#endif /*_CUPTI_EVENTS_H_*/
|
| 1370 |
+
|
| 1371 |
+
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_pcsampling_util.h
ADDED
|
@@ -0,0 +1,419 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#if !defined(_CUPTI_PCSAMPLING_UTIL_H_)
|
| 2 |
+
#define _CUPTI_PCSAMPLING_UTIL_H_
|
| 3 |
+
|
| 4 |
+
#include <cupti_pcsampling.h>
|
| 5 |
+
#include <fstream>
|
| 6 |
+
|
| 7 |
+
#ifndef CUPTIUTILAPI
|
| 8 |
+
#ifdef _WIN32
|
| 9 |
+
#define CUPTIUTILAPI __stdcall
|
| 10 |
+
#else
|
| 11 |
+
#define CUPTIUTILAPI
|
| 12 |
+
#endif
|
| 13 |
+
#endif
|
| 14 |
+
|
| 15 |
+
#define ACTIVITY_RECORD_ALIGNMENT 8
|
| 16 |
+
#if defined(_WIN32) // Windows 32- and 64-bit
|
| 17 |
+
#define START_PACKED_ALIGNMENT __pragma(pack(push,1)) // exact fit - no padding
|
| 18 |
+
#define PACKED_ALIGNMENT __declspec(align(ACTIVITY_RECORD_ALIGNMENT))
|
| 19 |
+
#define END_PACKED_ALIGNMENT __pragma(pack(pop))
|
| 20 |
+
#elif defined(__GNUC__) // GCC
|
| 21 |
+
#define START_PACKED_ALIGNMENT
|
| 22 |
+
#define PACKED_ALIGNMENT __attribute__ ((__packed__)) __attribute__ ((aligned (ACTIVITY_RECORD_ALIGNMENT)))
|
| 23 |
+
#define END_PACKED_ALIGNMENT
|
| 24 |
+
#else // all other compilers
|
| 25 |
+
#define START_PACKED_ALIGNMENT
|
| 26 |
+
#define PACKED_ALIGNMENT
|
| 27 |
+
#define END_PACKED_ALIGNMENT
|
| 28 |
+
#endif
|
| 29 |
+
|
| 30 |
+
#ifndef CUPTI_UTIL_STRUCT_SIZE
|
| 31 |
+
#define CUPTI_UTIL_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
|
| 32 |
+
#endif
|
| 33 |
+
|
| 34 |
+
#ifndef CHECK_PC_SAMPLING_STRUCT_FIELD_EXISTS
|
| 35 |
+
#define CHECK_PC_SAMPLING_STRUCT_FIELD_EXISTS(type, member, structSize) \
|
| 36 |
+
(offsetof(type, member) < structSize)
|
| 37 |
+
#endif
|
| 38 |
+
|
| 39 |
+
#if defined(__cplusplus)
|
| 40 |
+
extern "C" {
|
| 41 |
+
#endif
|
| 42 |
+
|
| 43 |
+
#if defined(__GNUC__)
|
| 44 |
+
#pragma GCC visibility push(default)
|
| 45 |
+
#endif
|
| 46 |
+
|
| 47 |
+
namespace CUPTI { namespace PcSamplingUtil {
|
| 48 |
+
|
| 49 |
+
/**
|
| 50 |
+
* \defgroup CUPTI_PCSAMPLING_UTILITY CUPTI PC Sampling Utility API
|
| 51 |
+
* Functions, types, and enums that implement the CUPTI PC Sampling Utility API.
|
| 52 |
+
* @{
|
| 53 |
+
*/
|
| 54 |
+
|
| 55 |
+
/**
|
| 56 |
+
* \brief Header info will be stored in file.
|
| 57 |
+
*/
|
| 58 |
+
typedef struct PACKED_ALIGNMENT {
|
| 59 |
+
/**
|
| 60 |
+
* Version of file format.
|
| 61 |
+
*/
|
| 62 |
+
uint32_t version;
|
| 63 |
+
/**
|
| 64 |
+
* Total number of buffers present in the file.
|
| 65 |
+
*/
|
| 66 |
+
uint32_t totalBuffers;
|
| 67 |
+
} Header;
|
| 68 |
+
|
| 69 |
+
/**
|
| 70 |
+
* \brief BufferInfo will be stored in the file for every buffer
|
| 71 |
+
* i.e. for every call of the CuptiUtilPutPcSampData() API.
|
| 72 |
+
*/
|
| 73 |
+
typedef struct PACKED_ALIGNMENT {
|
| 74 |
+
/**
|
| 75 |
+
* Total number of PC records.
|
| 76 |
+
*/
|
| 77 |
+
uint64_t recordCount;
|
| 78 |
+
/**
|
| 79 |
+
* Count of all stall reasons supported on the GPU
|
| 80 |
+
*/
|
| 81 |
+
size_t numStallReasons;
|
| 82 |
+
/**
|
| 83 |
+
* Total number of stall reasons in single record.
|
| 84 |
+
*/
|
| 85 |
+
uint64_t numSelectedStallReasons;
|
| 86 |
+
/**
|
| 87 |
+
* Buffer size in Bytes.
|
| 88 |
+
*/
|
| 89 |
+
uint64_t bufferByteSize;
|
| 90 |
+
} BufferInfo;
|
| 91 |
+
|
| 92 |
+
/**
|
| 93 |
+
* \brief All available stall reasons name and respective indexes
|
| 94 |
+
* will be stored in it.
|
| 95 |
+
*/
|
| 96 |
+
typedef struct PACKED_ALIGNMENT {
|
| 97 |
+
/**
|
| 98 |
+
* Number of all available stall reasons
|
| 99 |
+
*/
|
| 100 |
+
size_t numStallReasons;
|
| 101 |
+
/**
|
| 102 |
+
* Stall reasons names of all available stall reasons
|
| 103 |
+
*/
|
| 104 |
+
char **stallReasons;
|
| 105 |
+
/**
|
| 106 |
+
* Stall reason index of all available stall reasons
|
| 107 |
+
*/
|
| 108 |
+
uint32_t *stallReasonIndex;
|
| 109 |
+
} PcSamplingStallReasons;
|
| 110 |
+
|
| 111 |
+
typedef enum {
|
| 112 |
+
/**
|
| 113 |
+
* Invalid buffer type.
|
| 114 |
+
*/
|
| 115 |
+
PC_SAMPLING_BUFFER_INVALID = 0,
|
| 116 |
+
/**
|
| 117 |
+
* Refers to CUpti_PCSamplingData buffer.
|
| 118 |
+
*/
|
| 119 |
+
PC_SAMPLING_BUFFER_PC_TO_COUNTER_DATA = 1
|
| 120 |
+
} PcSamplingBufferType;
|
| 121 |
+
|
| 122 |
+
/**
|
| 123 |
+
* \brief CUPTI PC sampling utility API result codes.
|
| 124 |
+
*
|
| 125 |
+
* Error and result codes returned by CUPTI PC sampling utility API.
|
| 126 |
+
*/
|
| 127 |
+
typedef enum {
|
| 128 |
+
/**
|
| 129 |
+
* No error
|
| 130 |
+
*/
|
| 131 |
+
CUPTI_UTIL_SUCCESS = 0,
|
| 132 |
+
/**
|
| 133 |
+
* One or more of the parameters are invalid.
|
| 134 |
+
*/
|
| 135 |
+
CUPTI_UTIL_ERROR_INVALID_PARAMETER = 1,
|
| 136 |
+
/**
|
| 137 |
+
* Unable to create a new file
|
| 138 |
+
*/
|
| 139 |
+
CUPTI_UTIL_ERROR_UNABLE_TO_CREATE_FILE = 2,
|
| 140 |
+
/**
|
| 141 |
+
* Unable to open a file
|
| 142 |
+
*/
|
| 143 |
+
CUPTI_UTIL_ERROR_UNABLE_TO_OPEN_FILE = 3,
|
| 144 |
+
/**
|
| 145 |
+
* Read or write operation failed
|
| 146 |
+
*/
|
| 147 |
+
CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED = 4,
|
| 148 |
+
/**
|
| 149 |
+
* Provided file handle is corrupted.
|
| 150 |
+
*/
|
| 151 |
+
CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED = 5,
|
| 152 |
+
/**
|
| 153 |
+
* seek operation failed.
|
| 154 |
+
*/
|
| 155 |
+
CUPTI_UTIL_ERROR_SEEK_OPERATION_FAILED = 6,
|
| 156 |
+
/**
|
| 157 |
+
* Unable to allocate enough memory to perform the requested
|
| 158 |
+
* operation.
|
| 159 |
+
*/
|
| 160 |
+
CUPTI_UTIL_ERROR_OUT_OF_MEMORY = 7,
|
| 161 |
+
/**
|
| 162 |
+
* An unknown internal error has occurred.
|
| 163 |
+
*/
|
| 164 |
+
CUPTI_UTIL_ERROR_UNKNOWN = 999,
|
| 165 |
+
CUPTI_UTIL_ERROR_FORCE_INT = 0x7fffffff
|
| 166 |
+
} CUptiUtilResult;
|
| 167 |
+
|
| 168 |
+
/**
|
| 169 |
+
* \brief Params for \ref CuptiUtilPutPcSampData
|
| 170 |
+
*/
|
| 171 |
+
typedef struct {
|
| 172 |
+
/**
|
| 173 |
+
* Size of the data structure i.e. CUptiUtil_PutPcSampDataParamsSize
|
| 174 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 175 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 176 |
+
*/
|
| 177 |
+
size_t size;
|
| 178 |
+
/**
|
| 179 |
+
* Type of buffer to store in file
|
| 180 |
+
*/
|
| 181 |
+
PcSamplingBufferType bufferType;
|
| 182 |
+
/**
|
| 183 |
+
* PC sampling buffer.
|
| 184 |
+
*/
|
| 185 |
+
void *pSamplingData;
|
| 186 |
+
/**
|
| 187 |
+
* Number of configured attributes
|
| 188 |
+
*/
|
| 189 |
+
size_t numAttributes;
|
| 190 |
+
/**
|
| 191 |
+
* Refer \ref CUpti_PCSamplingConfigurationInfo
|
| 192 |
+
* It is expected to provide configuration details of at least
|
| 193 |
+
* CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON attribute.
|
| 194 |
+
*/
|
| 195 |
+
CUpti_PCSamplingConfigurationInfo *pPCSamplingConfigurationInfo;
|
| 196 |
+
/**
|
| 197 |
+
* Refer \ref PcSamplingStallReasons.
|
| 198 |
+
*/
|
| 199 |
+
PcSamplingStallReasons *pPcSamplingStallReasons;
|
| 200 |
+
/**
|
| 201 |
+
* File name to store buffer into it.
|
| 202 |
+
*/
|
| 203 |
+
const char* fileName;
|
| 204 |
+
} CUptiUtil_PutPcSampDataParams;
|
| 205 |
+
#define CUptiUtil_PutPcSampDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_PutPcSampDataParams, fileName)
|
| 206 |
+
|
| 207 |
+
/**
|
| 208 |
+
* \brief Dump PC sampling data into the file.
|
| 209 |
+
*
|
| 210 |
+
* This API can be called multiple times.
|
| 211 |
+
* It will append buffer in the file.
|
| 212 |
+
* For every buffer it will store BufferInfo
|
| 213 |
+
* so that before retrieving data it will help to allocate buffer
|
| 214 |
+
* to store retrieved data.
|
| 215 |
+
* This API creates the file if it does not exist.
|
| 216 |
+
* If stallReasonIndex or stallReasons pointer of \ref PcSamplingStallReasons is NULL
|
| 217 |
+
* then stall reasons data will not be stored in file.
|
| 218 |
+
* It is expected to store all available stall reason data at least once to refer it during
|
| 219 |
+
* offline correlation.
|
| 220 |
+
*
|
| 221 |
+
* \retval CUPTI_UTIL_SUCCESS
|
| 222 |
+
* \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if buffer type is invalid
|
| 223 |
+
* or if either of pSamplingData, pParams pointer is NULL or stall reason configuration details not provided
|
| 224 |
+
* or filename is empty.
|
| 225 |
+
* \retval CUPTI_UTIL_ERROR_UNABLE_TO_CREATE_FILE
|
| 226 |
+
* \retval CUPTI_UTIL_ERROR_UNABLE_TO_OPEN_FILE
|
| 227 |
+
* \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED
|
| 228 |
+
*/
|
| 229 |
+
CUptiUtilResult CUPTIUTILAPI CuptiUtilPutPcSampData(CUptiUtil_PutPcSampDataParams *pParams);
|
| 230 |
+
|
| 231 |
+
/**
|
| 232 |
+
* \brief Params for \ref CuptiUtilGetHeaderData
|
| 233 |
+
*/
|
| 234 |
+
typedef struct {
|
| 235 |
+
/**
|
| 236 |
+
* Size of the data structure i.e. CUptiUtil_GetHeaderDataParamsSize
|
| 237 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 238 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 239 |
+
*/
|
| 240 |
+
size_t size;
|
| 241 |
+
/**
|
| 242 |
+
* File handle.
|
| 243 |
+
*/
|
| 244 |
+
std::ifstream *fileHandler;
|
| 245 |
+
/**
|
| 246 |
+
* Header Info.
|
| 247 |
+
*/
|
| 248 |
+
Header headerInfo;
|
| 249 |
+
|
| 250 |
+
} CUptiUtil_GetHeaderDataParams;
|
| 251 |
+
#define CUptiUtil_GetHeaderDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_GetHeaderDataParams, headerInfo)
|
| 252 |
+
|
| 253 |
+
/**
|
| 254 |
+
* \brief Get header data of file.
|
| 255 |
+
*
|
| 256 |
+
* This API must be called once initially while retrieving data from file.
|
| 257 |
+
* The \ref Header structure gives info about the total number
|
| 258 |
+
* of buffers present in the file.
|
| 259 |
+
*
|
| 260 |
+
* \retval CUPTI_UTIL_SUCCESS
|
| 261 |
+
* \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if either of pParam or fileHandle is NULL or param struct size is incorrect.
|
| 262 |
+
* \retval CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file
|
| 263 |
+
* \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED failed to read data from file.
|
| 264 |
+
*/
|
| 265 |
+
CUptiUtilResult CUPTIUTILAPI CuptiUtilGetHeaderData(CUptiUtil_GetHeaderDataParams *pParams);
|
| 266 |
+
|
| 267 |
+
/**
|
| 268 |
+
* \brief Params for \ref CuptiUtilGetBufferInfo
|
| 269 |
+
*/
|
| 270 |
+
typedef struct {
|
| 271 |
+
/**
|
| 272 |
+
* Size of the data structure i.e. CUptiUtil_GetBufferInfoParamsSize
|
| 273 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 274 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 275 |
+
*/
|
| 276 |
+
size_t size;
|
| 277 |
+
/**
|
| 278 |
+
* File handle.
|
| 279 |
+
*/
|
| 280 |
+
std::ifstream *fileHandler;
|
| 281 |
+
/**
|
| 282 |
+
* Buffer Info.
|
| 283 |
+
*/
|
| 284 |
+
BufferInfo bufferInfoData;
|
| 285 |
+
} CUptiUtil_GetBufferInfoParams;
|
| 286 |
+
#define CUptiUtil_GetBufferInfoParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_GetBufferInfoParams, bufferInfoData)
|
| 287 |
+
|
| 288 |
+
/**
|
| 289 |
+
* \brief Get buffer info data of file.
|
| 290 |
+
*
|
| 291 |
+
* This API must be called every time before calling CuptiUtilGetPcSampData API.
|
| 292 |
+
* The \ref BufferInfo structure gives info about recordCount and numSelectedStallReasons
|
| 293 |
+
* of every record in the buffer. This will help to allocate exact buffer to retrieve data into it.
|
| 294 |
+
*
|
| 295 |
+
* \retval CUPTI_UTIL_SUCCESS
|
| 296 |
+
* \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if either of pParam or fileHandle is NULL or param struct size is incorrect.
|
| 297 |
+
* \retval CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file.
|
| 298 |
+
* \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED failed to read data from file.
|
| 299 |
+
*/
|
| 300 |
+
CUptiUtilResult CUPTIUTILAPI CuptiUtilGetBufferInfo(CUptiUtil_GetBufferInfoParams *pParams);
|
| 301 |
+
|
| 302 |
+
/**
|
| 303 |
+
* \brief Params for \ref CuptiUtilGetPcSampData
|
| 304 |
+
*/
|
| 305 |
+
typedef struct {
|
| 306 |
+
/**
|
| 307 |
+
* Size of the data structure i.e. CUptiUtil_GetPcSampDataParamsSize
|
| 308 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 309 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 310 |
+
*/
|
| 311 |
+
size_t size;
|
| 312 |
+
/**
|
| 313 |
+
* File handle.
|
| 314 |
+
*/
|
| 315 |
+
std::ifstream *fileHandler;
|
| 316 |
+
/**
|
| 317 |
+
* Type of buffer to store in file
|
| 318 |
+
*/
|
| 319 |
+
PcSamplingBufferType bufferType;
|
| 320 |
+
/**
|
| 321 |
+
* Pointer to collected buffer info using \ref CuptiUtilGetBufferInfo
|
| 322 |
+
*/
|
| 323 |
+
BufferInfo *pBufferInfoData;
|
| 324 |
+
/**
|
| 325 |
+
* Pointer to allocated memory to store retrieved data from file.
|
| 326 |
+
*/
|
| 327 |
+
void *pSamplingData;
|
| 328 |
+
/**
|
| 329 |
+
* Number of configuration attributes
|
| 330 |
+
*/
|
| 331 |
+
size_t numAttributes;
|
| 332 |
+
/**
|
| 333 |
+
* Refer \ref CUpti_PCSamplingConfigurationInfo
|
| 334 |
+
*/
|
| 335 |
+
CUpti_PCSamplingConfigurationInfo *pPCSamplingConfigurationInfo;
|
| 336 |
+
/**
|
| 337 |
+
* Refer \ref PcSamplingStallReasons.
|
| 338 |
+
* For stallReasons field of \ref PcSamplingStallReasons it is expected to
|
| 339 |
+
* allocate memory for each string element of array.
|
| 340 |
+
*/
|
| 341 |
+
PcSamplingStallReasons *pPcSamplingStallReasons;
|
| 342 |
+
} CUptiUtil_GetPcSampDataParams;
|
| 343 |
+
#define CUptiUtil_GetPcSampDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_GetPcSampDataParams, pPcSamplingStallReasons)
|
| 344 |
+
|
| 345 |
+
/**
|
| 346 |
+
* \brief Retrieve PC sampling data from file into allocated buffer.
|
| 347 |
+
*
|
| 348 |
+
* This API must be called after CuptiUtilGetBufferInfo API.
|
| 349 |
+
* It will retrieve data from file into allocated buffer.
|
| 350 |
+
*
|
| 351 |
+
* \retval CUPTI_UTIL_SUCCESS
|
| 352 |
+
* \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if buffer type is invalid
|
| 353 |
+
* or if either of pSamplingData, pParams is NULL. If pPcSamplingStallReasons is not NULL then
|
| 354 |
+
* error out if either of stallReasonIndex, stallReasons or stallReasons array element pointer is NULL.
|
| 355 |
+
* or filename is empty.
|
| 356 |
+
* \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED
|
| 357 |
+
* \retval CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file.
|
| 358 |
+
*/
|
| 359 |
+
CUptiUtilResult CUPTIUTILAPI CuptiUtilGetPcSampData(CUptiUtil_GetPcSampDataParams *pParams);
|
| 360 |
+
|
| 361 |
+
/**
|
| 362 |
+
* \brief Params for \ref CuptiUtilMergePcSampData
|
| 363 |
+
*/
|
| 364 |
+
typedef struct
|
| 365 |
+
{
|
| 366 |
+
/**
|
| 367 |
+
* Size of the data structure i.e. CUptiUtil_MergePcSampDataParamsSize
|
| 368 |
+
* CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
|
| 369 |
+
* available in the structure. Used to preserve backward compatibility.
|
| 370 |
+
*/
|
| 371 |
+
size_t size;
|
| 372 |
+
/**
|
| 373 |
+
* Number of buffers to merge.
|
| 374 |
+
*/
|
| 375 |
+
size_t numberOfBuffers;
|
| 376 |
+
/**
|
| 377 |
+
* Pointer to array of buffers to merge
|
| 378 |
+
*/
|
| 379 |
+
CUpti_PCSamplingData *PcSampDataBuffer;
|
| 380 |
+
/**
|
| 381 |
+
* Pointer to array of merged buffers as per the range id.
|
| 382 |
+
*/
|
| 383 |
+
CUpti_PCSamplingData **MergedPcSampDataBuffers;
|
| 384 |
+
/**
|
| 385 |
+
* Number of merged buffers.
|
| 386 |
+
*/
|
| 387 |
+
size_t *numMergedBuffer;
|
| 388 |
+
} CUptiUtil_MergePcSampDataParams;
|
| 389 |
+
#define CUptiUtil_MergePcSampDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_MergePcSampDataParams, numMergedBuffer)
|
| 390 |
+
|
| 391 |
+
/**
|
| 392 |
+
* \brief Merge PC sampling data range id wise.
|
| 393 |
+
*
|
| 394 |
+
* This API merges PC sampling data range id wise.
|
| 395 |
+
* It allocates memory for merged data and fills data in it
|
| 396 |
+
* and provides the buffer pointer in the MergedPcSampDataBuffers field.
|
| 397 |
+
* The user is expected to free the merged data buffers after use.
|
| 398 |
+
*
|
| 399 |
+
* \retval CUPTI_UTIL_SUCCESS
|
| 400 |
+
* \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if param struct size is invalid
|
| 401 |
+
* or count of buffers to merge is invalid i.e less than 1
|
| 402 |
+
* or either of PcSampDataBuffer, MergedPcSampDataBuffers, numMergedBuffer is NULL
|
| 403 |
+
* \retval CUPTI_UTIL_ERROR_OUT_OF_MEMORY Unable to allocate memory for merged buffer.
|
| 404 |
+
*/
|
| 405 |
+
CUptiUtilResult CUPTIUTILAPI CuptiUtilMergePcSampData(CUptiUtil_MergePcSampDataParams *pParams);
|
| 406 |
+
|
| 407 |
+
/** @} */ /* END CUPTI_PCSAMPLING_UTILITY */
|
| 408 |
+
|
| 409 |
+
} }
|
| 410 |
+
|
| 411 |
+
#if defined(__GNUC__)
|
| 412 |
+
#pragma GCC visibility pop
|
| 413 |
+
#endif
|
| 414 |
+
|
| 415 |
+
#if defined(__cplusplus)
|
| 416 |
+
}
|
| 417 |
+
#endif
|
| 418 |
+
|
| 419 |
+
#endif
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_result.h
ADDED
|
@@ -0,0 +1,328 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2010-2021 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#if !defined(_CUPTI_RESULT_H_)
|
| 51 |
+
#define _CUPTI_RESULT_H_
|
| 52 |
+
|
| 53 |
+
#ifndef CUPTIAPI
|
| 54 |
+
#ifdef _WIN32
|
| 55 |
+
#define CUPTIAPI __stdcall
|
| 56 |
+
#else
|
| 57 |
+
#define CUPTIAPI
|
| 58 |
+
#endif
|
| 59 |
+
#endif
|
| 60 |
+
|
| 61 |
+
#if defined(__cplusplus)
|
| 62 |
+
extern "C" {
|
| 63 |
+
#endif
|
| 64 |
+
|
| 65 |
+
#if defined(__GNUC__) && defined(CUPTI_LIB)
|
| 66 |
+
#pragma GCC visibility push(default)
|
| 67 |
+
#endif
|
| 68 |
+
|
| 69 |
+
/**
|
| 70 |
+
* \defgroup CUPTI_RESULT_API CUPTI Result Codes
|
| 71 |
+
* Error and result codes returned by CUPTI functions.
|
| 72 |
+
* @{
|
| 73 |
+
*/
|
| 74 |
+
|
| 75 |
+
/**
|
| 76 |
+
* \brief CUPTI result codes.
|
| 77 |
+
*
|
| 78 |
+
* Error and result codes returned by CUPTI functions.
|
| 79 |
+
*/
|
| 80 |
+
typedef enum {
|
| 81 |
+
/**
|
| 82 |
+
* No error.
|
| 83 |
+
*/
|
| 84 |
+
CUPTI_SUCCESS = 0,
|
| 85 |
+
/**
|
| 86 |
+
* One or more of the parameters is invalid.
|
| 87 |
+
*/
|
| 88 |
+
CUPTI_ERROR_INVALID_PARAMETER = 1,
|
| 89 |
+
/**
|
| 90 |
+
* The device does not correspond to a valid CUDA device.
|
| 91 |
+
*/
|
| 92 |
+
CUPTI_ERROR_INVALID_DEVICE = 2,
|
| 93 |
+
/**
|
| 94 |
+
* The context is NULL or not valid.
|
| 95 |
+
*/
|
| 96 |
+
CUPTI_ERROR_INVALID_CONTEXT = 3,
|
| 97 |
+
/**
|
| 98 |
+
* The event domain id is invalid.
|
| 99 |
+
*/
|
| 100 |
+
CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID = 4,
|
| 101 |
+
/**
|
| 102 |
+
* The event id is invalid.
|
| 103 |
+
*/
|
| 104 |
+
CUPTI_ERROR_INVALID_EVENT_ID = 5,
|
| 105 |
+
/**
|
| 106 |
+
* The event name is invalid.
|
| 107 |
+
*/
|
| 108 |
+
CUPTI_ERROR_INVALID_EVENT_NAME = 6,
|
| 109 |
+
/**
|
| 110 |
+
* The current operation cannot be performed due to dependency on
|
| 111 |
+
* other factors.
|
| 112 |
+
*/
|
| 113 |
+
CUPTI_ERROR_INVALID_OPERATION = 7,
|
| 114 |
+
/**
|
| 115 |
+
* Unable to allocate enough memory to perform the requested
|
| 116 |
+
* operation.
|
| 117 |
+
*/
|
| 118 |
+
CUPTI_ERROR_OUT_OF_MEMORY = 8,
|
| 119 |
+
/**
|
| 120 |
+
* An error occurred on the performance monitoring hardware.
|
| 121 |
+
*/
|
| 122 |
+
CUPTI_ERROR_HARDWARE = 9,
|
| 123 |
+
/**
|
| 124 |
+
* The output buffer size is not sufficient to return all
|
| 125 |
+
* requested data.
|
| 126 |
+
*/
|
| 127 |
+
CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT = 10,
|
| 128 |
+
/**
|
| 129 |
+
* API is not implemented.
|
| 130 |
+
*/
|
| 131 |
+
CUPTI_ERROR_API_NOT_IMPLEMENTED = 11,
|
| 132 |
+
/**
|
| 133 |
+
* The maximum limit is reached.
|
| 134 |
+
*/
|
| 135 |
+
CUPTI_ERROR_MAX_LIMIT_REACHED = 12,
|
| 136 |
+
/**
|
| 137 |
+
* The object is not yet ready to perform the requested operation.
|
| 138 |
+
*/
|
| 139 |
+
CUPTI_ERROR_NOT_READY = 13,
|
| 140 |
+
/**
|
| 141 |
+
* The current operation is not compatible with the current state
|
| 142 |
+
* of the object
|
| 143 |
+
*/
|
| 144 |
+
CUPTI_ERROR_NOT_COMPATIBLE = 14,
|
| 145 |
+
/**
|
| 146 |
+
* CUPTI is unable to initialize its connection to the CUDA
|
| 147 |
+
* driver.
|
| 148 |
+
*/
|
| 149 |
+
CUPTI_ERROR_NOT_INITIALIZED = 15,
|
| 150 |
+
/**
|
| 151 |
+
* The metric id is invalid.
|
| 152 |
+
*/
|
| 153 |
+
CUPTI_ERROR_INVALID_METRIC_ID = 16,
|
| 154 |
+
/**
|
| 155 |
+
* The metric name is invalid.
|
| 156 |
+
*/
|
| 157 |
+
CUPTI_ERROR_INVALID_METRIC_NAME = 17,
|
| 158 |
+
/**
|
| 159 |
+
* The queue is empty.
|
| 160 |
+
*/
|
| 161 |
+
CUPTI_ERROR_QUEUE_EMPTY = 18,
|
| 162 |
+
/**
|
| 163 |
+
* Invalid handle (internal?).
|
| 164 |
+
*/
|
| 165 |
+
CUPTI_ERROR_INVALID_HANDLE = 19,
|
| 166 |
+
/**
|
| 167 |
+
* Invalid stream.
|
| 168 |
+
*/
|
| 169 |
+
CUPTI_ERROR_INVALID_STREAM = 20,
|
| 170 |
+
/**
|
| 171 |
+
* Invalid kind.
|
| 172 |
+
*/
|
| 173 |
+
CUPTI_ERROR_INVALID_KIND = 21,
|
| 174 |
+
/**
|
| 175 |
+
* Invalid event value.
|
| 176 |
+
*/
|
| 177 |
+
CUPTI_ERROR_INVALID_EVENT_VALUE = 22,
|
| 178 |
+
/**
|
| 179 |
+
* CUPTI is disabled due to conflicts with other enabled profilers
|
| 180 |
+
*/
|
| 181 |
+
CUPTI_ERROR_DISABLED = 23,
|
| 182 |
+
/**
|
| 183 |
+
* Invalid module.
|
| 184 |
+
*/
|
| 185 |
+
CUPTI_ERROR_INVALID_MODULE = 24,
|
| 186 |
+
/**
|
| 187 |
+
* Invalid metric value.
|
| 188 |
+
*/
|
| 189 |
+
CUPTI_ERROR_INVALID_METRIC_VALUE = 25,
|
| 190 |
+
/**
|
| 191 |
+
* The performance monitoring hardware is in use by another client.
|
| 192 |
+
*/
|
| 193 |
+
CUPTI_ERROR_HARDWARE_BUSY = 26,
|
| 194 |
+
/**
|
| 195 |
+
* The attempted operation is not supported on the current
|
| 196 |
+
* system or device.
|
| 197 |
+
*/
|
| 198 |
+
CUPTI_ERROR_NOT_SUPPORTED = 27,
|
| 199 |
+
/**
|
| 200 |
+
* Unified memory profiling is not supported on the system.
|
| 201 |
+
* Potential reason could be unsupported OS or architecture.
|
| 202 |
+
*/
|
| 203 |
+
CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED = 28,
|
| 204 |
+
/**
|
| 205 |
+
* Unified memory profiling is not supported on the device
|
| 206 |
+
*/
|
| 207 |
+
CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE = 29,
|
| 208 |
+
/**
|
| 209 |
+
* Unified memory profiling is not supported on a multi-GPU
|
| 210 |
+
* configuration without P2P support between any pair of devices
|
| 211 |
+
*/
|
| 212 |
+
CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES = 30,
|
| 213 |
+
/**
|
| 214 |
+
* Unified memory profiling is not supported under the
|
| 215 |
+
* Multi-Process Service (MPS) environment. CUDA 7.5 removes this
|
| 216 |
+
* restriction.
|
| 217 |
+
*/
|
| 218 |
+
CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_WITH_MPS = 31,
|
| 219 |
+
/**
|
| 220 |
+
* In CUDA 9.0, devices with compute capability 7.0 don't
|
| 221 |
+
* support CDP tracing
|
| 222 |
+
*/
|
| 223 |
+
CUPTI_ERROR_CDP_TRACING_NOT_SUPPORTED = 32,
|
| 224 |
+
/**
|
| 225 |
+
* Profiling on virtualized GPU is not supported.
|
| 226 |
+
*/
|
| 227 |
+
CUPTI_ERROR_VIRTUALIZED_DEVICE_NOT_SUPPORTED = 33,
|
| 228 |
+
/**
|
| 229 |
+
* Profiling results might be incorrect for CUDA applications
|
| 230 |
+
* compiled with nvcc version older than 9.0 for devices with
|
| 231 |
+
* compute capability 6.0 and 6.1.
|
| 232 |
+
* Profiling session will continue and CUPTI will notify it using this error code.
|
| 233 |
+
* User is advised to recompile the application code with nvcc version 9.0 or later.
|
| 234 |
+
* Ignore this warning if code is already compiled with the recommended nvcc version.
|
| 235 |
+
*/
|
| 236 |
+
CUPTI_ERROR_CUDA_COMPILER_NOT_COMPATIBLE = 34,
|
| 237 |
+
/**
|
| 238 |
+
* User doesn't have sufficient privileges which are required to
|
| 239 |
+
* start the profiling session.
|
| 240 |
+
* One possible reason for this may be that the NVIDIA driver or your system
|
| 241 |
+
* administrator may have restricted access to the NVIDIA GPU performance counters.
|
| 242 |
+
* To learn how to resolve this issue and find more information, please visit
|
| 243 |
+
* https://developer.nvidia.com/CUPTI_ERROR_INSUFFICIENT_PRIVILEGES
|
| 244 |
+
*/
|
| 245 |
+
CUPTI_ERROR_INSUFFICIENT_PRIVILEGES = 35,
|
| 246 |
+
/**
|
| 247 |
+
* Legacy CUPTI Profiling API i.e. event API from the header cupti_events.h and
|
| 248 |
+
* metric API from the header cupti_metrics.h are not compatible with the
|
| 249 |
+
* Profiling API in the header cupti_profiler_target.h and Perfworks metrics API
|
| 250 |
+
* in the headers nvperf_host.h and nvperf_target.h.
|
| 251 |
+
*/
|
| 252 |
+
CUPTI_ERROR_OLD_PROFILER_API_INITIALIZED = 36,
|
| 253 |
+
/**
|
| 254 |
+
* Missing definition of the OpenACC API routine in the linked OpenACC library.
|
| 255 |
+
*
|
| 256 |
+
* One possible reason is that OpenACC library is linked statically in the
|
| 257 |
+
* user application, which might not have the definition of all the OpenACC
|
| 258 |
+
* API routines needed for the OpenACC profiling, as compiler might ignore
|
| 259 |
+
* definitions for the functions not used in the application. This issue
|
| 260 |
+
* can be mitigated by linking the OpenACC library dynamically.
|
| 261 |
+
*/
|
| 262 |
+
CUPTI_ERROR_OPENACC_UNDEFINED_ROUTINE = 37,
|
| 263 |
+
/**
|
| 264 |
+
* Legacy CUPTI Profiling API i.e. event API from the header cupti_events.h and
|
| 265 |
+
* metric API from the header cupti_metrics.h are not supported on devices with
|
| 266 |
+
* compute capability 7.5 and higher (i.e. Turing and later GPU architectures).
|
| 267 |
+
* These APIs will be deprecated in a future CUDA release. These are replaced by
|
| 268 |
+
* Profiling API in the header cupti_profiler_target.h and Perfworks metrics API
|
| 269 |
+
* in the headers nvperf_host.h and nvperf_target.h.
|
| 270 |
+
*/
|
| 271 |
+
CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED = 38,
|
| 272 |
+
/**
|
| 273 |
+
* CUPTI doesn't allow multiple callback subscribers. Only a single subscriber
|
| 274 |
+
* can be registered at a time.
|
| 275 |
+
* Same error code is used when application is launched using NVIDIA tools
|
| 276 |
+
* like nvprof, Visual Profiler, Nsight Systems, Nsight Compute, cuda-gdb and
|
| 277 |
+
* cuda-memcheck.
|
| 278 |
+
*/
|
| 279 |
+
CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED = 39,
|
| 280 |
+
/**
|
| 281 |
+
* Profiling on virtualized GPU is not allowed by hypervisor.
|
| 282 |
+
*/
|
| 283 |
+
CUPTI_ERROR_VIRTUALIZED_DEVICE_INSUFFICIENT_PRIVILEGES = 40,
|
| 284 |
+
/**
|
| 285 |
+
* Profiling and tracing are not allowed when confidential computing mode
|
| 286 |
+
* is enabled.
|
| 287 |
+
*/
|
| 288 |
+
CUPTI_ERROR_CONFIDENTIAL_COMPUTING_NOT_SUPPORTED = 41,
|
| 289 |
+
/**
|
| 290 |
+
* CUPTI does not support NVIDIA Crypto Mining Processors (CMP).
|
| 291 |
+
* For more information, please visit https://developer.nvidia.com/ERR_NVCMPGPU
|
| 292 |
+
*/
|
| 293 |
+
CUPTI_ERROR_CMP_DEVICE_NOT_SUPPORTED = 42,
|
| 294 |
+
/**
|
| 295 |
+
* An unknown internal error has occurred.
|
| 296 |
+
*/
|
| 297 |
+
CUPTI_ERROR_UNKNOWN = 999,
|
| 298 |
+
CUPTI_ERROR_FORCE_INT = 0x7fffffff
|
| 299 |
+
} CUptiResult;
|
| 300 |
+
|
| 301 |
+
/**
|
| 302 |
+
* \brief Get the descriptive string for a CUptiResult.
|
| 303 |
+
*
|
| 304 |
+
* Return the descriptive string for a CUptiResult in \p *str.
|
| 305 |
+
* \note \b Thread-safety: this function is thread safe.
|
| 306 |
+
*
|
| 307 |
+
* \param result The result to get the string for
|
| 308 |
+
* \param str Returns the string
|
| 309 |
+
*
|
| 310 |
+
* \retval CUPTI_SUCCESS on success
|
| 311 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p str is NULL or \p
|
| 312 |
+
* result is not a valid CUptiResult
|
| 313 |
+
*/
|
| 314 |
+
CUptiResult CUPTIAPI cuptiGetResultString(CUptiResult result, const char **str);
|
| 315 |
+
|
| 316 |
+
/** @} */ /* END CUPTI_RESULT_API */
|
| 317 |
+
|
| 318 |
+
#if defined(__GNUC__) && defined(CUPTI_LIB)
|
| 319 |
+
#pragma GCC visibility pop
|
| 320 |
+
#endif
|
| 321 |
+
|
| 322 |
+
#if defined(__cplusplus)
|
| 323 |
+
}
|
| 324 |
+
#endif
|
| 325 |
+
|
| 326 |
+
#endif /*_CUPTI_RESULT_H_*/
|
| 327 |
+
|
| 328 |
+
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_runtime_cbid.h
ADDED
|
@@ -0,0 +1,447 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
// *************************************************************************
|
| 3 |
+
// Definitions of indices for API functions, unique across entire API
|
| 4 |
+
// *************************************************************************
|
| 5 |
+
|
| 6 |
+
// This file is generated. Any changes you make will be lost during the next clean build.
|
| 7 |
+
// CUDA public interface, for type definitions and cu* function prototypes
|
| 8 |
+
|
| 9 |
+
typedef enum CUpti_runtime_api_trace_cbid_enum {
|
| 10 |
+
CUPTI_RUNTIME_TRACE_CBID_INVALID = 0,
|
| 11 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDriverGetVersion_v3020 = 1,
|
| 12 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaRuntimeGetVersion_v3020 = 2,
|
| 13 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceCount_v3020 = 3,
|
| 14 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceProperties_v3020 = 4,
|
| 15 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaChooseDevice_v3020 = 5,
|
| 16 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetChannelDesc_v3020 = 6,
|
| 17 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaCreateChannelDesc_v3020 = 7,
|
| 18 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaConfigureCall_v3020 = 8,
|
| 19 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSetupArgument_v3020 = 9,
|
| 20 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetLastError_v3020 = 10,
|
| 21 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaPeekAtLastError_v3020 = 11,
|
| 22 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetErrorString_v3020 = 12,
|
| 23 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_v3020 = 13,
|
| 24 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFuncSetCacheConfig_v3020 = 14,
|
| 25 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFuncGetAttributes_v3020 = 15,
|
| 26 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSetDevice_v3020 = 16,
|
| 27 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetDevice_v3020 = 17,
|
| 28 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSetValidDevices_v3020 = 18,
|
| 29 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSetDeviceFlags_v3020 = 19,
|
| 30 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMalloc_v3020 = 20,
|
| 31 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocPitch_v3020 = 21,
|
| 32 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFree_v3020 = 22,
|
| 33 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocArray_v3020 = 23,
|
| 34 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFreeArray_v3020 = 24,
|
| 35 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocHost_v3020 = 25,
|
| 36 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFreeHost_v3020 = 26,
|
| 37 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaHostAlloc_v3020 = 27,
|
| 38 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaHostGetDevicePointer_v3020 = 28,
|
| 39 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaHostGetFlags_v3020 = 29,
|
| 40 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemGetInfo_v3020 = 30,
|
| 41 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020 = 31,
|
| 42 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2D_v3020 = 32,
|
| 43 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArray_v3020 = 33,
|
| 44 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArray_v3020 = 34,
|
| 45 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArray_v3020 = 35,
|
| 46 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArray_v3020 = 36,
|
| 47 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyArrayToArray_v3020 = 37,
|
| 48 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DArrayToArray_v3020 = 38,
|
| 49 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbol_v3020 = 39,
|
| 50 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbol_v3020 = 40,
|
| 51 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyAsync_v3020 = 41,
|
| 52 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArrayAsync_v3020 = 42,
|
| 53 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArrayAsync_v3020 = 43,
|
| 54 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DAsync_v3020 = 44,
|
| 55 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArrayAsync_v3020 = 45,
|
| 56 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArrayAsync_v3020 = 46,
|
| 57 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbolAsync_v3020 = 47,
|
| 58 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbolAsync_v3020 = 48,
|
| 59 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset_v3020 = 49,
|
| 60 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset2D_v3020 = 50,
|
| 61 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemsetAsync_v3020 = 51,
|
| 62 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset2DAsync_v3020 = 52,
|
| 63 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetSymbolAddress_v3020 = 53,
|
| 64 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetSymbolSize_v3020 = 54,
|
| 65 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaBindTexture_v3020 = 55,
|
| 66 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaBindTexture2D_v3020 = 56,
|
| 67 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaBindTextureToArray_v3020 = 57,
|
| 68 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaUnbindTexture_v3020 = 58,
|
| 69 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureAlignmentOffset_v3020 = 59,
|
| 70 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureReference_v3020 = 60,
|
| 71 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaBindSurfaceToArray_v3020 = 61,
|
| 72 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetSurfaceReference_v3020 = 62,
|
| 73 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLSetGLDevice_v3020 = 63,
|
| 74 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLRegisterBufferObject_v3020 = 64,
|
| 75 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLMapBufferObject_v3020 = 65,
|
| 76 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLUnmapBufferObject_v3020 = 66,
|
| 77 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLUnregisterBufferObject_v3020 = 67,
|
| 78 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLSetBufferObjectMapFlags_v3020 = 68,
|
| 79 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLMapBufferObjectAsync_v3020 = 69,
|
| 80 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLUnmapBufferObjectAsync_v3020 = 70,
|
| 81 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaWGLGetDevice_v3020 = 71,
|
| 82 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsGLRegisterImage_v3020 = 72,
|
| 83 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsGLRegisterBuffer_v3020 = 73,
|
| 84 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsUnregisterResource_v3020 = 74,
|
| 85 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceSetMapFlags_v3020 = 75,
|
| 86 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsMapResources_v3020 = 76,
|
| 87 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsUnmapResources_v3020 = 77,
|
| 88 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceGetMappedPointer_v3020 = 78,
|
| 89 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsSubResourceGetMappedArray_v3020 = 79,
|
| 90 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaVDPAUGetDevice_v3020 = 80,
|
| 91 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaVDPAUSetVDPAUDevice_v3020 = 81,
|
| 92 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsVDPAURegisterVideoSurface_v3020 = 82,
|
| 93 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsVDPAURegisterOutputSurface_v3020 = 83,
|
| 94 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D11GetDevice_v3020 = 84,
|
| 95 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D11GetDevices_v3020 = 85,
|
| 96 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D11SetDirect3DDevice_v3020 = 86,
|
| 97 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsD3D11RegisterResource_v3020 = 87,
|
| 98 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10GetDevice_v3020 = 88,
|
| 99 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10GetDevices_v3020 = 89,
|
| 100 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10SetDirect3DDevice_v3020 = 90,
|
| 101 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsD3D10RegisterResource_v3020 = 91,
|
| 102 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10RegisterResource_v3020 = 92,
|
| 103 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10UnregisterResource_v3020 = 93,
|
| 104 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10MapResources_v3020 = 94,
|
| 105 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10UnmapResources_v3020 = 95,
|
| 106 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceSetMapFlags_v3020 = 96,
|
| 107 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetSurfaceDimensions_v3020 = 97,
|
| 108 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedArray_v3020 = 98,
|
| 109 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedPointer_v3020 = 99,
|
| 110 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedSize_v3020 = 100,
|
| 111 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedPitch_v3020 = 101,
|
| 112 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9GetDevice_v3020 = 102,
|
| 113 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9GetDevices_v3020 = 103,
|
| 114 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9SetDirect3DDevice_v3020 = 104,
|
| 115 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9GetDirect3DDevice_v3020 = 105,
|
| 116 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsD3D9RegisterResource_v3020 = 106,
|
| 117 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9RegisterResource_v3020 = 107,
|
| 118 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnregisterResource_v3020 = 108,
|
| 119 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9MapResources_v3020 = 109,
|
| 120 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnmapResources_v3020 = 110,
|
| 121 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceSetMapFlags_v3020 = 111,
|
| 122 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetSurfaceDimensions_v3020 = 112,
|
| 123 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedArray_v3020 = 113,
|
| 124 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedPointer_v3020 = 114,
|
| 125 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedSize_v3020 = 115,
|
| 126 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedPitch_v3020 = 116,
|
| 127 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9Begin_v3020 = 117,
|
| 128 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9End_v3020 = 118,
|
| 129 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9RegisterVertexBuffer_v3020 = 119,
|
| 130 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnregisterVertexBuffer_v3020 = 120,
|
| 131 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9MapVertexBuffer_v3020 = 121,
|
| 132 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnmapVertexBuffer_v3020 = 122,
|
| 133 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaThreadExit_v3020 = 123,
|
| 134 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSetDoubleForDevice_v3020 = 124,
|
| 135 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSetDoubleForHost_v3020 = 125,
|
| 136 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaThreadSynchronize_v3020 = 126,
|
| 137 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaThreadGetLimit_v3020 = 127,
|
| 138 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaThreadSetLimit_v3020 = 128,
|
| 139 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamCreate_v3020 = 129,
|
| 140 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamDestroy_v3020 = 130,
|
| 141 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamSynchronize_v3020 = 131,
|
| 142 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamQuery_v3020 = 132,
|
| 143 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventCreate_v3020 = 133,
|
| 144 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventCreateWithFlags_v3020 = 134,
|
| 145 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventRecord_v3020 = 135,
|
| 146 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventDestroy_v3020 = 136,
|
| 147 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventSynchronize_v3020 = 137,
|
| 148 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventQuery_v3020 = 138,
|
| 149 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventElapsedTime_v3020 = 139,
|
| 150 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMalloc3D_v3020 = 140,
|
| 151 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMalloc3DArray_v3020 = 141,
|
| 152 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset3D_v3020 = 142,
|
| 153 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset3DAsync_v3020 = 143,
|
| 154 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3D_v3020 = 144,
|
| 155 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DAsync_v3020 = 145,
|
| 156 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaThreadSetCacheConfig_v3020 = 146,
|
| 157 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamWaitEvent_v3020 = 147,
|
| 158 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D11GetDirect3DDevice_v3020 = 148,
|
| 159 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaD3D10GetDirect3DDevice_v3020 = 149,
|
| 160 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaThreadGetCacheConfig_v3020 = 150,
|
| 161 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaPointerGetAttributes_v4000 = 151,
|
| 162 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaHostRegister_v4000 = 152,
|
| 163 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaHostUnregister_v4000 = 153,
|
| 164 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceCanAccessPeer_v4000 = 154,
|
| 165 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceEnablePeerAccess_v4000 = 155,
|
| 166 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceDisablePeerAccess_v4000 = 156,
|
| 167 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaPeerRegister_v4000 = 157,
|
| 168 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaPeerUnregister_v4000 = 158,
|
| 169 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaPeerGetDevicePointer_v4000 = 159,
|
| 170 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyPeer_v4000 = 160,
|
| 171 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyPeerAsync_v4000 = 161,
|
| 172 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeer_v4000 = 162,
|
| 173 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeerAsync_v4000 = 163,
|
| 174 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceReset_v3020 = 164,
|
| 175 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSynchronize_v3020 = 165,
|
| 176 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetLimit_v3020 = 166,
|
| 177 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetLimit_v3020 = 167,
|
| 178 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetCacheConfig_v3020 = 168,
|
| 179 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetCacheConfig_v3020 = 169,
|
| 180 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaProfilerInitialize_v4000 = 170,
|
| 181 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaProfilerStart_v4000 = 171,
|
| 182 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaProfilerStop_v4000 = 172,
|
| 183 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetByPCIBusId_v4010 = 173,
|
| 184 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetPCIBusId_v4010 = 174,
|
| 185 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGLGetDevices_v4010 = 175,
|
| 186 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaIpcGetEventHandle_v4010 = 176,
|
| 187 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaIpcOpenEventHandle_v4010 = 177,
|
| 188 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaIpcGetMemHandle_v4010 = 178,
|
| 189 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaIpcOpenMemHandle_v4010 = 179,
|
| 190 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaIpcCloseMemHandle_v4010 = 180,
|
| 191 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetInfo_v4010 = 181,
|
| 192 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFuncSetSharedMemConfig_v4020 = 182,
|
| 193 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetSharedMemConfig_v4020 = 183,
|
| 194 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetSharedMemConfig_v4020 = 184,
|
| 195 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaCreateTextureObject_v5000 = 185,
|
| 196 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDestroyTextureObject_v5000 = 186,
|
| 197 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectResourceDesc_v5000 = 187,
|
| 198 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectTextureDesc_v5000 = 188,
|
| 199 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaCreateSurfaceObject_v5000 = 189,
|
| 200 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDestroySurfaceObject_v5000 = 190,
|
| 201 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetSurfaceObjectResourceDesc_v5000 = 191,
|
| 202 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocMipmappedArray_v5000 = 192,
|
| 203 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetMipmappedArrayLevel_v5000 = 193,
|
| 204 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFreeMipmappedArray_v5000 = 194,
|
| 205 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaBindTextureToMipmappedArray_v5000 = 195,
|
| 206 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceGetMappedMipmappedArray_v5000 = 196,
|
| 207 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamAddCallback_v5000 = 197,
|
| 208 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamCreateWithFlags_v5000 = 198,
|
| 209 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectResourceViewDesc_v5000 = 199,
|
| 210 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetAttribute_v5000 = 200,
|
| 211 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamDestroy_v5050 = 201,
|
| 212 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamCreateWithPriority_v5050 = 202,
|
| 213 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetPriority_v5050 = 203,
|
| 214 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetFlags_v5050 = 204,
|
| 215 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetStreamPriorityRange_v5050 = 205,
|
| 216 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocManaged_v6000 = 206,
|
| 217 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6000 = 207,
|
| 218 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamAttachMemAsync_v6000 = 208,
|
| 219 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetErrorName_v6050 = 209,
|
| 220 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050 = 210,
|
| 221 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000 = 211,
|
| 222 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceFlags_v7000 = 212,
|
| 223 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_ptsz_v7000 = 213,
|
| 224 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_ptsz_v7000 = 214,
|
| 225 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_ptds_v7000 = 215,
|
| 226 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2D_ptds_v7000 = 216,
|
| 227 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArray_ptds_v7000 = 217,
|
| 228 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArray_ptds_v7000 = 218,
|
| 229 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArray_ptds_v7000 = 219,
|
| 230 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArray_ptds_v7000 = 220,
|
| 231 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyArrayToArray_ptds_v7000 = 221,
|
| 232 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DArrayToArray_ptds_v7000 = 222,
|
| 233 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbol_ptds_v7000 = 223,
|
| 234 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbol_ptds_v7000 = 224,
|
| 235 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyAsync_ptsz_v7000 = 225,
|
| 236 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArrayAsync_ptsz_v7000 = 226,
|
| 237 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArrayAsync_ptsz_v7000 = 227,
|
| 238 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DAsync_ptsz_v7000 = 228,
|
| 239 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArrayAsync_ptsz_v7000 = 229,
|
| 240 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArrayAsync_ptsz_v7000 = 230,
|
| 241 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbolAsync_ptsz_v7000 = 231,
|
| 242 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbolAsync_ptsz_v7000 = 232,
|
| 243 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset_ptds_v7000 = 233,
|
| 244 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset2D_ptds_v7000 = 234,
|
| 245 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemsetAsync_ptsz_v7000 = 235,
|
| 246 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset2DAsync_ptsz_v7000 = 236,
|
| 247 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetPriority_ptsz_v7000 = 237,
|
| 248 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetFlags_ptsz_v7000 = 238,
|
| 249 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamSynchronize_ptsz_v7000 = 239,
|
| 250 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamQuery_ptsz_v7000 = 240,
|
| 251 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamAttachMemAsync_ptsz_v7000 = 241,
|
| 252 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventRecord_ptsz_v7000 = 242,
|
| 253 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset3D_ptds_v7000 = 243,
|
| 254 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemset3DAsync_ptsz_v7000 = 244,
|
| 255 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3D_ptds_v7000 = 245,
|
| 256 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DAsync_ptsz_v7000 = 246,
|
| 257 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamWaitEvent_ptsz_v7000 = 247,
|
| 258 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamAddCallback_ptsz_v7000 = 248,
|
| 259 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeer_ptds_v7000 = 249,
|
| 260 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeerAsync_ptsz_v7000 = 250,
|
| 261 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_v7000 = 251,
|
| 262 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPrefetchAsync_v8000 = 252,
|
| 263 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPrefetchAsync_ptsz_v8000 = 253,
|
| 264 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemAdvise_v8000 = 254,
|
| 265 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetP2PAttribute_v8000 = 255,
|
| 266 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsEGLRegisterImage_v7000 = 256,
|
| 267 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerConnect_v7000 = 257,
|
| 268 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerDisconnect_v7000 = 258,
|
| 269 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerAcquireFrame_v7000 = 259,
|
| 270 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerReleaseFrame_v7000 = 260,
|
| 271 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerConnect_v7000 = 261,
|
| 272 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerDisconnect_v7000 = 262,
|
| 273 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerPresentFrame_v7000 = 263,
|
| 274 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerReturnFrame_v7000 = 264,
|
| 275 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceGetMappedEglFrame_v7000 = 265,
|
| 276 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemRangeGetAttribute_v8000 = 266,
|
| 277 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemRangeGetAttributes_v8000 = 267,
|
| 278 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerConnectWithFlags_v7000 = 268,
|
| 279 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernel_v9000 = 269,
|
| 280 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernel_ptsz_v9000 = 270,
|
| 281 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventCreateFromEGLSync_v9000 = 271,
|
| 282 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernelMultiDevice_v9000 = 272,
|
| 283 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFuncSetAttribute_v9000 = 273,
|
| 284 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaImportExternalMemory_v10000 = 274,
|
| 285 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaExternalMemoryGetMappedBuffer_v10000 = 275,
|
| 286 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaExternalMemoryGetMappedMipmappedArray_v10000 = 276,
|
| 287 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDestroyExternalMemory_v10000 = 277,
|
| 288 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaImportExternalSemaphore_v10000 = 278,
|
| 289 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_v10000 = 279,
|
| 290 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_ptsz_v10000 = 280,
|
| 291 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_v10000 = 281,
|
| 292 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_ptsz_v10000 = 282,
|
| 293 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDestroyExternalSemaphore_v10000 = 283,
|
| 294 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchHostFunc_v10000 = 284,
|
| 295 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchHostFunc_ptsz_v10000 = 285,
|
| 296 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphCreate_v10000 = 286,
|
| 297 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeGetParams_v10000 = 287,
|
| 298 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeSetParams_v10000 = 288,
|
| 299 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddKernelNode_v10000 = 289,
|
| 300 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNode_v10000 = 290,
|
| 301 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeGetParams_v10000 = 291,
|
| 302 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParams_v10000 = 292,
|
| 303 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemsetNode_v10000 = 293,
|
| 304 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemsetNodeGetParams_v10000 = 294,
|
| 305 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemsetNodeSetParams_v10000 = 295,
|
| 306 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddHostNode_v10000 = 296,
|
| 307 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphHostNodeGetParams_v10000 = 297,
|
| 308 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddChildGraphNode_v10000 = 298,
|
| 309 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphChildGraphNodeGetGraph_v10000 = 299,
|
| 310 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddEmptyNode_v10000 = 300,
|
| 311 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphClone_v10000 = 301,
|
| 312 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeFindInClone_v10000 = 302,
|
| 313 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetType_v10000 = 303,
|
| 314 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetRootNodes_v10000 = 304,
|
| 315 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetDependencies_v10000 = 305,
|
| 316 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetDependentNodes_v10000 = 306,
|
| 317 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddDependencies_v10000 = 307,
|
| 318 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphRemoveDependencies_v10000 = 308,
|
| 319 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphDestroyNode_v10000 = 309,
|
| 320 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiate_v10000 = 310,
|
| 321 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphLaunch_v10000 = 311,
|
| 322 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphLaunch_ptsz_v10000 = 312,
|
| 323 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecDestroy_v10000 = 313,
|
| 324 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphDestroy_v10000 = 314,
|
| 325 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamBeginCapture_v10000 = 315,
|
| 326 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamBeginCapture_ptsz_v10000 = 316,
|
| 327 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamIsCapturing_v10000 = 317,
|
| 328 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamIsCapturing_ptsz_v10000 = 318,
|
| 329 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamEndCapture_v10000 = 319,
|
| 330 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamEndCapture_ptsz_v10000 = 320,
|
| 331 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphHostNodeSetParams_v10000 = 321,
|
| 332 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetNodes_v10000 = 322,
|
| 333 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetEdges_v10000 = 323,
|
| 334 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v10010 = 324,
|
| 335 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_ptsz_v10010 = 325,
|
| 336 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecKernelNodeSetParams_v10010 = 326,
|
| 337 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaThreadExchangeStreamCaptureMode_v10010 = 327,
|
| 338 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetNvSciSyncAttributes_v10020 = 328,
|
| 339 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyAvailableDynamicSMemPerBlock_v10200 = 329,
|
| 340 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetFlags_v10200 = 330,
|
| 341 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetFlags_ptsz_v10200 = 331,
|
| 342 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParams_v10020 = 332,
|
| 343 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemsetNodeSetParams_v10020 = 333,
|
| 344 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecHostNodeSetParams_v10020 = 334,
|
| 345 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecUpdate_v10020 = 335,
|
| 346 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetFuncBySymbol_v11000 = 336,
|
| 347 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaCtxResetPersistingL2Cache_v11000 = 337,
|
| 348 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeCopyAttributes_v11000 = 338,
|
| 349 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeGetAttribute_v11000 = 339,
|
| 350 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeSetAttribute_v11000 = 340,
|
| 351 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamCopyAttributes_v11000 = 341,
|
| 352 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamCopyAttributes_ptsz_v11000 = 342,
|
| 353 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetAttribute_v11000 = 343,
|
| 354 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetAttribute_ptsz_v11000 = 344,
|
| 355 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetAttribute_v11000 = 345,
|
| 356 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetAttribute_ptsz_v11000 = 346,
|
| 357 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetTexture1DLinearMaxWidth_v11010 = 347,
|
| 358 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphUpload_v10000 = 348,
|
| 359 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphUpload_ptsz_v10000 = 349,
|
| 360 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNodeToSymbol_v11010 = 350,
|
| 361 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNodeFromSymbol_v11010 = 351,
|
| 362 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNode1D_v11010 = 352,
|
| 363 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParamsToSymbol_v11010 = 353,
|
| 364 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParamsFromSymbol_v11010 = 354,
|
| 365 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParams1D_v11010 = 355,
|
| 366 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParamsToSymbol_v11010 = 356,
|
| 367 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParamsFromSymbol_v11010 = 357,
|
| 368 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParams1D_v11010 = 358,
|
| 369 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetSparseProperties_v11010 = 359,
|
| 370 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMipmappedArrayGetSparseProperties_v11010 = 360,
|
| 371 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecChildGraphNodeSetParams_v11010 = 361,
|
| 372 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddEventRecordNode_v11010 = 362,
|
| 373 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventRecordNodeGetEvent_v11010 = 363,
|
| 374 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventRecordNodeSetEvent_v11010 = 364,
|
| 375 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddEventWaitNode_v11010 = 365,
|
| 376 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventWaitNodeGetEvent_v11010 = 366,
|
| 377 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventWaitNodeSetEvent_v11010 = 367,
|
| 378 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecEventRecordNodeSetEvent_v11010 = 368,
|
| 379 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecEventWaitNodeSetEvent_v11010 = 369,
|
| 380 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventRecordWithFlags_v11010 = 370,
|
| 381 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaEventRecordWithFlags_ptsz_v11010 = 371,
|
| 382 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetDefaultMemPool_v11020 = 372,
|
| 383 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocAsync_v11020 = 373,
|
| 384 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocAsync_ptsz_v11020 = 374,
|
| 385 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFreeAsync_v11020 = 375,
|
| 386 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaFreeAsync_ptsz_v11020 = 376,
|
| 387 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolTrimTo_v11020 = 377,
|
| 388 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolSetAttribute_v11020 = 378,
|
| 389 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolGetAttribute_v11020 = 379,
|
| 390 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolSetAccess_v11020 = 380,
|
| 391 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetPlane_v11020 = 381,
|
| 392 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolGetAccess_v11020 = 382,
|
| 393 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolCreate_v11020 = 383,
|
| 394 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolDestroy_v11020 = 384,
|
| 395 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetMemPool_v11020 = 385,
|
| 396 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetMemPool_v11020 = 386,
|
| 397 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolExportToShareableHandle_v11020 = 387,
|
| 398 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolImportFromShareableHandle_v11020 = 388,
|
| 399 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolExportPointer_v11020 = 389,
|
| 400 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolImportPointer_v11020 = 390,
|
| 401 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocFromPoolAsync_v11020 = 391,
|
| 402 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMallocFromPoolAsync_ptsz_v11020 = 392,
|
| 403 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_v2_v11020 = 393,
|
| 404 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_v2_ptsz_v11020 = 394,
|
| 405 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_v2_v11020 = 395,
|
| 406 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_v2_ptsz_v11020 = 396,
|
| 407 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddExternalSemaphoresSignalNode_v11020 = 397,
|
| 408 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresSignalNodeGetParams_v11020 = 398,
|
| 409 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresSignalNodeSetParams_v11020 = 399,
|
| 410 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddExternalSemaphoresWaitNode_v11020 = 400,
|
| 411 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresWaitNodeGetParams_v11020 = 401,
|
| 412 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresWaitNodeSetParams_v11020 = 402,
|
| 413 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecExternalSemaphoresSignalNodeSetParams_v11020 = 403,
|
| 414 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecExternalSemaphoresWaitNodeSetParams_v11020 = 404,
|
| 415 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceFlushGPUDirectRDMAWrites_v11030 = 405,
|
| 416 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetDriverEntryPoint_v11030 = 406,
|
| 417 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGetDriverEntryPoint_ptsz_v11030 = 407,
|
| 418 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphDebugDotPrint_v11030 = 408,
|
| 419 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v2_v11030 = 409,
|
| 420 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v2_ptsz_v11030 = 410,
|
| 421 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamUpdateCaptureDependencies_v11030 = 411,
|
| 422 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaStreamUpdateCaptureDependencies_ptsz_v11030 = 412,
|
| 423 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaUserObjectCreate_v11030 = 413,
|
| 424 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaUserObjectRetain_v11030 = 414,
|
| 425 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaUserObjectRelease_v11030 = 415,
|
| 426 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphRetainUserObject_v11030 = 416,
|
| 427 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphReleaseUserObject_v11030 = 417,
|
| 428 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiateWithFlags_v11040 = 418,
|
| 429 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemAllocNode_v11040 = 419,
|
| 430 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemAllocNodeGetParams_v11040 = 420,
|
| 431 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemFreeNode_v11040 = 421,
|
| 432 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemFreeNodeGetParams_v11040 = 422,
|
| 433 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGraphMemTrim_v11040 = 423,
|
| 434 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetGraphMemAttribute_v11040 = 424,
|
| 435 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetGraphMemAttribute_v11040 = 425,
|
| 436 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeSetEnabled_v11060 = 426,
|
| 437 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetEnabled_v11060 = 427,
|
| 438 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetMemoryRequirements_v11060 = 428,
|
| 439 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaMipmappedArrayGetMemoryRequirements_v11060 = 429,
|
| 440 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernelExC_v11060 = 430,
|
| 441 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernelExC_ptsz_v11060 = 431,
|
| 442 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxPotentialClusterSize_v11070 = 432,
|
| 443 |
+
CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveClusters_v11070 = 433,
|
| 444 |
+
CUPTI_RUNTIME_TRACE_CBID_SIZE = 434,
|
| 445 |
+
CUPTI_RUNTIME_TRACE_CBID_FORCE_INT = 0x7fffffff
|
| 446 |
+
} CUpti_runtime_api_trace_cbid;
|
| 447 |
+
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_target.h
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#if !defined(_CUPTI_TARGET_H_)
|
| 2 |
+
#define _CUPTI_TARGET_H_
|
| 3 |
+
|
| 4 |
+
/*
|
| 5 |
+
CUPTI profiler target APIs
|
| 6 |
+
This file contains the CUPTI profiling APIs.
|
| 7 |
+
*/
|
| 8 |
+
#include <cupti_result.h>
|
| 9 |
+
#include <stddef.h>
|
| 10 |
+
#include <stdint.h>
|
| 11 |
+
|
| 12 |
+
#ifdef __cplusplus
|
| 13 |
+
extern "C" {
|
| 14 |
+
#endif
|
| 15 |
+
|
| 16 |
+
#if defined(__GNUC__) && defined(CUPTI_LIB)
|
| 17 |
+
#pragma GCC visibility push(default)
|
| 18 |
+
#endif
|
| 19 |
+
|
| 20 |
+
/* Byte size of type_ measured from its start through the end of lastfield_:
   the field's offset plus the field's own size, so padding after that field is
   not counted. The #ifndef guard keeps any definition that was made earlier. */
#ifndef CUPTI_PROFILER_STRUCT_SIZE
|
| 21 |
+
#define CUPTI_PROFILER_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
|
| 22 |
+
#endif
|
| 23 |
+
|
| 24 |
+
/* Parameter block passed by pointer to cuptiDeviceGetChipName; the [in]/[out]
   tags on each field give its direction. */
typedef struct CUpti_Device_GetChipName_Params
|
| 25 |
+
{
|
| 26 |
+
size_t structSize; //!< [in] size of this struct; presumably CUpti_Device_GetChipName_Params_STRUCT_SIZE (TODO confirm)
|
| 27 |
+
void* pPriv; //!< [in] assign to NULL
|
| 28 |
+
|
| 29 |
+
size_t deviceIndex; //!< [in] index selecting the device to query (numbering scheme not stated in this header)
|
| 30 |
+
const char* pChipName; //!< [out] chip name reported for that device
|
| 31 |
+
} CUpti_Device_GetChipName_Params;
|
| 32 |
+
|
| 33 |
+
/* Byte size of CUpti_Device_GetChipName_Params measured through its last field, pChipName. */
#define CUpti_Device_GetChipName_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Device_GetChipName_Params, pChipName)
|
| 34 |
+
/* Takes the parameter block by pointer and returns a CUptiResult status.
   Per the struct's field tags, deviceIndex is an input and pChipName is the output. */
CUptiResult CUPTIAPI cuptiDeviceGetChipName(CUpti_Device_GetChipName_Params *pParams);
|
| 35 |
+
|
| 36 |
+
#if defined(__GNUC__) && defined(CUPTI_LIB)
|
| 37 |
+
#pragma GCC visibility pop
|
| 38 |
+
#endif
|
| 39 |
+
|
| 40 |
+
#ifdef __cplusplus
|
| 41 |
+
} /* extern "C" */
|
| 42 |
+
#endif
|
| 43 |
+
#endif
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_version.h
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2010-2018 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
#if !defined(_CUPTI_VERSION_H_)
|
| 51 |
+
#define _CUPTI_VERSION_H_
|
| 52 |
+
|
| 53 |
+
#include <cuda_stdint.h>
|
| 54 |
+
#include <cupti_result.h>
|
| 55 |
+
|
| 56 |
+
/* CUPTIAPI is placed between the return type and the name of the CUPTI functions
   declared in this header. It expands to __stdcall when _WIN32 is defined and to
   nothing otherwise; the outer #ifndef keeps any definition made earlier. */
#ifndef CUPTIAPI
|
| 57 |
+
#ifdef _WIN32
|
| 58 |
+
#define CUPTIAPI __stdcall
|
| 59 |
+
#else
|
| 60 |
+
#define CUPTIAPI
|
| 61 |
+
#endif
|
| 62 |
+
#endif
|
| 63 |
+
|
| 64 |
+
#if defined(__cplusplus)
|
| 65 |
+
extern "C" {
|
| 66 |
+
#endif
|
| 67 |
+
|
| 68 |
+
#if defined(__GNUC__) && defined(CUPTI_LIB)
|
| 69 |
+
#pragma GCC visibility push(default)
|
| 70 |
+
#endif
|
| 71 |
+
|
| 72 |
+
/**
|
| 73 |
+
* \defgroup CUPTI_VERSION_API CUPTI Version
|
| 74 |
+
* Function and macro to determine the CUPTI version.
|
| 75 |
+
* @{
|
| 76 |
+
*/
|
| 77 |
+
|
| 78 |
+
/**
|
| 79 |
+
* \brief The API version for this implementation of CUPTI.
|
| 80 |
+
*
|
| 81 |
+
* The API version for this implementation of CUPTI. This define along
|
| 82 |
+
* with \ref cuptiGetVersion can be used to dynamically detect if the
|
| 83 |
+
* version of CUPTI compiled against matches the version of the loaded
|
| 84 |
+
* CUPTI library.
|
| 85 |
+
*
|
| 86 |
+
* v1 : CUDAToolsSDK 4.0
|
| 87 |
+
* v2 : CUDAToolsSDK 4.1
|
| 88 |
+
* v3 : CUDA Toolkit 5.0
|
| 89 |
+
* v4 : CUDA Toolkit 5.5
|
| 90 |
+
* v5 : CUDA Toolkit 6.0
|
| 91 |
+
* v6 : CUDA Toolkit 6.5
|
| 92 |
+
* v7 : CUDA Toolkit 6.5 (with sm_52 support)
|
| 93 |
+
* v8 : CUDA Toolkit 7.0
|
| 94 |
+
* v9 : CUDA Toolkit 8.0
|
| 95 |
+
* v10 : CUDA Toolkit 9.0
|
| 96 |
+
* v11 : CUDA Toolkit 9.1
|
| 97 |
+
* v12 : CUDA Toolkit 10.0, 10.1 and 10.2
|
| 98 |
+
* v13 : CUDA Toolkit 11.0
|
| 99 |
+
* v14 : CUDA Toolkit 11.1
|
| 100 |
+
* v15 : CUDA Toolkit 11.2, 11.3 and 11.4
|
| 101 |
+
* v16 : CUDA Toolkit 11.5
|
| 102 |
+
* v17 : CUDA Toolkit 11.6
|
| 103 |
+
* v18 : CUDA Toolkit 11.8
|
| 104 |
+
*/
|
| 105 |
+
#define CUPTI_API_VERSION 18
|
| 106 |
+
|
| 107 |
+
/**
|
| 108 |
+
* \brief Get the CUPTI API version.
|
| 109 |
+
*
|
| 110 |
+
* Return the API version in \p *version. Compare the result with
* CUPTI_API_VERSION to detect whether the CUPTI version compiled against
* matches the version of the loaded CUPTI library.
|
| 111 |
+
*
|
| 112 |
+
* \param version [out] Receives the API version; must not be NULL
|
| 113 |
+
*
|
| 114 |
+
* \retval CUPTI_SUCCESS on success
|
| 115 |
+
* \retval CUPTI_ERROR_INVALID_PARAMETER if \p version is NULL
|
| 116 |
+
* \sa CUPTI_API_VERSION
|
| 117 |
+
*/
|
| 118 |
+
CUptiResult CUPTIAPI cuptiGetVersion(uint32_t *version);
|
| 119 |
+
|
| 120 |
+
/** @} */ /* END CUPTI_VERSION_API */
|
| 121 |
+
|
| 122 |
+
#if defined(__GNUC__) && defined(CUPTI_LIB)
|
| 123 |
+
#pragma GCC visibility pop
|
| 124 |
+
#endif
|
| 125 |
+
|
| 126 |
+
#if defined(__cplusplus)
|
| 127 |
+
}
|
| 128 |
+
#endif
|
| 129 |
+
|
| 130 |
+
#endif /*_CUPTI_VERSION_H_*/
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_meta.h
ADDED
|
@@ -0,0 +1,2941 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// This file is generated. Any changes you make will be lost during the next clean build.
|
| 2 |
+
|
| 3 |
+
// No dependent includes
|
| 4 |
+
|
| 5 |
+
// CUDA public interface, for type definitions and cu* function prototypes
|
| 6 |
+
#include "cuda.h"
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
// *************************************************************************
|
| 10 |
+
// Definitions of structs to hold parameters for each function
|
| 11 |
+
// *************************************************************************
|
| 12 |
+
|
| 13 |
+
// Holds the parameters of cuGetErrorString (prototype is in cuda.h).
typedef struct cuGetErrorString_params_st {
|
| 14 |
+
CUresult error;
|
| 15 |
+
const char **pStr; // NOTE(review): presumably where the call stores the message pointer - confirm against cuda.h
|
| 16 |
+
} cuGetErrorString_params;
|
| 17 |
+
|
| 18 |
+
// Holds the parameters of cuGetErrorName (prototype is in cuda.h).
// Field layout is the same as cuGetErrorString_params.
typedef struct cuGetErrorName_params_st {
|
| 19 |
+
CUresult error;
|
| 20 |
+
const char **pStr; // NOTE(review): presumably where the call stores the name pointer - confirm against cuda.h
|
| 21 |
+
} cuGetErrorName_params;
|
| 22 |
+
|
| 23 |
+
// Holds the single parameter of cuInit (prototype is in cuda.h).
typedef struct cuInit_params_st {
|
| 24 |
+
unsigned int Flags;
|
| 25 |
+
} cuInit_params;
|
| 26 |
+
|
| 27 |
+
// Holds the single parameter of cuDriverGetVersion (prototype is in cuda.h).
typedef struct cuDriverGetVersion_params_st {
|
| 28 |
+
int *driverVersion; // NOTE(review): pointer argument, presumably the call's output - confirm against cuda.h
|
| 29 |
+
} cuDriverGetVersion_params;
|
| 30 |
+
|
| 31 |
+
// Holds the parameters of cuDeviceGet (prototype is in cuda.h).
typedef struct cuDeviceGet_params_st {
|
| 32 |
+
CUdevice *device; // NOTE(review): pointer argument, presumably receives the device handle - confirm against cuda.h
|
| 33 |
+
int ordinal;
|
| 34 |
+
} cuDeviceGet_params;
|
| 35 |
+
|
| 36 |
+
// Holds the single parameter of cuDeviceGetCount (prototype is in cuda.h).
typedef struct cuDeviceGetCount_params_st {
|
| 37 |
+
int *count; // NOTE(review): pointer argument, presumably the call's output - confirm against cuda.h
|
| 38 |
+
} cuDeviceGetCount_params;
|
| 39 |
+
|
| 40 |
+
// Holds the parameters of cuDeviceGetName (prototype is in cuda.h).
typedef struct cuDeviceGetName_params_st {
|
| 41 |
+
char *name; // NOTE(review): writable char buffer, presumably sized by len - confirm against cuda.h
|
| 42 |
+
int len;
|
| 43 |
+
CUdevice dev;
|
| 44 |
+
} cuDeviceGetName_params;
|
| 45 |
+
|
| 46 |
+
typedef struct cuDeviceGetUuid_params_st {
|
| 47 |
+
CUuuid *uuid;
|
| 48 |
+
CUdevice dev;
|
| 49 |
+
} cuDeviceGetUuid_params;
|
| 50 |
+
|
| 51 |
+
typedef struct cuDeviceGetUuid_v2_params_st {
|
| 52 |
+
CUuuid *uuid;
|
| 53 |
+
CUdevice dev;
|
| 54 |
+
} cuDeviceGetUuid_v2_params;
|
| 55 |
+
|
| 56 |
+
typedef struct cuDeviceGetLuid_params_st {
|
| 57 |
+
char *luid;
|
| 58 |
+
unsigned int *deviceNodeMask;
|
| 59 |
+
CUdevice dev;
|
| 60 |
+
} cuDeviceGetLuid_params;
|
| 61 |
+
|
| 62 |
+
typedef struct cuDeviceTotalMem_v2_params_st {
|
| 63 |
+
size_t *bytes;
|
| 64 |
+
CUdevice dev;
|
| 65 |
+
} cuDeviceTotalMem_v2_params;
|
| 66 |
+
|
| 67 |
+
typedef struct cuDeviceGetTexture1DLinearMaxWidth_params_st {
|
| 68 |
+
size_t *maxWidthInElements;
|
| 69 |
+
CUarray_format format;
|
| 70 |
+
unsigned numChannels;
|
| 71 |
+
CUdevice dev;
|
| 72 |
+
} cuDeviceGetTexture1DLinearMaxWidth_params;
|
| 73 |
+
|
| 74 |
+
typedef struct cuDeviceGetAttribute_params_st {
|
| 75 |
+
int *pi;
|
| 76 |
+
CUdevice_attribute attrib;
|
| 77 |
+
CUdevice dev;
|
| 78 |
+
} cuDeviceGetAttribute_params;
|
| 79 |
+
|
| 80 |
+
typedef struct cuDeviceGetNvSciSyncAttributes_params_st {
|
| 81 |
+
void *nvSciSyncAttrList;
|
| 82 |
+
CUdevice dev;
|
| 83 |
+
int flags;
|
| 84 |
+
} cuDeviceGetNvSciSyncAttributes_params;
|
| 85 |
+
|
| 86 |
+
typedef struct cuDeviceSetMemPool_params_st {
|
| 87 |
+
CUdevice dev;
|
| 88 |
+
CUmemoryPool pool;
|
| 89 |
+
} cuDeviceSetMemPool_params;
|
| 90 |
+
|
| 91 |
+
typedef struct cuDeviceGetMemPool_params_st {
|
| 92 |
+
CUmemoryPool *pool;
|
| 93 |
+
CUdevice dev;
|
| 94 |
+
} cuDeviceGetMemPool_params;
|
| 95 |
+
|
| 96 |
+
typedef struct cuDeviceGetDefaultMemPool_params_st {
|
| 97 |
+
CUmemoryPool *pool_out;
|
| 98 |
+
CUdevice dev;
|
| 99 |
+
} cuDeviceGetDefaultMemPool_params;
|
| 100 |
+
|
| 101 |
+
typedef struct cuFlushGPUDirectRDMAWrites_params_st {
|
| 102 |
+
CUflushGPUDirectRDMAWritesTarget target;
|
| 103 |
+
CUflushGPUDirectRDMAWritesScope scope;
|
| 104 |
+
} cuFlushGPUDirectRDMAWrites_params;
|
| 105 |
+
|
| 106 |
+
typedef struct cuDeviceGetProperties_params_st {
|
| 107 |
+
CUdevprop *prop;
|
| 108 |
+
CUdevice dev;
|
| 109 |
+
} cuDeviceGetProperties_params;
|
| 110 |
+
|
| 111 |
+
typedef struct cuDeviceComputeCapability_params_st {
|
| 112 |
+
int *major;
|
| 113 |
+
int *minor;
|
| 114 |
+
CUdevice dev;
|
| 115 |
+
} cuDeviceComputeCapability_params;
|
| 116 |
+
|
| 117 |
+
typedef struct cuDevicePrimaryCtxRetain_params_st {
|
| 118 |
+
CUcontext *pctx;
|
| 119 |
+
CUdevice dev;
|
| 120 |
+
} cuDevicePrimaryCtxRetain_params;
|
| 121 |
+
|
| 122 |
+
typedef struct cuDevicePrimaryCtxRelease_v2_params_st {
|
| 123 |
+
CUdevice dev;
|
| 124 |
+
} cuDevicePrimaryCtxRelease_v2_params;
|
| 125 |
+
|
| 126 |
+
typedef struct cuDevicePrimaryCtxSetFlags_v2_params_st {
|
| 127 |
+
CUdevice dev;
|
| 128 |
+
unsigned int flags;
|
| 129 |
+
} cuDevicePrimaryCtxSetFlags_v2_params;
|
| 130 |
+
|
| 131 |
+
typedef struct cuDevicePrimaryCtxGetState_params_st {
|
| 132 |
+
CUdevice dev;
|
| 133 |
+
unsigned int *flags;
|
| 134 |
+
int *active;
|
| 135 |
+
} cuDevicePrimaryCtxGetState_params;
|
| 136 |
+
|
| 137 |
+
typedef struct cuDevicePrimaryCtxReset_v2_params_st {
|
| 138 |
+
CUdevice dev;
|
| 139 |
+
} cuDevicePrimaryCtxReset_v2_params;
|
| 140 |
+
|
| 141 |
+
typedef struct cuDeviceGetExecAffinitySupport_params_st {
|
| 142 |
+
int *pi;
|
| 143 |
+
CUexecAffinityType type;
|
| 144 |
+
CUdevice dev;
|
| 145 |
+
} cuDeviceGetExecAffinitySupport_params;
|
| 146 |
+
|
| 147 |
+
typedef struct cuCtxCreate_v2_params_st {
|
| 148 |
+
CUcontext *pctx;
|
| 149 |
+
unsigned int flags;
|
| 150 |
+
CUdevice dev;
|
| 151 |
+
} cuCtxCreate_v2_params;
|
| 152 |
+
|
| 153 |
+
typedef struct cuCtxCreate_v3_params_st {
|
| 154 |
+
CUcontext *pctx;
|
| 155 |
+
CUexecAffinityParam *paramsArray;
|
| 156 |
+
int numParams;
|
| 157 |
+
unsigned int flags;
|
| 158 |
+
CUdevice dev;
|
| 159 |
+
} cuCtxCreate_v3_params;
|
| 160 |
+
|
| 161 |
+
typedef struct cuCtxDestroy_v2_params_st {
|
| 162 |
+
CUcontext ctx;
|
| 163 |
+
} cuCtxDestroy_v2_params;
|
| 164 |
+
|
| 165 |
+
typedef struct cuCtxPushCurrent_v2_params_st {
|
| 166 |
+
CUcontext ctx;
|
| 167 |
+
} cuCtxPushCurrent_v2_params;
|
| 168 |
+
|
| 169 |
+
typedef struct cuCtxPopCurrent_v2_params_st {
|
| 170 |
+
CUcontext *pctx;
|
| 171 |
+
} cuCtxPopCurrent_v2_params;
|
| 172 |
+
|
| 173 |
+
typedef struct cuCtxSetCurrent_params_st {
|
| 174 |
+
CUcontext ctx;
|
| 175 |
+
} cuCtxSetCurrent_params;
|
| 176 |
+
|
| 177 |
+
typedef struct cuCtxGetCurrent_params_st {
|
| 178 |
+
CUcontext *pctx;
|
| 179 |
+
} cuCtxGetCurrent_params;
|
| 180 |
+
|
| 181 |
+
typedef struct cuCtxGetDevice_params_st {
|
| 182 |
+
CUdevice *device;
|
| 183 |
+
} cuCtxGetDevice_params;
|
| 184 |
+
|
| 185 |
+
typedef struct cuCtxGetFlags_params_st {
|
| 186 |
+
unsigned int *flags;
|
| 187 |
+
} cuCtxGetFlags_params;
|
| 188 |
+
|
| 189 |
+
typedef struct cuCtxSetLimit_params_st {
|
| 190 |
+
CUlimit limit;
|
| 191 |
+
size_t value;
|
| 192 |
+
} cuCtxSetLimit_params;
|
| 193 |
+
|
| 194 |
+
typedef struct cuCtxGetLimit_params_st {
|
| 195 |
+
size_t *pvalue;
|
| 196 |
+
CUlimit limit;
|
| 197 |
+
} cuCtxGetLimit_params;
|
| 198 |
+
|
| 199 |
+
typedef struct cuCtxGetCacheConfig_params_st {
|
| 200 |
+
CUfunc_cache *pconfig;
|
| 201 |
+
} cuCtxGetCacheConfig_params;
|
| 202 |
+
|
| 203 |
+
typedef struct cuCtxSetCacheConfig_params_st {
|
| 204 |
+
CUfunc_cache config;
|
| 205 |
+
} cuCtxSetCacheConfig_params;
|
| 206 |
+
|
| 207 |
+
typedef struct cuCtxGetSharedMemConfig_params_st {
|
| 208 |
+
CUsharedconfig *pConfig;
|
| 209 |
+
} cuCtxGetSharedMemConfig_params;
|
| 210 |
+
|
| 211 |
+
typedef struct cuCtxSetSharedMemConfig_params_st {
|
| 212 |
+
CUsharedconfig config;
|
| 213 |
+
} cuCtxSetSharedMemConfig_params;
|
| 214 |
+
|
| 215 |
+
typedef struct cuCtxGetApiVersion_params_st {
|
| 216 |
+
CUcontext ctx;
|
| 217 |
+
unsigned int *version;
|
| 218 |
+
} cuCtxGetApiVersion_params;
|
| 219 |
+
|
| 220 |
+
typedef struct cuCtxGetStreamPriorityRange_params_st {
|
| 221 |
+
int *leastPriority;
|
| 222 |
+
int *greatestPriority;
|
| 223 |
+
} cuCtxGetStreamPriorityRange_params;
|
| 224 |
+
|
| 225 |
+
typedef struct cuCtxGetExecAffinity_params_st {
|
| 226 |
+
CUexecAffinityParam *pExecAffinity;
|
| 227 |
+
CUexecAffinityType type;
|
| 228 |
+
} cuCtxGetExecAffinity_params;
|
| 229 |
+
|
| 230 |
+
typedef struct cuCtxAttach_params_st {
|
| 231 |
+
CUcontext *pctx;
|
| 232 |
+
unsigned int flags;
|
| 233 |
+
} cuCtxAttach_params;
|
| 234 |
+
|
| 235 |
+
typedef struct cuCtxDetach_params_st {
|
| 236 |
+
CUcontext ctx;
|
| 237 |
+
} cuCtxDetach_params;
|
| 238 |
+
|
| 239 |
+
typedef struct cuModuleLoad_params_st {
|
| 240 |
+
CUmodule *module;
|
| 241 |
+
const char *fname;
|
| 242 |
+
} cuModuleLoad_params;
|
| 243 |
+
|
| 244 |
+
typedef struct cuModuleLoadData_params_st {
|
| 245 |
+
CUmodule *module;
|
| 246 |
+
const void *image;
|
| 247 |
+
} cuModuleLoadData_params;
|
| 248 |
+
|
| 249 |
+
typedef struct cuModuleLoadDataEx_params_st {
|
| 250 |
+
CUmodule *module;
|
| 251 |
+
const void *image;
|
| 252 |
+
unsigned int numOptions;
|
| 253 |
+
CUjit_option *options;
|
| 254 |
+
void **optionValues;
|
| 255 |
+
} cuModuleLoadDataEx_params;
|
| 256 |
+
|
| 257 |
+
typedef struct cuModuleLoadFatBinary_params_st {
|
| 258 |
+
CUmodule *module;
|
| 259 |
+
const void *fatCubin;
|
| 260 |
+
} cuModuleLoadFatBinary_params;
|
| 261 |
+
|
| 262 |
+
typedef struct cuModuleUnload_params_st {
|
| 263 |
+
CUmodule hmod;
|
| 264 |
+
} cuModuleUnload_params;
|
| 265 |
+
|
| 266 |
+
typedef struct cuModuleGetFunction_params_st {
|
| 267 |
+
CUfunction *hfunc;
|
| 268 |
+
CUmodule hmod;
|
| 269 |
+
const char *name;
|
| 270 |
+
} cuModuleGetFunction_params;
|
| 271 |
+
|
| 272 |
+
typedef struct cuModuleGetGlobal_v2_params_st {
|
| 273 |
+
CUdeviceptr *dptr;
|
| 274 |
+
size_t *bytes;
|
| 275 |
+
CUmodule hmod;
|
| 276 |
+
const char *name;
|
| 277 |
+
} cuModuleGetGlobal_v2_params;
|
| 278 |
+
|
| 279 |
+
typedef struct cuModuleGetTexRef_params_st {
|
| 280 |
+
CUtexref *pTexRef;
|
| 281 |
+
CUmodule hmod;
|
| 282 |
+
const char *name;
|
| 283 |
+
} cuModuleGetTexRef_params;
|
| 284 |
+
|
| 285 |
+
typedef struct cuModuleGetSurfRef_params_st {
|
| 286 |
+
CUsurfref *pSurfRef;
|
| 287 |
+
CUmodule hmod;
|
| 288 |
+
const char *name;
|
| 289 |
+
} cuModuleGetSurfRef_params;
|
| 290 |
+
|
| 291 |
+
typedef struct cuLinkCreate_v2_params_st {
|
| 292 |
+
unsigned int numOptions;
|
| 293 |
+
CUjit_option *options;
|
| 294 |
+
void **optionValues;
|
| 295 |
+
CUlinkState *stateOut;
|
| 296 |
+
} cuLinkCreate_v2_params;
|
| 297 |
+
|
| 298 |
+
typedef struct cuLinkAddData_v2_params_st {
|
| 299 |
+
CUlinkState state;
|
| 300 |
+
CUjitInputType type;
|
| 301 |
+
void *data;
|
| 302 |
+
size_t size;
|
| 303 |
+
const char *name;
|
| 304 |
+
unsigned int numOptions;
|
| 305 |
+
CUjit_option *options;
|
| 306 |
+
void **optionValues;
|
| 307 |
+
} cuLinkAddData_v2_params;
|
| 308 |
+
|
| 309 |
+
typedef struct cuLinkAddFile_v2_params_st {
|
| 310 |
+
CUlinkState state;
|
| 311 |
+
CUjitInputType type;
|
| 312 |
+
const char *path;
|
| 313 |
+
unsigned int numOptions;
|
| 314 |
+
CUjit_option *options;
|
| 315 |
+
void **optionValues;
|
| 316 |
+
} cuLinkAddFile_v2_params;
|
| 317 |
+
|
| 318 |
+
typedef struct cuLinkComplete_params_st {
|
| 319 |
+
CUlinkState state;
|
| 320 |
+
void **cubinOut;
|
| 321 |
+
size_t *sizeOut;
|
| 322 |
+
} cuLinkComplete_params;
|
| 323 |
+
|
| 324 |
+
typedef struct cuLinkDestroy_params_st {
|
| 325 |
+
CUlinkState state;
|
| 326 |
+
} cuLinkDestroy_params;
|
| 327 |
+
|
| 328 |
+
typedef struct cuMemGetInfo_v2_params_st {
|
| 329 |
+
size_t *free;
|
| 330 |
+
size_t *total;
|
| 331 |
+
} cuMemGetInfo_v2_params;
|
| 332 |
+
|
| 333 |
+
typedef struct cuMemAlloc_v2_params_st {
|
| 334 |
+
CUdeviceptr *dptr;
|
| 335 |
+
size_t bytesize;
|
| 336 |
+
} cuMemAlloc_v2_params;
|
| 337 |
+
|
| 338 |
+
typedef struct cuMemAllocPitch_v2_params_st {
|
| 339 |
+
CUdeviceptr *dptr;
|
| 340 |
+
size_t *pPitch;
|
| 341 |
+
size_t WidthInBytes;
|
| 342 |
+
size_t Height;
|
| 343 |
+
unsigned int ElementSizeBytes;
|
| 344 |
+
} cuMemAllocPitch_v2_params;
|
| 345 |
+
|
| 346 |
+
typedef struct cuMemFree_v2_params_st {
|
| 347 |
+
CUdeviceptr dptr;
|
| 348 |
+
} cuMemFree_v2_params;
|
| 349 |
+
|
| 350 |
+
typedef struct cuMemGetAddressRange_v2_params_st {
|
| 351 |
+
CUdeviceptr *pbase;
|
| 352 |
+
size_t *psize;
|
| 353 |
+
CUdeviceptr dptr;
|
| 354 |
+
} cuMemGetAddressRange_v2_params;
|
| 355 |
+
|
| 356 |
+
typedef struct cuMemAllocHost_v2_params_st {
|
| 357 |
+
void **pp;
|
| 358 |
+
size_t bytesize;
|
| 359 |
+
} cuMemAllocHost_v2_params;
|
| 360 |
+
|
| 361 |
+
typedef struct cuMemFreeHost_params_st {
|
| 362 |
+
void *p;
|
| 363 |
+
} cuMemFreeHost_params;
|
| 364 |
+
|
| 365 |
+
typedef struct cuMemHostAlloc_params_st {
|
| 366 |
+
void **pp;
|
| 367 |
+
size_t bytesize;
|
| 368 |
+
unsigned int Flags;
|
| 369 |
+
} cuMemHostAlloc_params;
|
| 370 |
+
|
| 371 |
+
typedef struct cuMemHostGetDevicePointer_v2_params_st {
|
| 372 |
+
CUdeviceptr *pdptr;
|
| 373 |
+
void *p;
|
| 374 |
+
unsigned int Flags;
|
| 375 |
+
} cuMemHostGetDevicePointer_v2_params;
|
| 376 |
+
|
| 377 |
+
typedef struct cuMemHostGetFlags_params_st {
|
| 378 |
+
unsigned int *pFlags;
|
| 379 |
+
void *p;
|
| 380 |
+
} cuMemHostGetFlags_params;
|
| 381 |
+
|
| 382 |
+
typedef struct cuMemAllocManaged_params_st {
|
| 383 |
+
CUdeviceptr *dptr;
|
| 384 |
+
size_t bytesize;
|
| 385 |
+
unsigned int flags;
|
| 386 |
+
} cuMemAllocManaged_params;
|
| 387 |
+
|
| 388 |
+
typedef struct cuDeviceGetByPCIBusId_params_st {
|
| 389 |
+
CUdevice *dev;
|
| 390 |
+
const char *pciBusId;
|
| 391 |
+
} cuDeviceGetByPCIBusId_params;
|
| 392 |
+
|
| 393 |
+
typedef struct cuDeviceGetPCIBusId_params_st {
|
| 394 |
+
char *pciBusId;
|
| 395 |
+
int len;
|
| 396 |
+
CUdevice dev;
|
| 397 |
+
} cuDeviceGetPCIBusId_params;
|
| 398 |
+
|
| 399 |
+
typedef struct cuIpcGetEventHandle_params_st {
|
| 400 |
+
CUipcEventHandle *pHandle;
|
| 401 |
+
CUevent event;
|
| 402 |
+
} cuIpcGetEventHandle_params;
|
| 403 |
+
|
| 404 |
+
typedef struct cuIpcOpenEventHandle_params_st {
|
| 405 |
+
CUevent *phEvent;
|
| 406 |
+
CUipcEventHandle handle;
|
| 407 |
+
} cuIpcOpenEventHandle_params;
|
| 408 |
+
|
| 409 |
+
typedef struct cuIpcGetMemHandle_params_st {
|
| 410 |
+
CUipcMemHandle *pHandle;
|
| 411 |
+
CUdeviceptr dptr;
|
| 412 |
+
} cuIpcGetMemHandle_params;
|
| 413 |
+
|
| 414 |
+
typedef struct cuIpcOpenMemHandle_v2_params_st {
|
| 415 |
+
CUdeviceptr *pdptr;
|
| 416 |
+
CUipcMemHandle handle;
|
| 417 |
+
unsigned int Flags;
|
| 418 |
+
} cuIpcOpenMemHandle_v2_params;
|
| 419 |
+
|
| 420 |
+
typedef struct cuIpcCloseMemHandle_params_st {
|
| 421 |
+
CUdeviceptr dptr;
|
| 422 |
+
} cuIpcCloseMemHandle_params;
|
| 423 |
+
|
| 424 |
+
typedef struct cuMemHostRegister_v2_params_st {
|
| 425 |
+
void *p;
|
| 426 |
+
size_t bytesize;
|
| 427 |
+
unsigned int Flags;
|
| 428 |
+
} cuMemHostRegister_v2_params;
|
| 429 |
+
|
| 430 |
+
typedef struct cuMemHostUnregister_params_st {
|
| 431 |
+
void *p;
|
| 432 |
+
} cuMemHostUnregister_params;
|
| 433 |
+
|
| 434 |
+
typedef struct cuMemcpy_ptds_params_st {
|
| 435 |
+
CUdeviceptr dst;
|
| 436 |
+
CUdeviceptr src;
|
| 437 |
+
size_t ByteCount;
|
| 438 |
+
} cuMemcpy_ptds_params;
|
| 439 |
+
|
| 440 |
+
typedef struct cuMemcpyPeer_ptds_params_st {
|
| 441 |
+
CUdeviceptr dstDevice;
|
| 442 |
+
CUcontext dstContext;
|
| 443 |
+
CUdeviceptr srcDevice;
|
| 444 |
+
CUcontext srcContext;
|
| 445 |
+
size_t ByteCount;
|
| 446 |
+
} cuMemcpyPeer_ptds_params;
|
| 447 |
+
|
| 448 |
+
typedef struct cuMemcpyHtoD_v2_ptds_params_st {
|
| 449 |
+
CUdeviceptr dstDevice;
|
| 450 |
+
const void *srcHost;
|
| 451 |
+
size_t ByteCount;
|
| 452 |
+
} cuMemcpyHtoD_v2_ptds_params;
|
| 453 |
+
|
| 454 |
+
typedef struct cuMemcpyDtoH_v2_ptds_params_st {
|
| 455 |
+
void *dstHost;
|
| 456 |
+
CUdeviceptr srcDevice;
|
| 457 |
+
size_t ByteCount;
|
| 458 |
+
} cuMemcpyDtoH_v2_ptds_params;
|
| 459 |
+
|
| 460 |
+
typedef struct cuMemcpyDtoD_v2_ptds_params_st {
|
| 461 |
+
CUdeviceptr dstDevice;
|
| 462 |
+
CUdeviceptr srcDevice;
|
| 463 |
+
size_t ByteCount;
|
| 464 |
+
} cuMemcpyDtoD_v2_ptds_params;
|
| 465 |
+
|
| 466 |
+
typedef struct cuMemcpyDtoA_v2_ptds_params_st {
|
| 467 |
+
CUarray dstArray;
|
| 468 |
+
size_t dstOffset;
|
| 469 |
+
CUdeviceptr srcDevice;
|
| 470 |
+
size_t ByteCount;
|
| 471 |
+
} cuMemcpyDtoA_v2_ptds_params;
|
| 472 |
+
|
| 473 |
+
typedef struct cuMemcpyAtoD_v2_ptds_params_st {
|
| 474 |
+
CUdeviceptr dstDevice;
|
| 475 |
+
CUarray srcArray;
|
| 476 |
+
size_t srcOffset;
|
| 477 |
+
size_t ByteCount;
|
| 478 |
+
} cuMemcpyAtoD_v2_ptds_params;
|
| 479 |
+
|
| 480 |
+
typedef struct cuMemcpyHtoA_v2_ptds_params_st {
|
| 481 |
+
CUarray dstArray;
|
| 482 |
+
size_t dstOffset;
|
| 483 |
+
const void *srcHost;
|
| 484 |
+
size_t ByteCount;
|
| 485 |
+
} cuMemcpyHtoA_v2_ptds_params;
|
| 486 |
+
|
| 487 |
+
typedef struct cuMemcpyAtoH_v2_ptds_params_st {
|
| 488 |
+
void *dstHost;
|
| 489 |
+
CUarray srcArray;
|
| 490 |
+
size_t srcOffset;
|
| 491 |
+
size_t ByteCount;
|
| 492 |
+
} cuMemcpyAtoH_v2_ptds_params;
|
| 493 |
+
|
| 494 |
+
typedef struct cuMemcpyAtoA_v2_ptds_params_st {
|
| 495 |
+
CUarray dstArray;
|
| 496 |
+
size_t dstOffset;
|
| 497 |
+
CUarray srcArray;
|
| 498 |
+
size_t srcOffset;
|
| 499 |
+
size_t ByteCount;
|
| 500 |
+
} cuMemcpyAtoA_v2_ptds_params;
|
| 501 |
+
|
| 502 |
+
typedef struct cuMemcpy2D_v2_ptds_params_st {
|
| 503 |
+
const CUDA_MEMCPY2D *pCopy;
|
| 504 |
+
} cuMemcpy2D_v2_ptds_params;
|
| 505 |
+
|
| 506 |
+
typedef struct cuMemcpy2DUnaligned_v2_ptds_params_st {
|
| 507 |
+
const CUDA_MEMCPY2D *pCopy;
|
| 508 |
+
} cuMemcpy2DUnaligned_v2_ptds_params;
|
| 509 |
+
|
| 510 |
+
typedef struct cuMemcpy3D_v2_ptds_params_st {
|
| 511 |
+
const CUDA_MEMCPY3D *pCopy;
|
| 512 |
+
} cuMemcpy3D_v2_ptds_params;
|
| 513 |
+
|
| 514 |
+
typedef struct cuMemcpy3DPeer_ptds_params_st {
|
| 515 |
+
const CUDA_MEMCPY3D_PEER *pCopy;
|
| 516 |
+
} cuMemcpy3DPeer_ptds_params;
|
| 517 |
+
|
| 518 |
+
typedef struct cuMemcpyAsync_ptsz_params_st {
|
| 519 |
+
CUdeviceptr dst;
|
| 520 |
+
CUdeviceptr src;
|
| 521 |
+
size_t ByteCount;
|
| 522 |
+
CUstream hStream;
|
| 523 |
+
} cuMemcpyAsync_ptsz_params;
|
| 524 |
+
|
| 525 |
+
typedef struct cuMemcpyPeerAsync_ptsz_params_st {
|
| 526 |
+
CUdeviceptr dstDevice;
|
| 527 |
+
CUcontext dstContext;
|
| 528 |
+
CUdeviceptr srcDevice;
|
| 529 |
+
CUcontext srcContext;
|
| 530 |
+
size_t ByteCount;
|
| 531 |
+
CUstream hStream;
|
| 532 |
+
} cuMemcpyPeerAsync_ptsz_params;
|
| 533 |
+
|
| 534 |
+
typedef struct cuMemcpyHtoDAsync_v2_ptsz_params_st {
|
| 535 |
+
CUdeviceptr dstDevice;
|
| 536 |
+
const void *srcHost;
|
| 537 |
+
size_t ByteCount;
|
| 538 |
+
CUstream hStream;
|
| 539 |
+
} cuMemcpyHtoDAsync_v2_ptsz_params;
|
| 540 |
+
|
| 541 |
+
typedef struct cuMemcpyDtoHAsync_v2_ptsz_params_st {
|
| 542 |
+
void *dstHost;
|
| 543 |
+
CUdeviceptr srcDevice;
|
| 544 |
+
size_t ByteCount;
|
| 545 |
+
CUstream hStream;
|
| 546 |
+
} cuMemcpyDtoHAsync_v2_ptsz_params;
|
| 547 |
+
|
| 548 |
+
typedef struct cuMemcpyDtoDAsync_v2_ptsz_params_st {
|
| 549 |
+
CUdeviceptr dstDevice;
|
| 550 |
+
CUdeviceptr srcDevice;
|
| 551 |
+
size_t ByteCount;
|
| 552 |
+
CUstream hStream;
|
| 553 |
+
} cuMemcpyDtoDAsync_v2_ptsz_params;
|
| 554 |
+
|
| 555 |
+
typedef struct cuMemcpyHtoAAsync_v2_ptsz_params_st {
|
| 556 |
+
CUarray dstArray;
|
| 557 |
+
size_t dstOffset;
|
| 558 |
+
const void *srcHost;
|
| 559 |
+
size_t ByteCount;
|
| 560 |
+
CUstream hStream;
|
| 561 |
+
} cuMemcpyHtoAAsync_v2_ptsz_params;
|
| 562 |
+
|
| 563 |
+
typedef struct cuMemcpyAtoHAsync_v2_ptsz_params_st {
|
| 564 |
+
void *dstHost;
|
| 565 |
+
CUarray srcArray;
|
| 566 |
+
size_t srcOffset;
|
| 567 |
+
size_t ByteCount;
|
| 568 |
+
CUstream hStream;
|
| 569 |
+
} cuMemcpyAtoHAsync_v2_ptsz_params;
|
| 570 |
+
|
| 571 |
+
typedef struct cuMemcpy2DAsync_v2_ptsz_params_st {
|
| 572 |
+
const CUDA_MEMCPY2D *pCopy;
|
| 573 |
+
CUstream hStream;
|
| 574 |
+
} cuMemcpy2DAsync_v2_ptsz_params;
|
| 575 |
+
|
| 576 |
+
typedef struct cuMemcpy3DAsync_v2_ptsz_params_st {
|
| 577 |
+
const CUDA_MEMCPY3D *pCopy;
|
| 578 |
+
CUstream hStream;
|
| 579 |
+
} cuMemcpy3DAsync_v2_ptsz_params;
|
| 580 |
+
|
| 581 |
+
typedef struct cuMemcpy3DPeerAsync_ptsz_params_st {
|
| 582 |
+
const CUDA_MEMCPY3D_PEER *pCopy;
|
| 583 |
+
CUstream hStream;
|
| 584 |
+
} cuMemcpy3DPeerAsync_ptsz_params;
|
| 585 |
+
|
| 586 |
+
typedef struct cuMemsetD8_v2_ptds_params_st {
|
| 587 |
+
CUdeviceptr dstDevice;
|
| 588 |
+
unsigned char uc;
|
| 589 |
+
size_t N;
|
| 590 |
+
} cuMemsetD8_v2_ptds_params;
|
| 591 |
+
|
| 592 |
+
typedef struct cuMemsetD16_v2_ptds_params_st {
|
| 593 |
+
CUdeviceptr dstDevice;
|
| 594 |
+
unsigned short us;
|
| 595 |
+
size_t N;
|
| 596 |
+
} cuMemsetD16_v2_ptds_params;
|
| 597 |
+
|
| 598 |
+
typedef struct cuMemsetD32_v2_ptds_params_st {
|
| 599 |
+
CUdeviceptr dstDevice;
|
| 600 |
+
unsigned int ui;
|
| 601 |
+
size_t N;
|
| 602 |
+
} cuMemsetD32_v2_ptds_params;
|
| 603 |
+
|
| 604 |
+
typedef struct cuMemsetD2D8_v2_ptds_params_st {
|
| 605 |
+
CUdeviceptr dstDevice;
|
| 606 |
+
size_t dstPitch;
|
| 607 |
+
unsigned char uc;
|
| 608 |
+
size_t Width;
|
| 609 |
+
size_t Height;
|
| 610 |
+
} cuMemsetD2D8_v2_ptds_params;
|
| 611 |
+
|
| 612 |
+
typedef struct cuMemsetD2D16_v2_ptds_params_st {
|
| 613 |
+
CUdeviceptr dstDevice;
|
| 614 |
+
size_t dstPitch;
|
| 615 |
+
unsigned short us;
|
| 616 |
+
size_t Width;
|
| 617 |
+
size_t Height;
|
| 618 |
+
} cuMemsetD2D16_v2_ptds_params;
|
| 619 |
+
|
| 620 |
+
typedef struct cuMemsetD2D32_v2_ptds_params_st {
|
| 621 |
+
CUdeviceptr dstDevice;
|
| 622 |
+
size_t dstPitch;
|
| 623 |
+
unsigned int ui;
|
| 624 |
+
size_t Width;
|
| 625 |
+
size_t Height;
|
| 626 |
+
} cuMemsetD2D32_v2_ptds_params;
|
| 627 |
+
|
| 628 |
+
typedef struct cuMemsetD8Async_ptsz_params_st {
|
| 629 |
+
CUdeviceptr dstDevice;
|
| 630 |
+
unsigned char uc;
|
| 631 |
+
size_t N;
|
| 632 |
+
CUstream hStream;
|
| 633 |
+
} cuMemsetD8Async_ptsz_params;
|
| 634 |
+
|
| 635 |
+
typedef struct cuMemsetD16Async_ptsz_params_st {
|
| 636 |
+
CUdeviceptr dstDevice;
|
| 637 |
+
unsigned short us;
|
| 638 |
+
size_t N;
|
| 639 |
+
CUstream hStream;
|
| 640 |
+
} cuMemsetD16Async_ptsz_params;
|
| 641 |
+
|
| 642 |
+
typedef struct cuMemsetD32Async_ptsz_params_st {
|
| 643 |
+
CUdeviceptr dstDevice;
|
| 644 |
+
unsigned int ui;
|
| 645 |
+
size_t N;
|
| 646 |
+
CUstream hStream;
|
| 647 |
+
} cuMemsetD32Async_ptsz_params;
|
| 648 |
+
|
| 649 |
+
typedef struct cuMemsetD2D8Async_ptsz_params_st {
|
| 650 |
+
CUdeviceptr dstDevice;
|
| 651 |
+
size_t dstPitch;
|
| 652 |
+
unsigned char uc;
|
| 653 |
+
size_t Width;
|
| 654 |
+
size_t Height;
|
| 655 |
+
CUstream hStream;
|
| 656 |
+
} cuMemsetD2D8Async_ptsz_params;
|
| 657 |
+
|
| 658 |
+
typedef struct cuMemsetD2D16Async_ptsz_params_st {
|
| 659 |
+
CUdeviceptr dstDevice;
|
| 660 |
+
size_t dstPitch;
|
| 661 |
+
unsigned short us;
|
| 662 |
+
size_t Width;
|
| 663 |
+
size_t Height;
|
| 664 |
+
CUstream hStream;
|
| 665 |
+
} cuMemsetD2D16Async_ptsz_params;
|
| 666 |
+
|
| 667 |
+
typedef struct cuMemsetD2D32Async_ptsz_params_st {
|
| 668 |
+
CUdeviceptr dstDevice;
|
| 669 |
+
size_t dstPitch;
|
| 670 |
+
unsigned int ui;
|
| 671 |
+
size_t Width;
|
| 672 |
+
size_t Height;
|
| 673 |
+
CUstream hStream;
|
| 674 |
+
} cuMemsetD2D32Async_ptsz_params;
|
| 675 |
+
|
| 676 |
+
typedef struct cuArrayCreate_v2_params_st {
|
| 677 |
+
CUarray *pHandle;
|
| 678 |
+
const CUDA_ARRAY_DESCRIPTOR *pAllocateArray;
|
| 679 |
+
} cuArrayCreate_v2_params;
|
| 680 |
+
|
| 681 |
+
typedef struct cuArrayGetDescriptor_v2_params_st {
|
| 682 |
+
CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor;
|
| 683 |
+
CUarray hArray;
|
| 684 |
+
} cuArrayGetDescriptor_v2_params;
|
| 685 |
+
|
| 686 |
+
typedef struct cuArrayGetSparseProperties_params_st {
|
| 687 |
+
CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties;
|
| 688 |
+
CUarray array;
|
| 689 |
+
} cuArrayGetSparseProperties_params;
|
| 690 |
+
|
| 691 |
+
typedef struct cuMipmappedArrayGetSparseProperties_params_st {
|
| 692 |
+
CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties;
|
| 693 |
+
CUmipmappedArray mipmap;
|
| 694 |
+
} cuMipmappedArrayGetSparseProperties_params;
|
| 695 |
+
|
| 696 |
+
typedef struct cuArrayGetMemoryRequirements_params_st {
|
| 697 |
+
CUDA_ARRAY_MEMORY_REQUIREMENTS *memoryRequirements;
|
| 698 |
+
CUarray array;
|
| 699 |
+
CUdevice device;
|
| 700 |
+
} cuArrayGetMemoryRequirements_params;
|
| 701 |
+
|
| 702 |
+
typedef struct cuMipmappedArrayGetMemoryRequirements_params_st {
|
| 703 |
+
CUDA_ARRAY_MEMORY_REQUIREMENTS *memoryRequirements;
|
| 704 |
+
CUmipmappedArray mipmap;
|
| 705 |
+
CUdevice device;
|
| 706 |
+
} cuMipmappedArrayGetMemoryRequirements_params;
|
| 707 |
+
|
| 708 |
+
typedef struct cuArrayGetPlane_params_st {
|
| 709 |
+
CUarray *pPlaneArray;
|
| 710 |
+
CUarray hArray;
|
| 711 |
+
unsigned int planeIdx;
|
| 712 |
+
} cuArrayGetPlane_params;
|
| 713 |
+
|
| 714 |
+
typedef struct cuArrayDestroy_params_st {
|
| 715 |
+
CUarray hArray;
|
| 716 |
+
} cuArrayDestroy_params;
|
| 717 |
+
|
| 718 |
+
typedef struct cuArray3DCreate_v2_params_st {
|
| 719 |
+
CUarray *pHandle;
|
| 720 |
+
const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray;
|
| 721 |
+
} cuArray3DCreate_v2_params;
|
| 722 |
+
|
| 723 |
+
typedef struct cuArray3DGetDescriptor_v2_params_st {
|
| 724 |
+
CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor;
|
| 725 |
+
CUarray hArray;
|
| 726 |
+
} cuArray3DGetDescriptor_v2_params;
|
| 727 |
+
|
| 728 |
+
typedef struct cuMipmappedArrayCreate_params_st {
|
| 729 |
+
CUmipmappedArray *pHandle;
|
| 730 |
+
const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc;
|
| 731 |
+
unsigned int numMipmapLevels;
|
| 732 |
+
} cuMipmappedArrayCreate_params;
|
| 733 |
+
|
| 734 |
+
typedef struct cuMipmappedArrayGetLevel_params_st {
|
| 735 |
+
CUarray *pLevelArray;
|
| 736 |
+
CUmipmappedArray hMipmappedArray;
|
| 737 |
+
unsigned int level;
|
| 738 |
+
} cuMipmappedArrayGetLevel_params;
|
| 739 |
+
|
| 740 |
+
typedef struct cuMipmappedArrayDestroy_params_st {
|
| 741 |
+
CUmipmappedArray hMipmappedArray;
|
| 742 |
+
} cuMipmappedArrayDestroy_params;
|
| 743 |
+
|
| 744 |
+
typedef struct cuMemAddressReserve_params_st {
|
| 745 |
+
CUdeviceptr *ptr;
|
| 746 |
+
size_t size;
|
| 747 |
+
size_t alignment;
|
| 748 |
+
CUdeviceptr addr;
|
| 749 |
+
unsigned long long flags;
|
| 750 |
+
} cuMemAddressReserve_params;
|
| 751 |
+
|
| 752 |
+
typedef struct cuMemAddressFree_params_st {
|
| 753 |
+
CUdeviceptr ptr;
|
| 754 |
+
size_t size;
|
| 755 |
+
} cuMemAddressFree_params;
|
| 756 |
+
|
| 757 |
+
typedef struct cuMemCreate_params_st {
|
| 758 |
+
CUmemGenericAllocationHandle *handle;
|
| 759 |
+
size_t size;
|
| 760 |
+
const CUmemAllocationProp *prop;
|
| 761 |
+
unsigned long long flags;
|
| 762 |
+
} cuMemCreate_params;
|
| 763 |
+
|
| 764 |
+
typedef struct cuMemRelease_params_st {
|
| 765 |
+
CUmemGenericAllocationHandle handle;
|
| 766 |
+
} cuMemRelease_params;
|
| 767 |
+
|
| 768 |
+
typedef struct cuMemMap_params_st {
|
| 769 |
+
CUdeviceptr ptr;
|
| 770 |
+
size_t size;
|
| 771 |
+
size_t offset;
|
| 772 |
+
CUmemGenericAllocationHandle handle;
|
| 773 |
+
unsigned long long flags;
|
| 774 |
+
} cuMemMap_params;
|
| 775 |
+
|
| 776 |
+
typedef struct cuMemMapArrayAsync_ptsz_params_st {
|
| 777 |
+
CUarrayMapInfo *mapInfoList;
|
| 778 |
+
unsigned int count;
|
| 779 |
+
CUstream hStream;
|
| 780 |
+
} cuMemMapArrayAsync_ptsz_params;
|
| 781 |
+
|
| 782 |
+
typedef struct cuMemUnmap_params_st {
|
| 783 |
+
CUdeviceptr ptr;
|
| 784 |
+
size_t size;
|
| 785 |
+
} cuMemUnmap_params;
|
| 786 |
+
|
| 787 |
+
typedef struct cuMemSetAccess_params_st {
|
| 788 |
+
CUdeviceptr ptr;
|
| 789 |
+
size_t size;
|
| 790 |
+
const CUmemAccessDesc *desc;
|
| 791 |
+
size_t count;
|
| 792 |
+
} cuMemSetAccess_params;
|
| 793 |
+
|
| 794 |
+
typedef struct cuMemGetAccess_params_st {
|
| 795 |
+
unsigned long long *flags;
|
| 796 |
+
const CUmemLocation *location;
|
| 797 |
+
CUdeviceptr ptr;
|
| 798 |
+
} cuMemGetAccess_params;
|
| 799 |
+
|
| 800 |
+
typedef struct cuMemExportToShareableHandle_params_st {
|
| 801 |
+
void *shareableHandle;
|
| 802 |
+
CUmemGenericAllocationHandle handle;
|
| 803 |
+
CUmemAllocationHandleType handleType;
|
| 804 |
+
unsigned long long flags;
|
| 805 |
+
} cuMemExportToShareableHandle_params;
|
| 806 |
+
|
| 807 |
+
typedef struct cuMemImportFromShareableHandle_params_st {
|
| 808 |
+
CUmemGenericAllocationHandle *handle;
|
| 809 |
+
void *osHandle;
|
| 810 |
+
CUmemAllocationHandleType shHandleType;
|
| 811 |
+
} cuMemImportFromShareableHandle_params;
|
| 812 |
+
|
| 813 |
+
typedef struct cuMemGetAllocationGranularity_params_st {
|
| 814 |
+
size_t *granularity;
|
| 815 |
+
const CUmemAllocationProp *prop;
|
| 816 |
+
CUmemAllocationGranularity_flags option;
|
| 817 |
+
} cuMemGetAllocationGranularity_params;
|
| 818 |
+
|
| 819 |
+
typedef struct cuMemGetAllocationPropertiesFromHandle_params_st {
|
| 820 |
+
CUmemAllocationProp *prop;
|
| 821 |
+
CUmemGenericAllocationHandle handle;
|
| 822 |
+
} cuMemGetAllocationPropertiesFromHandle_params;
|
| 823 |
+
|
| 824 |
+
typedef struct cuMemRetainAllocationHandle_params_st {
|
| 825 |
+
CUmemGenericAllocationHandle *handle;
|
| 826 |
+
void *addr;
|
| 827 |
+
} cuMemRetainAllocationHandle_params;
|
| 828 |
+
|
| 829 |
+
typedef struct cuMemFreeAsync_ptsz_params_st {
|
| 830 |
+
CUdeviceptr dptr;
|
| 831 |
+
CUstream hStream;
|
| 832 |
+
} cuMemFreeAsync_ptsz_params;
|
| 833 |
+
|
| 834 |
+
typedef struct cuMemAllocAsync_ptsz_params_st {
|
| 835 |
+
CUdeviceptr *dptr;
|
| 836 |
+
size_t bytesize;
|
| 837 |
+
CUstream hStream;
|
| 838 |
+
} cuMemAllocAsync_ptsz_params;
|
| 839 |
+
|
| 840 |
+
typedef struct cuMemPoolTrimTo_params_st {
|
| 841 |
+
CUmemoryPool pool;
|
| 842 |
+
size_t minBytesToKeep;
|
| 843 |
+
} cuMemPoolTrimTo_params;
|
| 844 |
+
|
| 845 |
+
typedef struct cuMemPoolSetAttribute_params_st {
|
| 846 |
+
CUmemoryPool pool;
|
| 847 |
+
CUmemPool_attribute attr;
|
| 848 |
+
void *value;
|
| 849 |
+
} cuMemPoolSetAttribute_params;
|
| 850 |
+
|
| 851 |
+
typedef struct cuMemPoolGetAttribute_params_st {
|
| 852 |
+
CUmemoryPool pool;
|
| 853 |
+
CUmemPool_attribute attr;
|
| 854 |
+
void *value;
|
| 855 |
+
} cuMemPoolGetAttribute_params;
|
| 856 |
+
|
| 857 |
+
typedef struct cuMemPoolSetAccess_params_st {
|
| 858 |
+
CUmemoryPool pool;
|
| 859 |
+
const CUmemAccessDesc *map;
|
| 860 |
+
size_t count;
|
| 861 |
+
} cuMemPoolSetAccess_params;
|
| 862 |
+
|
| 863 |
+
typedef struct cuMemPoolGetAccess_params_st {
|
| 864 |
+
CUmemAccess_flags *flags;
|
| 865 |
+
CUmemoryPool memPool;
|
| 866 |
+
CUmemLocation *location;
|
| 867 |
+
} cuMemPoolGetAccess_params;
|
| 868 |
+
|
| 869 |
+
typedef struct cuMemPoolCreate_params_st {
|
| 870 |
+
CUmemoryPool *pool;
|
| 871 |
+
const CUmemPoolProps *poolProps;
|
| 872 |
+
} cuMemPoolCreate_params;
|
| 873 |
+
|
| 874 |
+
typedef struct cuMemPoolDestroy_params_st {
|
| 875 |
+
CUmemoryPool pool;
|
| 876 |
+
} cuMemPoolDestroy_params;
|
| 877 |
+
|
| 878 |
+
typedef struct cuMemAllocFromPoolAsync_ptsz_params_st {
|
| 879 |
+
CUdeviceptr *dptr;
|
| 880 |
+
size_t bytesize;
|
| 881 |
+
CUmemoryPool pool;
|
| 882 |
+
CUstream hStream;
|
| 883 |
+
} cuMemAllocFromPoolAsync_ptsz_params;
|
| 884 |
+
|
| 885 |
+
typedef struct cuMemPoolExportToShareableHandle_params_st {
|
| 886 |
+
void *handle_out;
|
| 887 |
+
CUmemoryPool pool;
|
| 888 |
+
CUmemAllocationHandleType handleType;
|
| 889 |
+
unsigned long long flags;
|
| 890 |
+
} cuMemPoolExportToShareableHandle_params;
|
| 891 |
+
|
| 892 |
+
typedef struct cuMemPoolImportFromShareableHandle_params_st {
|
| 893 |
+
CUmemoryPool *pool_out;
|
| 894 |
+
void *handle;
|
| 895 |
+
CUmemAllocationHandleType handleType;
|
| 896 |
+
unsigned long long flags;
|
| 897 |
+
} cuMemPoolImportFromShareableHandle_params;
|
| 898 |
+
|
| 899 |
+
typedef struct cuMemPoolExportPointer_params_st {
|
| 900 |
+
CUmemPoolPtrExportData *shareData_out;
|
| 901 |
+
CUdeviceptr ptr;
|
| 902 |
+
} cuMemPoolExportPointer_params;
|
| 903 |
+
|
| 904 |
+
typedef struct cuMemPoolImportPointer_params_st {
|
| 905 |
+
CUdeviceptr *ptr_out;
|
| 906 |
+
CUmemoryPool pool;
|
| 907 |
+
CUmemPoolPtrExportData *shareData;
|
| 908 |
+
} cuMemPoolImportPointer_params;
|
| 909 |
+
|
| 910 |
+
typedef struct cuPointerGetAttribute_params_st {
|
| 911 |
+
void *data;
|
| 912 |
+
CUpointer_attribute attribute;
|
| 913 |
+
CUdeviceptr ptr;
|
| 914 |
+
} cuPointerGetAttribute_params;
|
| 915 |
+
|
| 916 |
+
typedef struct cuMemPrefetchAsync_ptsz_params_st {
|
| 917 |
+
CUdeviceptr devPtr;
|
| 918 |
+
size_t count;
|
| 919 |
+
CUdevice dstDevice;
|
| 920 |
+
CUstream hStream;
|
| 921 |
+
} cuMemPrefetchAsync_ptsz_params;
|
| 922 |
+
|
| 923 |
+
typedef struct cuMemAdvise_params_st {
|
| 924 |
+
CUdeviceptr devPtr;
|
| 925 |
+
size_t count;
|
| 926 |
+
CUmem_advise advice;
|
| 927 |
+
CUdevice device;
|
| 928 |
+
} cuMemAdvise_params;
|
| 929 |
+
|
| 930 |
+
typedef struct cuMemRangeGetAttribute_params_st {
|
| 931 |
+
void *data;
|
| 932 |
+
size_t dataSize;
|
| 933 |
+
CUmem_range_attribute attribute;
|
| 934 |
+
CUdeviceptr devPtr;
|
| 935 |
+
size_t count;
|
| 936 |
+
} cuMemRangeGetAttribute_params;
|
| 937 |
+
|
| 938 |
+
typedef struct cuMemRangeGetAttributes_params_st {
|
| 939 |
+
void **data;
|
| 940 |
+
size_t *dataSizes;
|
| 941 |
+
CUmem_range_attribute *attributes;
|
| 942 |
+
size_t numAttributes;
|
| 943 |
+
CUdeviceptr devPtr;
|
| 944 |
+
size_t count;
|
| 945 |
+
} cuMemRangeGetAttributes_params;
|
| 946 |
+
|
| 947 |
+
typedef struct cuPointerSetAttribute_params_st {
|
| 948 |
+
const void *value;
|
| 949 |
+
CUpointer_attribute attribute;
|
| 950 |
+
CUdeviceptr ptr;
|
| 951 |
+
} cuPointerSetAttribute_params;
|
| 952 |
+
|
| 953 |
+
typedef struct cuPointerGetAttributes_params_st {
|
| 954 |
+
unsigned int numAttributes;
|
| 955 |
+
CUpointer_attribute *attributes;
|
| 956 |
+
void **data;
|
| 957 |
+
CUdeviceptr ptr;
|
| 958 |
+
} cuPointerGetAttributes_params;
|
| 959 |
+
|
| 960 |
+
typedef struct cuStreamCreate_params_st {
|
| 961 |
+
CUstream *phStream;
|
| 962 |
+
unsigned int Flags;
|
| 963 |
+
} cuStreamCreate_params;
|
| 964 |
+
|
| 965 |
+
typedef struct cuStreamCreateWithPriority_params_st {
|
| 966 |
+
CUstream *phStream;
|
| 967 |
+
unsigned int flags;
|
| 968 |
+
int priority;
|
| 969 |
+
} cuStreamCreateWithPriority_params;
|
| 970 |
+
|
| 971 |
+
typedef struct cuStreamGetPriority_ptsz_params_st {
|
| 972 |
+
CUstream hStream;
|
| 973 |
+
int *priority;
|
| 974 |
+
} cuStreamGetPriority_ptsz_params;
|
| 975 |
+
|
| 976 |
+
typedef struct cuStreamGetFlags_ptsz_params_st {
|
| 977 |
+
CUstream hStream;
|
| 978 |
+
unsigned int *flags;
|
| 979 |
+
} cuStreamGetFlags_ptsz_params;
|
| 980 |
+
|
| 981 |
+
typedef struct cuStreamGetCtx_ptsz_params_st {
|
| 982 |
+
CUstream hStream;
|
| 983 |
+
CUcontext *pctx;
|
| 984 |
+
} cuStreamGetCtx_ptsz_params;
|
| 985 |
+
|
| 986 |
+
typedef struct cuStreamWaitEvent_ptsz_params_st {
|
| 987 |
+
CUstream hStream;
|
| 988 |
+
CUevent hEvent;
|
| 989 |
+
unsigned int Flags;
|
| 990 |
+
} cuStreamWaitEvent_ptsz_params;
|
| 991 |
+
|
| 992 |
+
typedef struct cuStreamAddCallback_ptsz_params_st {
|
| 993 |
+
CUstream hStream;
|
| 994 |
+
CUstreamCallback callback;
|
| 995 |
+
void *userData;
|
| 996 |
+
unsigned int flags;
|
| 997 |
+
} cuStreamAddCallback_ptsz_params;
|
| 998 |
+
|
| 999 |
+
typedef struct cuStreamBeginCapture_v2_ptsz_params_st {
|
| 1000 |
+
CUstream hStream;
|
| 1001 |
+
CUstreamCaptureMode mode;
|
| 1002 |
+
} cuStreamBeginCapture_v2_ptsz_params;
|
| 1003 |
+
|
| 1004 |
+
typedef struct cuThreadExchangeStreamCaptureMode_params_st {
|
| 1005 |
+
CUstreamCaptureMode *mode;
|
| 1006 |
+
} cuThreadExchangeStreamCaptureMode_params;
|
| 1007 |
+
|
| 1008 |
+
typedef struct cuStreamEndCapture_ptsz_params_st {
|
| 1009 |
+
CUstream hStream;
|
| 1010 |
+
CUgraph *phGraph;
|
| 1011 |
+
} cuStreamEndCapture_ptsz_params;
|
| 1012 |
+
|
| 1013 |
+
typedef struct cuStreamIsCapturing_ptsz_params_st {
|
| 1014 |
+
CUstream hStream;
|
| 1015 |
+
CUstreamCaptureStatus *captureStatus;
|
| 1016 |
+
} cuStreamIsCapturing_ptsz_params;
|
| 1017 |
+
|
| 1018 |
+
typedef struct cuStreamGetCaptureInfo_ptsz_params_st {
|
| 1019 |
+
CUstream hStream;
|
| 1020 |
+
CUstreamCaptureStatus *captureStatus_out;
|
| 1021 |
+
cuuint64_t *id_out;
|
| 1022 |
+
} cuStreamGetCaptureInfo_ptsz_params;
|
| 1023 |
+
|
| 1024 |
+
typedef struct cuStreamGetCaptureInfo_v2_ptsz_params_st {
|
| 1025 |
+
CUstream hStream;
|
| 1026 |
+
CUstreamCaptureStatus *captureStatus_out;
|
| 1027 |
+
cuuint64_t *id_out;
|
| 1028 |
+
CUgraph *graph_out;
|
| 1029 |
+
const CUgraphNode **dependencies_out;
|
| 1030 |
+
size_t *numDependencies_out;
|
| 1031 |
+
} cuStreamGetCaptureInfo_v2_ptsz_params;
|
| 1032 |
+
|
| 1033 |
+
typedef struct cuStreamUpdateCaptureDependencies_ptsz_params_st {
|
| 1034 |
+
CUstream hStream;
|
| 1035 |
+
CUgraphNode *dependencies;
|
| 1036 |
+
size_t numDependencies;
|
| 1037 |
+
unsigned int flags;
|
| 1038 |
+
} cuStreamUpdateCaptureDependencies_ptsz_params;
|
| 1039 |
+
|
| 1040 |
+
typedef struct cuStreamAttachMemAsync_ptsz_params_st {
|
| 1041 |
+
CUstream hStream;
|
| 1042 |
+
CUdeviceptr dptr;
|
| 1043 |
+
size_t length;
|
| 1044 |
+
unsigned int flags;
|
| 1045 |
+
} cuStreamAttachMemAsync_ptsz_params;
|
| 1046 |
+
|
| 1047 |
+
typedef struct cuStreamQuery_ptsz_params_st {
|
| 1048 |
+
CUstream hStream;
|
| 1049 |
+
} cuStreamQuery_ptsz_params;
|
| 1050 |
+
|
| 1051 |
+
typedef struct cuStreamSynchronize_ptsz_params_st {
|
| 1052 |
+
CUstream hStream;
|
| 1053 |
+
} cuStreamSynchronize_ptsz_params;
|
| 1054 |
+
|
| 1055 |
+
typedef struct cuStreamDestroy_v2_params_st {
|
| 1056 |
+
CUstream hStream;
|
| 1057 |
+
} cuStreamDestroy_v2_params;
|
| 1058 |
+
|
| 1059 |
+
typedef struct cuStreamCopyAttributes_ptsz_params_st {
|
| 1060 |
+
CUstream dst;
|
| 1061 |
+
CUstream src;
|
| 1062 |
+
} cuStreamCopyAttributes_ptsz_params;
|
| 1063 |
+
|
| 1064 |
+
typedef struct cuStreamGetAttribute_ptsz_params_st {
|
| 1065 |
+
CUstream hStream;
|
| 1066 |
+
CUstreamAttrID attr;
|
| 1067 |
+
CUstreamAttrValue *value_out;
|
| 1068 |
+
} cuStreamGetAttribute_ptsz_params;
|
| 1069 |
+
|
| 1070 |
+
typedef struct cuStreamSetAttribute_ptsz_params_st {
|
| 1071 |
+
CUstream hStream;
|
| 1072 |
+
CUstreamAttrID attr;
|
| 1073 |
+
const CUstreamAttrValue *value;
|
| 1074 |
+
} cuStreamSetAttribute_ptsz_params;
|
| 1075 |
+
|
| 1076 |
+
typedef struct cuEventCreate_params_st {
|
| 1077 |
+
CUevent *phEvent;
|
| 1078 |
+
unsigned int Flags;
|
| 1079 |
+
} cuEventCreate_params;
|
| 1080 |
+
|
| 1081 |
+
typedef struct cuEventRecord_ptsz_params_st {
|
| 1082 |
+
CUevent hEvent;
|
| 1083 |
+
CUstream hStream;
|
| 1084 |
+
} cuEventRecord_ptsz_params;
|
| 1085 |
+
|
| 1086 |
+
typedef struct cuEventRecordWithFlags_ptsz_params_st {
|
| 1087 |
+
CUevent hEvent;
|
| 1088 |
+
CUstream hStream;
|
| 1089 |
+
unsigned int flags;
|
| 1090 |
+
} cuEventRecordWithFlags_ptsz_params;
|
| 1091 |
+
|
| 1092 |
+
typedef struct cuEventQuery_params_st {
|
| 1093 |
+
CUevent hEvent;
|
| 1094 |
+
} cuEventQuery_params;
|
| 1095 |
+
|
| 1096 |
+
typedef struct cuEventSynchronize_params_st {
|
| 1097 |
+
CUevent hEvent;
|
| 1098 |
+
} cuEventSynchronize_params;
|
| 1099 |
+
|
| 1100 |
+
typedef struct cuEventDestroy_v2_params_st {
|
| 1101 |
+
CUevent hEvent;
|
| 1102 |
+
} cuEventDestroy_v2_params;
|
| 1103 |
+
|
| 1104 |
+
typedef struct cuEventElapsedTime_params_st {
|
| 1105 |
+
float *pMilliseconds;
|
| 1106 |
+
CUevent hStart;
|
| 1107 |
+
CUevent hEnd;
|
| 1108 |
+
} cuEventElapsedTime_params;
|
| 1109 |
+
|
| 1110 |
+
typedef struct cuImportExternalMemory_params_st {
|
| 1111 |
+
CUexternalMemory *extMem_out;
|
| 1112 |
+
const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc;
|
| 1113 |
+
} cuImportExternalMemory_params;
|
| 1114 |
+
|
| 1115 |
+
typedef struct cuExternalMemoryGetMappedBuffer_params_st {
|
| 1116 |
+
CUdeviceptr *devPtr;
|
| 1117 |
+
CUexternalMemory extMem;
|
| 1118 |
+
const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc;
|
| 1119 |
+
} cuExternalMemoryGetMappedBuffer_params;
|
| 1120 |
+
|
| 1121 |
+
typedef struct cuExternalMemoryGetMappedMipmappedArray_params_st {
|
| 1122 |
+
CUmipmappedArray *mipmap;
|
| 1123 |
+
CUexternalMemory extMem;
|
| 1124 |
+
const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc;
|
| 1125 |
+
} cuExternalMemoryGetMappedMipmappedArray_params;
|
| 1126 |
+
|
| 1127 |
+
typedef struct cuDestroyExternalMemory_params_st {
|
| 1128 |
+
CUexternalMemory extMem;
|
| 1129 |
+
} cuDestroyExternalMemory_params;
|
| 1130 |
+
|
| 1131 |
+
typedef struct cuImportExternalSemaphore_params_st {
|
| 1132 |
+
CUexternalSemaphore *extSem_out;
|
| 1133 |
+
const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc;
|
| 1134 |
+
} cuImportExternalSemaphore_params;
|
| 1135 |
+
|
| 1136 |
+
typedef struct cuSignalExternalSemaphoresAsync_ptsz_params_st {
|
| 1137 |
+
const CUexternalSemaphore *extSemArray;
|
| 1138 |
+
const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray;
|
| 1139 |
+
unsigned int numExtSems;
|
| 1140 |
+
CUstream stream;
|
| 1141 |
+
} cuSignalExternalSemaphoresAsync_ptsz_params;
|
| 1142 |
+
|
| 1143 |
+
typedef struct cuWaitExternalSemaphoresAsync_ptsz_params_st {
|
| 1144 |
+
const CUexternalSemaphore *extSemArray;
|
| 1145 |
+
const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray;
|
| 1146 |
+
unsigned int numExtSems;
|
| 1147 |
+
CUstream stream;
|
| 1148 |
+
} cuWaitExternalSemaphoresAsync_ptsz_params;
|
| 1149 |
+
|
| 1150 |
+
typedef struct cuDestroyExternalSemaphore_params_st {
|
| 1151 |
+
CUexternalSemaphore extSem;
|
| 1152 |
+
} cuDestroyExternalSemaphore_params;
|
| 1153 |
+
|
| 1154 |
+
typedef struct cuStreamWaitValue32_ptsz_params_st {
|
| 1155 |
+
CUstream stream;
|
| 1156 |
+
CUdeviceptr addr;
|
| 1157 |
+
cuuint32_t value;
|
| 1158 |
+
unsigned int flags;
|
| 1159 |
+
} cuStreamWaitValue32_ptsz_params;
|
| 1160 |
+
|
| 1161 |
+
typedef struct cuStreamWaitValue64_ptsz_params_st {
|
| 1162 |
+
CUstream stream;
|
| 1163 |
+
CUdeviceptr addr;
|
| 1164 |
+
cuuint64_t value;
|
| 1165 |
+
unsigned int flags;
|
| 1166 |
+
} cuStreamWaitValue64_ptsz_params;
|
| 1167 |
+
|
| 1168 |
+
typedef struct cuStreamWriteValue32_ptsz_params_st {
|
| 1169 |
+
CUstream stream;
|
| 1170 |
+
CUdeviceptr addr;
|
| 1171 |
+
cuuint32_t value;
|
| 1172 |
+
unsigned int flags;
|
| 1173 |
+
} cuStreamWriteValue32_ptsz_params;
|
| 1174 |
+
|
| 1175 |
+
typedef struct cuStreamWriteValue64_ptsz_params_st {
|
| 1176 |
+
CUstream stream;
|
| 1177 |
+
CUdeviceptr addr;
|
| 1178 |
+
cuuint64_t value;
|
| 1179 |
+
unsigned int flags;
|
| 1180 |
+
} cuStreamWriteValue64_ptsz_params;
|
| 1181 |
+
|
| 1182 |
+
typedef struct cuStreamBatchMemOp_ptsz_params_st {
|
| 1183 |
+
CUstream stream;
|
| 1184 |
+
unsigned int count;
|
| 1185 |
+
CUstreamBatchMemOpParams *paramArray;
|
| 1186 |
+
unsigned int flags;
|
| 1187 |
+
} cuStreamBatchMemOp_ptsz_params;
|
| 1188 |
+
|
| 1189 |
+
typedef struct cuFuncGetAttribute_params_st {
|
| 1190 |
+
int *pi;
|
| 1191 |
+
CUfunction_attribute attrib;
|
| 1192 |
+
CUfunction hfunc;
|
| 1193 |
+
} cuFuncGetAttribute_params;
|
| 1194 |
+
|
| 1195 |
+
typedef struct cuFuncSetAttribute_params_st {
|
| 1196 |
+
CUfunction hfunc;
|
| 1197 |
+
CUfunction_attribute attrib;
|
| 1198 |
+
int value;
|
| 1199 |
+
} cuFuncSetAttribute_params;
|
| 1200 |
+
|
| 1201 |
+
typedef struct cuFuncSetCacheConfig_params_st {
|
| 1202 |
+
CUfunction hfunc;
|
| 1203 |
+
CUfunc_cache config;
|
| 1204 |
+
} cuFuncSetCacheConfig_params;
|
| 1205 |
+
|
| 1206 |
+
typedef struct cuFuncSetSharedMemConfig_params_st {
|
| 1207 |
+
CUfunction hfunc;
|
| 1208 |
+
CUsharedconfig config;
|
| 1209 |
+
} cuFuncSetSharedMemConfig_params;
|
| 1210 |
+
|
| 1211 |
+
typedef struct cuFuncGetModule_params_st {
|
| 1212 |
+
CUmodule *hmod;
|
| 1213 |
+
CUfunction hfunc;
|
| 1214 |
+
} cuFuncGetModule_params;
|
| 1215 |
+
|
| 1216 |
+
typedef struct cuLaunchKernel_ptsz_params_st {
|
| 1217 |
+
CUfunction f;
|
| 1218 |
+
unsigned int gridDimX;
|
| 1219 |
+
unsigned int gridDimY;
|
| 1220 |
+
unsigned int gridDimZ;
|
| 1221 |
+
unsigned int blockDimX;
|
| 1222 |
+
unsigned int blockDimY;
|
| 1223 |
+
unsigned int blockDimZ;
|
| 1224 |
+
unsigned int sharedMemBytes;
|
| 1225 |
+
CUstream hStream;
|
| 1226 |
+
void **kernelParams;
|
| 1227 |
+
void **extra;
|
| 1228 |
+
} cuLaunchKernel_ptsz_params;
|
| 1229 |
+
|
| 1230 |
+
typedef struct cuLaunchKernelEx_ptsz_params_st {
|
| 1231 |
+
const CUlaunchConfig *config;
|
| 1232 |
+
CUfunction f;
|
| 1233 |
+
void **kernelParams;
|
| 1234 |
+
void **extra;
|
| 1235 |
+
} cuLaunchKernelEx_ptsz_params;
|
| 1236 |
+
|
| 1237 |
+
typedef struct cuLaunchCooperativeKernel_ptsz_params_st {
|
| 1238 |
+
CUfunction f;
|
| 1239 |
+
unsigned int gridDimX;
|
| 1240 |
+
unsigned int gridDimY;
|
| 1241 |
+
unsigned int gridDimZ;
|
| 1242 |
+
unsigned int blockDimX;
|
| 1243 |
+
unsigned int blockDimY;
|
| 1244 |
+
unsigned int blockDimZ;
|
| 1245 |
+
unsigned int sharedMemBytes;
|
| 1246 |
+
CUstream hStream;
|
| 1247 |
+
void **kernelParams;
|
| 1248 |
+
} cuLaunchCooperativeKernel_ptsz_params;
|
| 1249 |
+
|
| 1250 |
+
typedef struct cuLaunchCooperativeKernelMultiDevice_params_st {
|
| 1251 |
+
CUDA_LAUNCH_PARAMS *launchParamsList;
|
| 1252 |
+
unsigned int numDevices;
|
| 1253 |
+
unsigned int flags;
|
| 1254 |
+
} cuLaunchCooperativeKernelMultiDevice_params;
|
| 1255 |
+
|
| 1256 |
+
typedef struct cuLaunchHostFunc_ptsz_params_st {
|
| 1257 |
+
CUstream hStream;
|
| 1258 |
+
CUhostFn fn;
|
| 1259 |
+
void *userData;
|
| 1260 |
+
} cuLaunchHostFunc_ptsz_params;
|
| 1261 |
+
|
| 1262 |
+
typedef struct cuFuncSetBlockShape_params_st {
|
| 1263 |
+
CUfunction hfunc;
|
| 1264 |
+
int x;
|
| 1265 |
+
int y;
|
| 1266 |
+
int z;
|
| 1267 |
+
} cuFuncSetBlockShape_params;
|
| 1268 |
+
|
| 1269 |
+
typedef struct cuFuncSetSharedSize_params_st {
|
| 1270 |
+
CUfunction hfunc;
|
| 1271 |
+
unsigned int bytes;
|
| 1272 |
+
} cuFuncSetSharedSize_params;
|
| 1273 |
+
|
| 1274 |
+
typedef struct cuParamSetSize_params_st {
|
| 1275 |
+
CUfunction hfunc;
|
| 1276 |
+
unsigned int numbytes;
|
| 1277 |
+
} cuParamSetSize_params;
|
| 1278 |
+
|
| 1279 |
+
typedef struct cuParamSeti_params_st {
|
| 1280 |
+
CUfunction hfunc;
|
| 1281 |
+
int offset;
|
| 1282 |
+
unsigned int value;
|
| 1283 |
+
} cuParamSeti_params;
|
| 1284 |
+
|
| 1285 |
+
typedef struct cuParamSetf_params_st {
|
| 1286 |
+
CUfunction hfunc;
|
| 1287 |
+
int offset;
|
| 1288 |
+
float value;
|
| 1289 |
+
} cuParamSetf_params;
|
| 1290 |
+
|
| 1291 |
+
typedef struct cuParamSetv_params_st {
|
| 1292 |
+
CUfunction hfunc;
|
| 1293 |
+
int offset;
|
| 1294 |
+
void *ptr;
|
| 1295 |
+
unsigned int numbytes;
|
| 1296 |
+
} cuParamSetv_params;
|
| 1297 |
+
|
| 1298 |
+
typedef struct cuLaunch_params_st {
|
| 1299 |
+
CUfunction f;
|
| 1300 |
+
} cuLaunch_params;
|
| 1301 |
+
|
| 1302 |
+
typedef struct cuLaunchGrid_params_st {
|
| 1303 |
+
CUfunction f;
|
| 1304 |
+
int grid_width;
|
| 1305 |
+
int grid_height;
|
| 1306 |
+
} cuLaunchGrid_params;
|
| 1307 |
+
|
| 1308 |
+
typedef struct cuLaunchGridAsync_params_st {
|
| 1309 |
+
CUfunction f;
|
| 1310 |
+
int grid_width;
|
| 1311 |
+
int grid_height;
|
| 1312 |
+
CUstream hStream;
|
| 1313 |
+
} cuLaunchGridAsync_params;
|
| 1314 |
+
|
| 1315 |
+
typedef struct cuParamSetTexRef_params_st {
|
| 1316 |
+
CUfunction hfunc;
|
| 1317 |
+
int texunit;
|
| 1318 |
+
CUtexref hTexRef;
|
| 1319 |
+
} cuParamSetTexRef_params;
|
| 1320 |
+
|
| 1321 |
+
typedef struct cuGraphCreate_params_st {
|
| 1322 |
+
CUgraph *phGraph;
|
| 1323 |
+
unsigned int flags;
|
| 1324 |
+
} cuGraphCreate_params;
|
| 1325 |
+
|
| 1326 |
+
typedef struct cuGraphAddKernelNode_params_st {
|
| 1327 |
+
CUgraphNode *phGraphNode;
|
| 1328 |
+
CUgraph hGraph;
|
| 1329 |
+
const CUgraphNode *dependencies;
|
| 1330 |
+
size_t numDependencies;
|
| 1331 |
+
const CUDA_KERNEL_NODE_PARAMS *nodeParams;
|
| 1332 |
+
} cuGraphAddKernelNode_params;
|
| 1333 |
+
|
| 1334 |
+
typedef struct cuGraphKernelNodeGetParams_params_st {
|
| 1335 |
+
CUgraphNode hNode;
|
| 1336 |
+
CUDA_KERNEL_NODE_PARAMS *nodeParams;
|
| 1337 |
+
} cuGraphKernelNodeGetParams_params;
|
| 1338 |
+
|
| 1339 |
+
typedef struct cuGraphKernelNodeSetParams_params_st {
|
| 1340 |
+
CUgraphNode hNode;
|
| 1341 |
+
const CUDA_KERNEL_NODE_PARAMS *nodeParams;
|
| 1342 |
+
} cuGraphKernelNodeSetParams_params;
|
| 1343 |
+
|
| 1344 |
+
typedef struct cuGraphAddMemcpyNode_params_st {
|
| 1345 |
+
CUgraphNode *phGraphNode;
|
| 1346 |
+
CUgraph hGraph;
|
| 1347 |
+
const CUgraphNode *dependencies;
|
| 1348 |
+
size_t numDependencies;
|
| 1349 |
+
const CUDA_MEMCPY3D *copyParams;
|
| 1350 |
+
CUcontext ctx;
|
| 1351 |
+
} cuGraphAddMemcpyNode_params;
|
| 1352 |
+
|
| 1353 |
+
typedef struct cuGraphMemcpyNodeGetParams_params_st {
|
| 1354 |
+
CUgraphNode hNode;
|
| 1355 |
+
CUDA_MEMCPY3D *nodeParams;
|
| 1356 |
+
} cuGraphMemcpyNodeGetParams_params;
|
| 1357 |
+
|
| 1358 |
+
typedef struct cuGraphMemcpyNodeSetParams_params_st {
|
| 1359 |
+
CUgraphNode hNode;
|
| 1360 |
+
const CUDA_MEMCPY3D *nodeParams;
|
| 1361 |
+
} cuGraphMemcpyNodeSetParams_params;
|
| 1362 |
+
|
| 1363 |
+
typedef struct cuGraphAddMemsetNode_params_st {
|
| 1364 |
+
CUgraphNode *phGraphNode;
|
| 1365 |
+
CUgraph hGraph;
|
| 1366 |
+
const CUgraphNode *dependencies;
|
| 1367 |
+
size_t numDependencies;
|
| 1368 |
+
const CUDA_MEMSET_NODE_PARAMS *memsetParams;
|
| 1369 |
+
CUcontext ctx;
|
| 1370 |
+
} cuGraphAddMemsetNode_params;
|
| 1371 |
+
|
| 1372 |
+
typedef struct cuGraphMemsetNodeGetParams_params_st {
|
| 1373 |
+
CUgraphNode hNode;
|
| 1374 |
+
CUDA_MEMSET_NODE_PARAMS *nodeParams;
|
| 1375 |
+
} cuGraphMemsetNodeGetParams_params;
|
| 1376 |
+
|
| 1377 |
+
typedef struct cuGraphMemsetNodeSetParams_params_st {
|
| 1378 |
+
CUgraphNode hNode;
|
| 1379 |
+
const CUDA_MEMSET_NODE_PARAMS *nodeParams;
|
| 1380 |
+
} cuGraphMemsetNodeSetParams_params;
|
| 1381 |
+
|
| 1382 |
+
typedef struct cuGraphAddHostNode_params_st {
|
| 1383 |
+
CUgraphNode *phGraphNode;
|
| 1384 |
+
CUgraph hGraph;
|
| 1385 |
+
const CUgraphNode *dependencies;
|
| 1386 |
+
size_t numDependencies;
|
| 1387 |
+
const CUDA_HOST_NODE_PARAMS *nodeParams;
|
| 1388 |
+
} cuGraphAddHostNode_params;
|
| 1389 |
+
|
| 1390 |
+
typedef struct cuGraphHostNodeGetParams_params_st {
|
| 1391 |
+
CUgraphNode hNode;
|
| 1392 |
+
CUDA_HOST_NODE_PARAMS *nodeParams;
|
| 1393 |
+
} cuGraphHostNodeGetParams_params;
|
| 1394 |
+
|
| 1395 |
+
typedef struct cuGraphHostNodeSetParams_params_st {
|
| 1396 |
+
CUgraphNode hNode;
|
| 1397 |
+
const CUDA_HOST_NODE_PARAMS *nodeParams;
|
| 1398 |
+
} cuGraphHostNodeSetParams_params;
|
| 1399 |
+
|
| 1400 |
+
typedef struct cuGraphAddChildGraphNode_params_st {
|
| 1401 |
+
CUgraphNode *phGraphNode;
|
| 1402 |
+
CUgraph hGraph;
|
| 1403 |
+
const CUgraphNode *dependencies;
|
| 1404 |
+
size_t numDependencies;
|
| 1405 |
+
CUgraph childGraph;
|
| 1406 |
+
} cuGraphAddChildGraphNode_params;
|
| 1407 |
+
|
| 1408 |
+
typedef struct cuGraphChildGraphNodeGetGraph_params_st {
|
| 1409 |
+
CUgraphNode hNode;
|
| 1410 |
+
CUgraph *phGraph;
|
| 1411 |
+
} cuGraphChildGraphNodeGetGraph_params;
|
| 1412 |
+
|
| 1413 |
+
typedef struct cuGraphAddEmptyNode_params_st {
|
| 1414 |
+
CUgraphNode *phGraphNode;
|
| 1415 |
+
CUgraph hGraph;
|
| 1416 |
+
const CUgraphNode *dependencies;
|
| 1417 |
+
size_t numDependencies;
|
| 1418 |
+
} cuGraphAddEmptyNode_params;
|
| 1419 |
+
|
| 1420 |
+
typedef struct cuGraphAddEventRecordNode_params_st {
|
| 1421 |
+
CUgraphNode *phGraphNode;
|
| 1422 |
+
CUgraph hGraph;
|
| 1423 |
+
const CUgraphNode *dependencies;
|
| 1424 |
+
size_t numDependencies;
|
| 1425 |
+
CUevent event;
|
| 1426 |
+
} cuGraphAddEventRecordNode_params;
|
| 1427 |
+
|
| 1428 |
+
typedef struct cuGraphEventRecordNodeGetEvent_params_st {
|
| 1429 |
+
CUgraphNode hNode;
|
| 1430 |
+
CUevent *event_out;
|
| 1431 |
+
} cuGraphEventRecordNodeGetEvent_params;
|
| 1432 |
+
|
| 1433 |
+
typedef struct cuGraphEventRecordNodeSetEvent_params_st {
|
| 1434 |
+
CUgraphNode hNode;
|
| 1435 |
+
CUevent event;
|
| 1436 |
+
} cuGraphEventRecordNodeSetEvent_params;
|
| 1437 |
+
|
| 1438 |
+
typedef struct cuGraphAddEventWaitNode_params_st {
|
| 1439 |
+
CUgraphNode *phGraphNode;
|
| 1440 |
+
CUgraph hGraph;
|
| 1441 |
+
const CUgraphNode *dependencies;
|
| 1442 |
+
size_t numDependencies;
|
| 1443 |
+
CUevent event;
|
| 1444 |
+
} cuGraphAddEventWaitNode_params;
|
| 1445 |
+
|
| 1446 |
+
typedef struct cuGraphEventWaitNodeGetEvent_params_st {
|
| 1447 |
+
CUgraphNode hNode;
|
| 1448 |
+
CUevent *event_out;
|
| 1449 |
+
} cuGraphEventWaitNodeGetEvent_params;
|
| 1450 |
+
|
| 1451 |
+
typedef struct cuGraphEventWaitNodeSetEvent_params_st {
|
| 1452 |
+
CUgraphNode hNode;
|
| 1453 |
+
CUevent event;
|
| 1454 |
+
} cuGraphEventWaitNodeSetEvent_params;
|
| 1455 |
+
|
| 1456 |
+
typedef struct cuGraphAddExternalSemaphoresSignalNode_params_st {
|
| 1457 |
+
CUgraphNode *phGraphNode;
|
| 1458 |
+
CUgraph hGraph;
|
| 1459 |
+
const CUgraphNode *dependencies;
|
| 1460 |
+
size_t numDependencies;
|
| 1461 |
+
const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams;
|
| 1462 |
+
} cuGraphAddExternalSemaphoresSignalNode_params;
|
| 1463 |
+
|
| 1464 |
+
typedef struct cuGraphExternalSemaphoresSignalNodeGetParams_params_st {
|
| 1465 |
+
CUgraphNode hNode;
|
| 1466 |
+
CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *params_out;
|
| 1467 |
+
} cuGraphExternalSemaphoresSignalNodeGetParams_params;
|
| 1468 |
+
|
| 1469 |
+
typedef struct cuGraphExternalSemaphoresSignalNodeSetParams_params_st {
|
| 1470 |
+
CUgraphNode hNode;
|
| 1471 |
+
const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams;
|
| 1472 |
+
} cuGraphExternalSemaphoresSignalNodeSetParams_params;
|
| 1473 |
+
|
| 1474 |
+
typedef struct cuGraphAddExternalSemaphoresWaitNode_params_st {
|
| 1475 |
+
CUgraphNode *phGraphNode;
|
| 1476 |
+
CUgraph hGraph;
|
| 1477 |
+
const CUgraphNode *dependencies;
|
| 1478 |
+
size_t numDependencies;
|
| 1479 |
+
const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams;
|
| 1480 |
+
} cuGraphAddExternalSemaphoresWaitNode_params;
|
| 1481 |
+
|
| 1482 |
+
typedef struct cuGraphExternalSemaphoresWaitNodeGetParams_params_st {
|
| 1483 |
+
CUgraphNode hNode;
|
| 1484 |
+
CUDA_EXT_SEM_WAIT_NODE_PARAMS *params_out;
|
| 1485 |
+
} cuGraphExternalSemaphoresWaitNodeGetParams_params;
|
| 1486 |
+
|
| 1487 |
+
typedef struct cuGraphExternalSemaphoresWaitNodeSetParams_params_st {
|
| 1488 |
+
CUgraphNode hNode;
|
| 1489 |
+
const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams;
|
| 1490 |
+
} cuGraphExternalSemaphoresWaitNodeSetParams_params;
|
| 1491 |
+
|
| 1492 |
+
typedef struct cuGraphAddMemAllocNode_params_st {
|
| 1493 |
+
CUgraphNode *phGraphNode;
|
| 1494 |
+
CUgraph hGraph;
|
| 1495 |
+
const CUgraphNode *dependencies;
|
| 1496 |
+
size_t numDependencies;
|
| 1497 |
+
CUDA_MEM_ALLOC_NODE_PARAMS *nodeParams;
|
| 1498 |
+
} cuGraphAddMemAllocNode_params;
|
| 1499 |
+
|
| 1500 |
+
typedef struct cuGraphMemAllocNodeGetParams_params_st {
|
| 1501 |
+
CUgraphNode hNode;
|
| 1502 |
+
CUDA_MEM_ALLOC_NODE_PARAMS *params_out;
|
| 1503 |
+
} cuGraphMemAllocNodeGetParams_params;
|
| 1504 |
+
|
| 1505 |
+
typedef struct cuGraphAddMemFreeNode_params_st {
|
| 1506 |
+
CUgraphNode *phGraphNode;
|
| 1507 |
+
CUgraph hGraph;
|
| 1508 |
+
const CUgraphNode *dependencies;
|
| 1509 |
+
size_t numDependencies;
|
| 1510 |
+
CUdeviceptr dptr;
|
| 1511 |
+
} cuGraphAddMemFreeNode_params;
|
| 1512 |
+
|
| 1513 |
+
typedef struct cuGraphMemFreeNodeGetParams_params_st {
|
| 1514 |
+
CUgraphNode hNode;
|
| 1515 |
+
CUdeviceptr *dptr_out;
|
| 1516 |
+
} cuGraphMemFreeNodeGetParams_params;
|
| 1517 |
+
|
| 1518 |
+
typedef struct cuDeviceGraphMemTrim_params_st {
|
| 1519 |
+
CUdevice device;
|
| 1520 |
+
} cuDeviceGraphMemTrim_params;
|
| 1521 |
+
|
| 1522 |
+
typedef struct cuDeviceGetGraphMemAttribute_params_st {
|
| 1523 |
+
CUdevice device;
|
| 1524 |
+
CUgraphMem_attribute attr;
|
| 1525 |
+
void *value;
|
| 1526 |
+
} cuDeviceGetGraphMemAttribute_params;
|
| 1527 |
+
|
| 1528 |
+
typedef struct cuDeviceSetGraphMemAttribute_params_st {
|
| 1529 |
+
CUdevice device;
|
| 1530 |
+
CUgraphMem_attribute attr;
|
| 1531 |
+
void *value;
|
| 1532 |
+
} cuDeviceSetGraphMemAttribute_params;
|
| 1533 |
+
|
| 1534 |
+
typedef struct cuGraphClone_params_st {
|
| 1535 |
+
CUgraph *phGraphClone;
|
| 1536 |
+
CUgraph originalGraph;
|
| 1537 |
+
} cuGraphClone_params;
|
| 1538 |
+
|
| 1539 |
+
typedef struct cuGraphNodeFindInClone_params_st {
|
| 1540 |
+
CUgraphNode *phNode;
|
| 1541 |
+
CUgraphNode hOriginalNode;
|
| 1542 |
+
CUgraph hClonedGraph;
|
| 1543 |
+
} cuGraphNodeFindInClone_params;
|
| 1544 |
+
|
| 1545 |
+
typedef struct cuGraphNodeGetType_params_st {
|
| 1546 |
+
CUgraphNode hNode;
|
| 1547 |
+
CUgraphNodeType *type;
|
| 1548 |
+
} cuGraphNodeGetType_params;
|
| 1549 |
+
|
| 1550 |
+
typedef struct cuGraphGetNodes_params_st {
|
| 1551 |
+
CUgraph hGraph;
|
| 1552 |
+
CUgraphNode *nodes;
|
| 1553 |
+
size_t *numNodes;
|
| 1554 |
+
} cuGraphGetNodes_params;
|
| 1555 |
+
|
| 1556 |
+
typedef struct cuGraphGetRootNodes_params_st {
|
| 1557 |
+
CUgraph hGraph;
|
| 1558 |
+
CUgraphNode *rootNodes;
|
| 1559 |
+
size_t *numRootNodes;
|
| 1560 |
+
} cuGraphGetRootNodes_params;
|
| 1561 |
+
|
| 1562 |
+
typedef struct cuGraphGetEdges_params_st {
|
| 1563 |
+
CUgraph hGraph;
|
| 1564 |
+
CUgraphNode *from;
|
| 1565 |
+
CUgraphNode *to;
|
| 1566 |
+
size_t *numEdges;
|
| 1567 |
+
} cuGraphGetEdges_params;
|
| 1568 |
+
|
| 1569 |
+
typedef struct cuGraphNodeGetDependencies_params_st {
|
| 1570 |
+
CUgraphNode hNode;
|
| 1571 |
+
CUgraphNode *dependencies;
|
| 1572 |
+
size_t *numDependencies;
|
| 1573 |
+
} cuGraphNodeGetDependencies_params;
|
| 1574 |
+
|
| 1575 |
+
typedef struct cuGraphNodeGetDependentNodes_params_st {
|
| 1576 |
+
CUgraphNode hNode;
|
| 1577 |
+
CUgraphNode *dependentNodes;
|
| 1578 |
+
size_t *numDependentNodes;
|
| 1579 |
+
} cuGraphNodeGetDependentNodes_params;
|
| 1580 |
+
|
| 1581 |
+
typedef struct cuGraphAddDependencies_params_st {
|
| 1582 |
+
CUgraph hGraph;
|
| 1583 |
+
const CUgraphNode *from;
|
| 1584 |
+
const CUgraphNode *to;
|
| 1585 |
+
size_t numDependencies;
|
| 1586 |
+
} cuGraphAddDependencies_params;
|
| 1587 |
+
|
| 1588 |
+
typedef struct cuGraphRemoveDependencies_params_st {
|
| 1589 |
+
CUgraph hGraph;
|
| 1590 |
+
const CUgraphNode *from;
|
| 1591 |
+
const CUgraphNode *to;
|
| 1592 |
+
size_t numDependencies;
|
| 1593 |
+
} cuGraphRemoveDependencies_params;
|
| 1594 |
+
|
| 1595 |
+
typedef struct cuGraphDestroyNode_params_st {
|
| 1596 |
+
CUgraphNode hNode;
|
| 1597 |
+
} cuGraphDestroyNode_params;
|
| 1598 |
+
|
| 1599 |
+
typedef struct cuGraphInstantiate_v2_params_st {
|
| 1600 |
+
CUgraphExec *phGraphExec;
|
| 1601 |
+
CUgraph hGraph;
|
| 1602 |
+
CUgraphNode *phErrorNode;
|
| 1603 |
+
char *logBuffer;
|
| 1604 |
+
size_t bufferSize;
|
| 1605 |
+
} cuGraphInstantiate_v2_params;
|
| 1606 |
+
|
| 1607 |
+
typedef struct cuGraphInstantiateWithFlags_params_st {
|
| 1608 |
+
CUgraphExec *phGraphExec;
|
| 1609 |
+
CUgraph hGraph;
|
| 1610 |
+
unsigned long long flags;
|
| 1611 |
+
} cuGraphInstantiateWithFlags_params;
|
| 1612 |
+
|
| 1613 |
+
typedef struct cuGraphExecKernelNodeSetParams_params_st {
|
| 1614 |
+
CUgraphExec hGraphExec;
|
| 1615 |
+
CUgraphNode hNode;
|
| 1616 |
+
const CUDA_KERNEL_NODE_PARAMS *nodeParams;
|
| 1617 |
+
} cuGraphExecKernelNodeSetParams_params;
|
| 1618 |
+
|
| 1619 |
+
typedef struct cuGraphExecMemcpyNodeSetParams_params_st {
|
| 1620 |
+
CUgraphExec hGraphExec;
|
| 1621 |
+
CUgraphNode hNode;
|
| 1622 |
+
const CUDA_MEMCPY3D *copyParams;
|
| 1623 |
+
CUcontext ctx;
|
| 1624 |
+
} cuGraphExecMemcpyNodeSetParams_params;
|
| 1625 |
+
|
| 1626 |
+
typedef struct cuGraphExecMemsetNodeSetParams_params_st {
|
| 1627 |
+
CUgraphExec hGraphExec;
|
| 1628 |
+
CUgraphNode hNode;
|
| 1629 |
+
const CUDA_MEMSET_NODE_PARAMS *memsetParams;
|
| 1630 |
+
CUcontext ctx;
|
| 1631 |
+
} cuGraphExecMemsetNodeSetParams_params;
|
| 1632 |
+
|
| 1633 |
+
typedef struct cuGraphExecHostNodeSetParams_params_st {
|
| 1634 |
+
CUgraphExec hGraphExec;
|
| 1635 |
+
CUgraphNode hNode;
|
| 1636 |
+
const CUDA_HOST_NODE_PARAMS *nodeParams;
|
| 1637 |
+
} cuGraphExecHostNodeSetParams_params;
|
| 1638 |
+
|
| 1639 |
+
typedef struct cuGraphExecChildGraphNodeSetParams_params_st {
|
| 1640 |
+
CUgraphExec hGraphExec;
|
| 1641 |
+
CUgraphNode hNode;
|
| 1642 |
+
CUgraph childGraph;
|
| 1643 |
+
} cuGraphExecChildGraphNodeSetParams_params;
|
| 1644 |
+
|
| 1645 |
+
typedef struct cuGraphExecEventRecordNodeSetEvent_params_st {
|
| 1646 |
+
CUgraphExec hGraphExec;
|
| 1647 |
+
CUgraphNode hNode;
|
| 1648 |
+
CUevent event;
|
| 1649 |
+
} cuGraphExecEventRecordNodeSetEvent_params;
|
| 1650 |
+
|
| 1651 |
+
typedef struct cuGraphExecEventWaitNodeSetEvent_params_st {
|
| 1652 |
+
CUgraphExec hGraphExec;
|
| 1653 |
+
CUgraphNode hNode;
|
| 1654 |
+
CUevent event;
|
| 1655 |
+
} cuGraphExecEventWaitNodeSetEvent_params;
|
| 1656 |
+
|
| 1657 |
+
typedef struct cuGraphExecExternalSemaphoresSignalNodeSetParams_params_st {
|
| 1658 |
+
CUgraphExec hGraphExec;
|
| 1659 |
+
CUgraphNode hNode;
|
| 1660 |
+
const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams;
|
| 1661 |
+
} cuGraphExecExternalSemaphoresSignalNodeSetParams_params;
|
| 1662 |
+
|
| 1663 |
+
typedef struct cuGraphExecExternalSemaphoresWaitNodeSetParams_params_st {
|
| 1664 |
+
CUgraphExec hGraphExec;
|
| 1665 |
+
CUgraphNode hNode;
|
| 1666 |
+
const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams;
|
| 1667 |
+
} cuGraphExecExternalSemaphoresWaitNodeSetParams_params;
|
| 1668 |
+
|
| 1669 |
+
typedef struct cuGraphNodeSetEnabled_params_st {
|
| 1670 |
+
CUgraphExec hGraphExec;
|
| 1671 |
+
CUgraphNode hNode;
|
| 1672 |
+
unsigned int isEnabled;
|
| 1673 |
+
} cuGraphNodeSetEnabled_params;
|
| 1674 |
+
|
| 1675 |
+
typedef struct cuGraphNodeGetEnabled_params_st {
|
| 1676 |
+
CUgraphExec hGraphExec;
|
| 1677 |
+
CUgraphNode hNode;
|
| 1678 |
+
unsigned int *isEnabled;
|
| 1679 |
+
} cuGraphNodeGetEnabled_params;
|
| 1680 |
+
|
| 1681 |
+
typedef struct cuGraphUpload_ptsz_params_st {
|
| 1682 |
+
CUgraphExec hGraphExec;
|
| 1683 |
+
CUstream hStream;
|
| 1684 |
+
} cuGraphUpload_ptsz_params;
|
| 1685 |
+
|
| 1686 |
+
typedef struct cuGraphLaunch_ptsz_params_st {
|
| 1687 |
+
CUgraphExec hGraphExec;
|
| 1688 |
+
CUstream hStream;
|
| 1689 |
+
} cuGraphLaunch_ptsz_params;
|
| 1690 |
+
|
| 1691 |
+
typedef struct cuGraphExecDestroy_params_st {
|
| 1692 |
+
CUgraphExec hGraphExec;
|
| 1693 |
+
} cuGraphExecDestroy_params;
|
| 1694 |
+
|
| 1695 |
+
typedef struct cuGraphDestroy_params_st {
|
| 1696 |
+
CUgraph hGraph;
|
| 1697 |
+
} cuGraphDestroy_params;
|
| 1698 |
+
|
| 1699 |
+
typedef struct cuGraphExecUpdate_params_st {
|
| 1700 |
+
CUgraphExec hGraphExec;
|
| 1701 |
+
CUgraph hGraph;
|
| 1702 |
+
CUgraphNode *hErrorNode_out;
|
| 1703 |
+
CUgraphExecUpdateResult *updateResult_out;
|
| 1704 |
+
} cuGraphExecUpdate_params;
|
| 1705 |
+
|
| 1706 |
+
typedef struct cuGraphKernelNodeCopyAttributes_params_st {
|
| 1707 |
+
CUgraphNode dst;
|
| 1708 |
+
CUgraphNode src;
|
| 1709 |
+
} cuGraphKernelNodeCopyAttributes_params;
|
| 1710 |
+
|
| 1711 |
+
typedef struct cuGraphKernelNodeGetAttribute_params_st {
|
| 1712 |
+
CUgraphNode hNode;
|
| 1713 |
+
CUkernelNodeAttrID attr;
|
| 1714 |
+
CUkernelNodeAttrValue *value_out;
|
| 1715 |
+
} cuGraphKernelNodeGetAttribute_params;
|
| 1716 |
+
|
| 1717 |
+
typedef struct cuGraphKernelNodeSetAttribute_params_st {
|
| 1718 |
+
CUgraphNode hNode;
|
| 1719 |
+
CUkernelNodeAttrID attr;
|
| 1720 |
+
const CUkernelNodeAttrValue *value;
|
| 1721 |
+
} cuGraphKernelNodeSetAttribute_params;
|
| 1722 |
+
|
| 1723 |
+
typedef struct cuGraphDebugDotPrint_params_st {
|
| 1724 |
+
CUgraph hGraph;
|
| 1725 |
+
const char *path;
|
| 1726 |
+
unsigned int flags;
|
| 1727 |
+
} cuGraphDebugDotPrint_params;
|
| 1728 |
+
|
| 1729 |
+
typedef struct cuUserObjectCreate_params_st {
|
| 1730 |
+
CUuserObject *object_out;
|
| 1731 |
+
void *ptr;
|
| 1732 |
+
CUhostFn destroy;
|
| 1733 |
+
unsigned int initialRefcount;
|
| 1734 |
+
unsigned int flags;
|
| 1735 |
+
} cuUserObjectCreate_params;
|
| 1736 |
+
|
| 1737 |
+
typedef struct cuUserObjectRetain_params_st {
|
| 1738 |
+
CUuserObject object;
|
| 1739 |
+
unsigned int count;
|
| 1740 |
+
} cuUserObjectRetain_params;
|
| 1741 |
+
|
| 1742 |
+
typedef struct cuUserObjectRelease_params_st {
|
| 1743 |
+
CUuserObject object;
|
| 1744 |
+
unsigned int count;
|
| 1745 |
+
} cuUserObjectRelease_params;
|
| 1746 |
+
|
| 1747 |
+
typedef struct cuGraphRetainUserObject_params_st {
|
| 1748 |
+
CUgraph graph;
|
| 1749 |
+
CUuserObject object;
|
| 1750 |
+
unsigned int count;
|
| 1751 |
+
unsigned int flags;
|
| 1752 |
+
} cuGraphRetainUserObject_params;
|
| 1753 |
+
|
| 1754 |
+
typedef struct cuGraphReleaseUserObject_params_st {
|
| 1755 |
+
CUgraph graph;
|
| 1756 |
+
CUuserObject object;
|
| 1757 |
+
unsigned int count;
|
| 1758 |
+
} cuGraphReleaseUserObject_params;
|
| 1759 |
+
|
| 1760 |
+
typedef struct cuOccupancyMaxActiveBlocksPerMultiprocessor_params_st {
|
| 1761 |
+
int *numBlocks;
|
| 1762 |
+
CUfunction func;
|
| 1763 |
+
int blockSize;
|
| 1764 |
+
size_t dynamicSMemSize;
|
| 1765 |
+
} cuOccupancyMaxActiveBlocksPerMultiprocessor_params;
|
| 1766 |
+
|
| 1767 |
+
typedef struct cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_params_st {
|
| 1768 |
+
int *numBlocks;
|
| 1769 |
+
CUfunction func;
|
| 1770 |
+
int blockSize;
|
| 1771 |
+
size_t dynamicSMemSize;
|
| 1772 |
+
unsigned int flags;
|
| 1773 |
+
} cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_params;
|
| 1774 |
+
|
| 1775 |
+
typedef struct cuOccupancyMaxPotentialBlockSize_params_st {
|
| 1776 |
+
int *minGridSize;
|
| 1777 |
+
int *blockSize;
|
| 1778 |
+
CUfunction func;
|
| 1779 |
+
CUoccupancyB2DSize blockSizeToDynamicSMemSize;
|
| 1780 |
+
size_t dynamicSMemSize;
|
| 1781 |
+
int blockSizeLimit;
|
| 1782 |
+
} cuOccupancyMaxPotentialBlockSize_params;
|
| 1783 |
+
|
| 1784 |
+
typedef struct cuOccupancyMaxPotentialBlockSizeWithFlags_params_st {
|
| 1785 |
+
int *minGridSize;
|
| 1786 |
+
int *blockSize;
|
| 1787 |
+
CUfunction func;
|
| 1788 |
+
CUoccupancyB2DSize blockSizeToDynamicSMemSize;
|
| 1789 |
+
size_t dynamicSMemSize;
|
| 1790 |
+
int blockSizeLimit;
|
| 1791 |
+
unsigned int flags;
|
| 1792 |
+
} cuOccupancyMaxPotentialBlockSizeWithFlags_params;
|
| 1793 |
+
|
| 1794 |
+
typedef struct cuOccupancyAvailableDynamicSMemPerBlock_params_st {
|
| 1795 |
+
size_t *dynamicSmemSize;
|
| 1796 |
+
CUfunction func;
|
| 1797 |
+
int numBlocks;
|
| 1798 |
+
int blockSize;
|
| 1799 |
+
} cuOccupancyAvailableDynamicSMemPerBlock_params;
|
| 1800 |
+
|
| 1801 |
+
typedef struct cuOccupancyMaxPotentialClusterSize_params_st {
|
| 1802 |
+
int *clusterSize;
|
| 1803 |
+
CUfunction func;
|
| 1804 |
+
const CUlaunchConfig *config;
|
| 1805 |
+
} cuOccupancyMaxPotentialClusterSize_params;
|
| 1806 |
+
|
| 1807 |
+
typedef struct cuOccupancyMaxActiveClusters_params_st {
|
| 1808 |
+
int *numClusters;
|
| 1809 |
+
CUfunction func;
|
| 1810 |
+
const CUlaunchConfig *config;
|
| 1811 |
+
} cuOccupancyMaxActiveClusters_params;
|
| 1812 |
+
|
| 1813 |
+
typedef struct cuTexRefSetArray_params_st {
|
| 1814 |
+
CUtexref hTexRef;
|
| 1815 |
+
CUarray hArray;
|
| 1816 |
+
unsigned int Flags;
|
| 1817 |
+
} cuTexRefSetArray_params;
|
| 1818 |
+
|
| 1819 |
+
typedef struct cuTexRefSetMipmappedArray_params_st {
|
| 1820 |
+
CUtexref hTexRef;
|
| 1821 |
+
CUmipmappedArray hMipmappedArray;
|
| 1822 |
+
unsigned int Flags;
|
| 1823 |
+
} cuTexRefSetMipmappedArray_params;
|
| 1824 |
+
|
| 1825 |
+
typedef struct cuTexRefSetAddress_v2_params_st {
|
| 1826 |
+
size_t *ByteOffset;
|
| 1827 |
+
CUtexref hTexRef;
|
| 1828 |
+
CUdeviceptr dptr;
|
| 1829 |
+
size_t bytes;
|
| 1830 |
+
} cuTexRefSetAddress_v2_params;
|
| 1831 |
+
|
| 1832 |
+
typedef struct cuTexRefSetAddress2D_v3_params_st {
|
| 1833 |
+
CUtexref hTexRef;
|
| 1834 |
+
const CUDA_ARRAY_DESCRIPTOR *desc;
|
| 1835 |
+
CUdeviceptr dptr;
|
| 1836 |
+
size_t Pitch;
|
| 1837 |
+
} cuTexRefSetAddress2D_v3_params;
|
| 1838 |
+
|
| 1839 |
+
typedef struct cuTexRefSetFormat_params_st {
|
| 1840 |
+
CUtexref hTexRef;
|
| 1841 |
+
CUarray_format fmt;
|
| 1842 |
+
int NumPackedComponents;
|
| 1843 |
+
} cuTexRefSetFormat_params;
|
| 1844 |
+
|
| 1845 |
+
typedef struct cuTexRefSetAddressMode_params_st {
|
| 1846 |
+
CUtexref hTexRef;
|
| 1847 |
+
int dim;
|
| 1848 |
+
CUaddress_mode am;
|
| 1849 |
+
} cuTexRefSetAddressMode_params;
|
| 1850 |
+
|
| 1851 |
+
typedef struct cuTexRefSetFilterMode_params_st {
|
| 1852 |
+
CUtexref hTexRef;
|
| 1853 |
+
CUfilter_mode fm;
|
| 1854 |
+
} cuTexRefSetFilterMode_params;
|
| 1855 |
+
|
| 1856 |
+
typedef struct cuTexRefSetMipmapFilterMode_params_st {
|
| 1857 |
+
CUtexref hTexRef;
|
| 1858 |
+
CUfilter_mode fm;
|
| 1859 |
+
} cuTexRefSetMipmapFilterMode_params;
|
| 1860 |
+
|
| 1861 |
+
typedef struct cuTexRefSetMipmapLevelBias_params_st {
|
| 1862 |
+
CUtexref hTexRef;
|
| 1863 |
+
float bias;
|
| 1864 |
+
} cuTexRefSetMipmapLevelBias_params;
|
| 1865 |
+
|
| 1866 |
+
typedef struct cuTexRefSetMipmapLevelClamp_params_st {
|
| 1867 |
+
CUtexref hTexRef;
|
| 1868 |
+
float minMipmapLevelClamp;
|
| 1869 |
+
float maxMipmapLevelClamp;
|
| 1870 |
+
} cuTexRefSetMipmapLevelClamp_params;
|
| 1871 |
+
|
| 1872 |
+
typedef struct cuTexRefSetMaxAnisotropy_params_st {
|
| 1873 |
+
CUtexref hTexRef;
|
| 1874 |
+
unsigned int maxAniso;
|
| 1875 |
+
} cuTexRefSetMaxAnisotropy_params;
|
| 1876 |
+
|
| 1877 |
+
typedef struct cuTexRefSetBorderColor_params_st {
|
| 1878 |
+
CUtexref hTexRef;
|
| 1879 |
+
float *pBorderColor;
|
| 1880 |
+
} cuTexRefSetBorderColor_params;
|
| 1881 |
+
|
| 1882 |
+
typedef struct cuTexRefSetFlags_params_st {
|
| 1883 |
+
CUtexref hTexRef;
|
| 1884 |
+
unsigned int Flags;
|
| 1885 |
+
} cuTexRefSetFlags_params;
|
| 1886 |
+
|
| 1887 |
+
typedef struct cuTexRefGetAddress_v2_params_st {
|
| 1888 |
+
CUdeviceptr *pdptr;
|
| 1889 |
+
CUtexref hTexRef;
|
| 1890 |
+
} cuTexRefGetAddress_v2_params;
|
| 1891 |
+
|
| 1892 |
+
typedef struct cuTexRefGetArray_params_st {
|
| 1893 |
+
CUarray *phArray;
|
| 1894 |
+
CUtexref hTexRef;
|
| 1895 |
+
} cuTexRefGetArray_params;
|
| 1896 |
+
|
| 1897 |
+
typedef struct cuTexRefGetMipmappedArray_params_st {
|
| 1898 |
+
CUmipmappedArray *phMipmappedArray;
|
| 1899 |
+
CUtexref hTexRef;
|
| 1900 |
+
} cuTexRefGetMipmappedArray_params;
|
| 1901 |
+
|
| 1902 |
+
typedef struct cuTexRefGetAddressMode_params_st {
|
| 1903 |
+
CUaddress_mode *pam;
|
| 1904 |
+
CUtexref hTexRef;
|
| 1905 |
+
int dim;
|
| 1906 |
+
} cuTexRefGetAddressMode_params;
|
| 1907 |
+
|
| 1908 |
+
typedef struct cuTexRefGetFilterMode_params_st {
|
| 1909 |
+
CUfilter_mode *pfm;
|
| 1910 |
+
CUtexref hTexRef;
|
| 1911 |
+
} cuTexRefGetFilterMode_params;
|
| 1912 |
+
|
| 1913 |
+
typedef struct cuTexRefGetFormat_params_st {
|
| 1914 |
+
CUarray_format *pFormat;
|
| 1915 |
+
int *pNumChannels;
|
| 1916 |
+
CUtexref hTexRef;
|
| 1917 |
+
} cuTexRefGetFormat_params;
|
| 1918 |
+
|
| 1919 |
+
typedef struct cuTexRefGetMipmapFilterMode_params_st {
|
| 1920 |
+
CUfilter_mode *pfm;
|
| 1921 |
+
CUtexref hTexRef;
|
| 1922 |
+
} cuTexRefGetMipmapFilterMode_params;
|
| 1923 |
+
|
| 1924 |
+
typedef struct cuTexRefGetMipmapLevelBias_params_st {
|
| 1925 |
+
float *pbias;
|
| 1926 |
+
CUtexref hTexRef;
|
| 1927 |
+
} cuTexRefGetMipmapLevelBias_params;
|
| 1928 |
+
|
| 1929 |
+
typedef struct cuTexRefGetMipmapLevelClamp_params_st {
|
| 1930 |
+
float *pminMipmapLevelClamp;
|
| 1931 |
+
float *pmaxMipmapLevelClamp;
|
| 1932 |
+
CUtexref hTexRef;
|
| 1933 |
+
} cuTexRefGetMipmapLevelClamp_params;
|
| 1934 |
+
|
| 1935 |
+
typedef struct cuTexRefGetMaxAnisotropy_params_st {
|
| 1936 |
+
int *pmaxAniso;
|
| 1937 |
+
CUtexref hTexRef;
|
| 1938 |
+
} cuTexRefGetMaxAnisotropy_params;
|
| 1939 |
+
|
| 1940 |
+
typedef struct cuTexRefGetBorderColor_params_st {
|
| 1941 |
+
float *pBorderColor;
|
| 1942 |
+
CUtexref hTexRef;
|
| 1943 |
+
} cuTexRefGetBorderColor_params;
|
| 1944 |
+
|
| 1945 |
+
typedef struct cuTexRefGetFlags_params_st {
|
| 1946 |
+
unsigned int *pFlags;
|
| 1947 |
+
CUtexref hTexRef;
|
| 1948 |
+
} cuTexRefGetFlags_params;
|
| 1949 |
+
|
| 1950 |
+
typedef struct cuTexRefCreate_params_st {
|
| 1951 |
+
CUtexref *pTexRef;
|
| 1952 |
+
} cuTexRefCreate_params;
|
| 1953 |
+
|
| 1954 |
+
typedef struct cuTexRefDestroy_params_st {
|
| 1955 |
+
CUtexref hTexRef;
|
| 1956 |
+
} cuTexRefDestroy_params;
|
| 1957 |
+
|
| 1958 |
+
typedef struct cuSurfRefSetArray_params_st {
|
| 1959 |
+
CUsurfref hSurfRef;
|
| 1960 |
+
CUarray hArray;
|
| 1961 |
+
unsigned int Flags;
|
| 1962 |
+
} cuSurfRefSetArray_params;
|
| 1963 |
+
|
| 1964 |
+
typedef struct cuSurfRefGetArray_params_st {
|
| 1965 |
+
CUarray *phArray;
|
| 1966 |
+
CUsurfref hSurfRef;
|
| 1967 |
+
} cuSurfRefGetArray_params;
|
| 1968 |
+
|
| 1969 |
+
typedef struct cuTexObjectCreate_params_st {
|
| 1970 |
+
CUtexObject *pTexObject;
|
| 1971 |
+
const CUDA_RESOURCE_DESC *pResDesc;
|
| 1972 |
+
const CUDA_TEXTURE_DESC *pTexDesc;
|
| 1973 |
+
const CUDA_RESOURCE_VIEW_DESC *pResViewDesc;
|
| 1974 |
+
} cuTexObjectCreate_params;
|
| 1975 |
+
|
| 1976 |
+
typedef struct cuTexObjectDestroy_params_st {
|
| 1977 |
+
CUtexObject texObject;
|
| 1978 |
+
} cuTexObjectDestroy_params;
|
| 1979 |
+
|
| 1980 |
+
typedef struct cuTexObjectGetResourceDesc_params_st {
|
| 1981 |
+
CUDA_RESOURCE_DESC *pResDesc;
|
| 1982 |
+
CUtexObject texObject;
|
| 1983 |
+
} cuTexObjectGetResourceDesc_params;
|
| 1984 |
+
|
| 1985 |
+
typedef struct cuTexObjectGetTextureDesc_params_st {
|
| 1986 |
+
CUDA_TEXTURE_DESC *pTexDesc;
|
| 1987 |
+
CUtexObject texObject;
|
| 1988 |
+
} cuTexObjectGetTextureDesc_params;
|
| 1989 |
+
|
| 1990 |
+
typedef struct cuTexObjectGetResourceViewDesc_params_st {
|
| 1991 |
+
CUDA_RESOURCE_VIEW_DESC *pResViewDesc;
|
| 1992 |
+
CUtexObject texObject;
|
| 1993 |
+
} cuTexObjectGetResourceViewDesc_params;
|
| 1994 |
+
|
| 1995 |
+
typedef struct cuSurfObjectCreate_params_st {
|
| 1996 |
+
CUsurfObject *pSurfObject;
|
| 1997 |
+
const CUDA_RESOURCE_DESC *pResDesc;
|
| 1998 |
+
} cuSurfObjectCreate_params;
|
| 1999 |
+
|
| 2000 |
+
typedef struct cuSurfObjectDestroy_params_st {
|
| 2001 |
+
CUsurfObject surfObject;
|
| 2002 |
+
} cuSurfObjectDestroy_params;
|
| 2003 |
+
|
| 2004 |
+
typedef struct cuSurfObjectGetResourceDesc_params_st {
|
| 2005 |
+
CUDA_RESOURCE_DESC *pResDesc;
|
| 2006 |
+
CUsurfObject surfObject;
|
| 2007 |
+
} cuSurfObjectGetResourceDesc_params;
|
| 2008 |
+
|
| 2009 |
+
typedef struct cuDeviceCanAccessPeer_params_st {
|
| 2010 |
+
int *canAccessPeer;
|
| 2011 |
+
CUdevice dev;
|
| 2012 |
+
CUdevice peerDev;
|
| 2013 |
+
} cuDeviceCanAccessPeer_params;
|
| 2014 |
+
|
| 2015 |
+
typedef struct cuCtxEnablePeerAccess_params_st {
|
| 2016 |
+
CUcontext peerContext;
|
| 2017 |
+
unsigned int Flags;
|
| 2018 |
+
} cuCtxEnablePeerAccess_params;
|
| 2019 |
+
|
| 2020 |
+
typedef struct cuCtxDisablePeerAccess_params_st {
|
| 2021 |
+
CUcontext peerContext;
|
| 2022 |
+
} cuCtxDisablePeerAccess_params;
|
| 2023 |
+
|
| 2024 |
+
typedef struct cuDeviceGetP2PAttribute_params_st {
|
| 2025 |
+
int *value;
|
| 2026 |
+
CUdevice_P2PAttribute attrib;
|
| 2027 |
+
CUdevice srcDevice;
|
| 2028 |
+
CUdevice dstDevice;
|
| 2029 |
+
} cuDeviceGetP2PAttribute_params;
|
| 2030 |
+
|
| 2031 |
+
typedef struct cuGraphicsUnregisterResource_params_st {
|
| 2032 |
+
CUgraphicsResource resource;
|
| 2033 |
+
} cuGraphicsUnregisterResource_params;
|
| 2034 |
+
|
| 2035 |
+
typedef struct cuGraphicsSubResourceGetMappedArray_params_st {
|
| 2036 |
+
CUarray *pArray;
|
| 2037 |
+
CUgraphicsResource resource;
|
| 2038 |
+
unsigned int arrayIndex;
|
| 2039 |
+
unsigned int mipLevel;
|
| 2040 |
+
} cuGraphicsSubResourceGetMappedArray_params;
|
| 2041 |
+
|
| 2042 |
+
typedef struct cuGraphicsResourceGetMappedMipmappedArray_params_st {
|
| 2043 |
+
CUmipmappedArray *pMipmappedArray;
|
| 2044 |
+
CUgraphicsResource resource;
|
| 2045 |
+
} cuGraphicsResourceGetMappedMipmappedArray_params;
|
| 2046 |
+
|
| 2047 |
+
typedef struct cuGraphicsResourceGetMappedPointer_v2_params_st {
|
| 2048 |
+
CUdeviceptr *pDevPtr;
|
| 2049 |
+
size_t *pSize;
|
| 2050 |
+
CUgraphicsResource resource;
|
| 2051 |
+
} cuGraphicsResourceGetMappedPointer_v2_params;
|
| 2052 |
+
|
| 2053 |
+
typedef struct cuGraphicsResourceSetMapFlags_v2_params_st {
|
| 2054 |
+
CUgraphicsResource resource;
|
| 2055 |
+
unsigned int flags;
|
| 2056 |
+
} cuGraphicsResourceSetMapFlags_v2_params;
|
| 2057 |
+
|
| 2058 |
+
typedef struct cuGraphicsMapResources_ptsz_params_st {
|
| 2059 |
+
unsigned int count;
|
| 2060 |
+
CUgraphicsResource *resources;
|
| 2061 |
+
CUstream hStream;
|
| 2062 |
+
} cuGraphicsMapResources_ptsz_params;
|
| 2063 |
+
|
| 2064 |
+
typedef struct cuGraphicsUnmapResources_ptsz_params_st {
|
| 2065 |
+
unsigned int count;
|
| 2066 |
+
CUgraphicsResource *resources;
|
| 2067 |
+
CUstream hStream;
|
| 2068 |
+
} cuGraphicsUnmapResources_ptsz_params;
|
| 2069 |
+
|
| 2070 |
+
typedef struct cuGetProcAddress_params_st {
|
| 2071 |
+
const char *symbol;
|
| 2072 |
+
void **pfn;
|
| 2073 |
+
int cudaVersion;
|
| 2074 |
+
cuuint64_t flags;
|
| 2075 |
+
} cuGetProcAddress_params;
|
| 2076 |
+
|
| 2077 |
+
typedef struct cuModuleGetLoadingMode_params_st {
|
| 2078 |
+
CUmoduleLoadingMode *mode;
|
| 2079 |
+
} cuModuleGetLoadingMode_params;
|
| 2080 |
+
|
| 2081 |
+
typedef struct cuMemGetHandleForAddressRange_params_st {
|
| 2082 |
+
void *handle;
|
| 2083 |
+
CUdeviceptr dptr;
|
| 2084 |
+
size_t size;
|
| 2085 |
+
CUmemRangeHandleType handleType;
|
| 2086 |
+
unsigned long long flags;
|
| 2087 |
+
} cuMemGetHandleForAddressRange_params;
|
| 2088 |
+
|
| 2089 |
+
typedef struct cuGetExportTable_params_st {
|
| 2090 |
+
const void **ppExportTable;
|
| 2091 |
+
const CUuuid *pExportTableId;
|
| 2092 |
+
} cuGetExportTable_params;
|
| 2093 |
+
|
| 2094 |
+
typedef struct cuMemHostRegister_params_st {
|
| 2095 |
+
void *p;
|
| 2096 |
+
size_t bytesize;
|
| 2097 |
+
unsigned int Flags;
|
| 2098 |
+
} cuMemHostRegister_params;
|
| 2099 |
+
|
| 2100 |
+
typedef struct cuGraphicsResourceSetMapFlags_params_st {
|
| 2101 |
+
CUgraphicsResource resource;
|
| 2102 |
+
unsigned int flags;
|
| 2103 |
+
} cuGraphicsResourceSetMapFlags_params;
|
| 2104 |
+
|
| 2105 |
+
typedef struct cuLinkCreate_params_st {
|
| 2106 |
+
unsigned int numOptions;
|
| 2107 |
+
CUjit_option *options;
|
| 2108 |
+
void **optionValues;
|
| 2109 |
+
CUlinkState *stateOut;
|
| 2110 |
+
} cuLinkCreate_params;
|
| 2111 |
+
|
| 2112 |
+
typedef struct cuLinkAddData_params_st {
|
| 2113 |
+
CUlinkState state;
|
| 2114 |
+
CUjitInputType type;
|
| 2115 |
+
void *data;
|
| 2116 |
+
size_t size;
|
| 2117 |
+
const char *name;
|
| 2118 |
+
unsigned int numOptions;
|
| 2119 |
+
CUjit_option *options;
|
| 2120 |
+
void **optionValues;
|
| 2121 |
+
} cuLinkAddData_params;
|
| 2122 |
+
|
| 2123 |
+
typedef struct cuLinkAddFile_params_st {
|
| 2124 |
+
CUlinkState state;
|
| 2125 |
+
CUjitInputType type;
|
| 2126 |
+
const char *path;
|
| 2127 |
+
unsigned int numOptions;
|
| 2128 |
+
CUjit_option *options;
|
| 2129 |
+
void **optionValues;
|
| 2130 |
+
} cuLinkAddFile_params;
|
| 2131 |
+
|
| 2132 |
+
typedef struct cuTexRefSetAddress2D_v2_params_st {
|
| 2133 |
+
CUtexref hTexRef;
|
| 2134 |
+
const CUDA_ARRAY_DESCRIPTOR *desc;
|
| 2135 |
+
CUdeviceptr dptr;
|
| 2136 |
+
size_t Pitch;
|
| 2137 |
+
} cuTexRefSetAddress2D_v2_params;
|
| 2138 |
+
|
| 2139 |
+
typedef struct cuDeviceTotalMem_params_st {
|
| 2140 |
+
unsigned int *bytes;
|
| 2141 |
+
CUdevice dev;
|
| 2142 |
+
} cuDeviceTotalMem_params;
|
| 2143 |
+
|
| 2144 |
+
typedef struct cuCtxCreate_params_st {
|
| 2145 |
+
CUcontext *pctx;
|
| 2146 |
+
unsigned int flags;
|
| 2147 |
+
CUdevice dev;
|
| 2148 |
+
} cuCtxCreate_params;
|
| 2149 |
+
|
| 2150 |
+
typedef struct cuModuleGetGlobal_params_st {
|
| 2151 |
+
CUdeviceptr_v1 *dptr;
|
| 2152 |
+
unsigned int *bytes;
|
| 2153 |
+
CUmodule hmod;
|
| 2154 |
+
const char *name;
|
| 2155 |
+
} cuModuleGetGlobal_params;
|
| 2156 |
+
|
| 2157 |
+
typedef struct cuMemGetInfo_params_st {
|
| 2158 |
+
unsigned int *free;
|
| 2159 |
+
unsigned int *total;
|
| 2160 |
+
} cuMemGetInfo_params;
|
| 2161 |
+
|
| 2162 |
+
typedef struct cuMemAlloc_params_st {
|
| 2163 |
+
CUdeviceptr_v1 *dptr;
|
| 2164 |
+
unsigned int bytesize;
|
| 2165 |
+
} cuMemAlloc_params;
|
| 2166 |
+
|
| 2167 |
+
typedef struct cuMemAllocPitch_params_st {
|
| 2168 |
+
CUdeviceptr_v1 *dptr;
|
| 2169 |
+
unsigned int *pPitch;
|
| 2170 |
+
unsigned int WidthInBytes;
|
| 2171 |
+
unsigned int Height;
|
| 2172 |
+
unsigned int ElementSizeBytes;
|
| 2173 |
+
} cuMemAllocPitch_params;
|
| 2174 |
+
|
| 2175 |
+
typedef struct cuMemFree_params_st {
|
| 2176 |
+
CUdeviceptr_v1 dptr;
|
| 2177 |
+
} cuMemFree_params;
|
| 2178 |
+
|
| 2179 |
+
typedef struct cuMemGetAddressRange_params_st {
|
| 2180 |
+
CUdeviceptr_v1 *pbase;
|
| 2181 |
+
unsigned int *psize;
|
| 2182 |
+
CUdeviceptr_v1 dptr;
|
| 2183 |
+
} cuMemGetAddressRange_params;
|
| 2184 |
+
|
| 2185 |
+
typedef struct cuMemAllocHost_params_st {
|
| 2186 |
+
void **pp;
|
| 2187 |
+
unsigned int bytesize;
|
| 2188 |
+
} cuMemAllocHost_params;
|
| 2189 |
+
|
| 2190 |
+
typedef struct cuMemHostGetDevicePointer_params_st {
|
| 2191 |
+
CUdeviceptr_v1 *pdptr;
|
| 2192 |
+
void *p;
|
| 2193 |
+
unsigned int Flags;
|
| 2194 |
+
} cuMemHostGetDevicePointer_params;
|
| 2195 |
+
|
| 2196 |
+
typedef struct cuMemcpyHtoD_params_st {
|
| 2197 |
+
CUdeviceptr_v1 dstDevice;
|
| 2198 |
+
const void *srcHost;
|
| 2199 |
+
unsigned int ByteCount;
|
| 2200 |
+
} cuMemcpyHtoD_params;
|
| 2201 |
+
|
| 2202 |
+
typedef struct cuMemcpyDtoH_params_st {
|
| 2203 |
+
void *dstHost;
|
| 2204 |
+
CUdeviceptr_v1 srcDevice;
|
| 2205 |
+
unsigned int ByteCount;
|
| 2206 |
+
} cuMemcpyDtoH_params;
|
| 2207 |
+
|
| 2208 |
+
typedef struct cuMemcpyDtoD_params_st {
|
| 2209 |
+
CUdeviceptr_v1 dstDevice;
|
| 2210 |
+
CUdeviceptr_v1 srcDevice;
|
| 2211 |
+
unsigned int ByteCount;
|
| 2212 |
+
} cuMemcpyDtoD_params;
|
| 2213 |
+
|
| 2214 |
+
typedef struct cuMemcpyDtoA_params_st {
|
| 2215 |
+
CUarray dstArray;
|
| 2216 |
+
unsigned int dstOffset;
|
| 2217 |
+
CUdeviceptr_v1 srcDevice;
|
| 2218 |
+
unsigned int ByteCount;
|
| 2219 |
+
} cuMemcpyDtoA_params;
|
| 2220 |
+
|
| 2221 |
+
typedef struct cuMemcpyAtoD_params_st {
|
| 2222 |
+
CUdeviceptr_v1 dstDevice;
|
| 2223 |
+
CUarray srcArray;
|
| 2224 |
+
unsigned int srcOffset;
|
| 2225 |
+
unsigned int ByteCount;
|
| 2226 |
+
} cuMemcpyAtoD_params;
|
| 2227 |
+
|
| 2228 |
+
typedef struct cuMemcpyHtoA_params_st {
|
| 2229 |
+
CUarray dstArray;
|
| 2230 |
+
unsigned int dstOffset;
|
| 2231 |
+
const void *srcHost;
|
| 2232 |
+
unsigned int ByteCount;
|
| 2233 |
+
} cuMemcpyHtoA_params;
|
| 2234 |
+
|
| 2235 |
+
typedef struct cuMemcpyAtoH_params_st {
|
| 2236 |
+
void *dstHost;
|
| 2237 |
+
CUarray srcArray;
|
| 2238 |
+
unsigned int srcOffset;
|
| 2239 |
+
unsigned int ByteCount;
|
| 2240 |
+
} cuMemcpyAtoH_params;
|
| 2241 |
+
|
| 2242 |
+
typedef struct cuMemcpyAtoA_params_st {
|
| 2243 |
+
CUarray dstArray;
|
| 2244 |
+
unsigned int dstOffset;
|
| 2245 |
+
CUarray srcArray;
|
| 2246 |
+
unsigned int srcOffset;
|
| 2247 |
+
unsigned int ByteCount;
|
| 2248 |
+
} cuMemcpyAtoA_params;
|
| 2249 |
+
|
| 2250 |
+
typedef struct cuMemcpyHtoAAsync_params_st {
|
| 2251 |
+
CUarray dstArray;
|
| 2252 |
+
unsigned int dstOffset;
|
| 2253 |
+
const void *srcHost;
|
| 2254 |
+
unsigned int ByteCount;
|
| 2255 |
+
CUstream hStream;
|
| 2256 |
+
} cuMemcpyHtoAAsync_params;
|
| 2257 |
+
|
| 2258 |
+
typedef struct cuMemcpyAtoHAsync_params_st {
|
| 2259 |
+
void *dstHost;
|
| 2260 |
+
CUarray srcArray;
|
| 2261 |
+
unsigned int srcOffset;
|
| 2262 |
+
unsigned int ByteCount;
|
| 2263 |
+
CUstream hStream;
|
| 2264 |
+
} cuMemcpyAtoHAsync_params;
|
| 2265 |
+
|
| 2266 |
+
typedef struct cuMemcpy2D_params_st {
|
| 2267 |
+
const CUDA_MEMCPY2D_v1 *pCopy;
|
| 2268 |
+
} cuMemcpy2D_params;
|
| 2269 |
+
|
| 2270 |
+
typedef struct cuMemcpy2DUnaligned_params_st {
|
| 2271 |
+
const CUDA_MEMCPY2D_v1 *pCopy;
|
| 2272 |
+
} cuMemcpy2DUnaligned_params;
|
| 2273 |
+
|
| 2274 |
+
typedef struct cuMemcpy3D_params_st {
|
| 2275 |
+
const CUDA_MEMCPY3D_v1 *pCopy;
|
| 2276 |
+
} cuMemcpy3D_params;
|
| 2277 |
+
|
| 2278 |
+
typedef struct cuMemcpyHtoDAsync_params_st {
|
| 2279 |
+
CUdeviceptr_v1 dstDevice;
|
| 2280 |
+
const void *srcHost;
|
| 2281 |
+
unsigned int ByteCount;
|
| 2282 |
+
CUstream hStream;
|
| 2283 |
+
} cuMemcpyHtoDAsync_params;
|
| 2284 |
+
|
| 2285 |
+
typedef struct cuMemcpyDtoHAsync_params_st {
|
| 2286 |
+
void *dstHost;
|
| 2287 |
+
CUdeviceptr_v1 srcDevice;
|
| 2288 |
+
unsigned int ByteCount;
|
| 2289 |
+
CUstream hStream;
|
| 2290 |
+
} cuMemcpyDtoHAsync_params;
|
| 2291 |
+
|
| 2292 |
+
typedef struct cuMemcpyDtoDAsync_params_st {
|
| 2293 |
+
CUdeviceptr_v1 dstDevice;
|
| 2294 |
+
CUdeviceptr_v1 srcDevice;
|
| 2295 |
+
unsigned int ByteCount;
|
| 2296 |
+
CUstream hStream;
|
| 2297 |
+
} cuMemcpyDtoDAsync_params;
|
| 2298 |
+
|
| 2299 |
+
typedef struct cuMemcpy2DAsync_params_st {
|
| 2300 |
+
const CUDA_MEMCPY2D_v1 *pCopy;
|
| 2301 |
+
CUstream hStream;
|
| 2302 |
+
} cuMemcpy2DAsync_params;
|
| 2303 |
+
|
| 2304 |
+
typedef struct cuMemcpy3DAsync_params_st {
|
| 2305 |
+
const CUDA_MEMCPY3D_v1 *pCopy;
|
| 2306 |
+
CUstream hStream;
|
| 2307 |
+
} cuMemcpy3DAsync_params;
|
| 2308 |
+
|
| 2309 |
+
typedef struct cuMemsetD8_params_st {
|
| 2310 |
+
CUdeviceptr_v1 dstDevice;
|
| 2311 |
+
unsigned char uc;
|
| 2312 |
+
unsigned int N;
|
| 2313 |
+
} cuMemsetD8_params;
|
| 2314 |
+
|
| 2315 |
+
typedef struct cuMemsetD16_params_st {
|
| 2316 |
+
CUdeviceptr_v1 dstDevice;
|
| 2317 |
+
unsigned short us;
|
| 2318 |
+
unsigned int N;
|
| 2319 |
+
} cuMemsetD16_params;
|
| 2320 |
+
|
| 2321 |
+
typedef struct cuMemsetD32_params_st {
|
| 2322 |
+
CUdeviceptr_v1 dstDevice;
|
| 2323 |
+
unsigned int ui;
|
| 2324 |
+
unsigned int N;
|
| 2325 |
+
} cuMemsetD32_params;
|
| 2326 |
+
|
| 2327 |
+
typedef struct cuMemsetD2D8_params_st {
|
| 2328 |
+
CUdeviceptr_v1 dstDevice;
|
| 2329 |
+
unsigned int dstPitch;
|
| 2330 |
+
unsigned char uc;
|
| 2331 |
+
unsigned int Width;
|
| 2332 |
+
unsigned int Height;
|
| 2333 |
+
} cuMemsetD2D8_params;
|
| 2334 |
+
|
| 2335 |
+
typedef struct cuMemsetD2D16_params_st {
|
| 2336 |
+
CUdeviceptr_v1 dstDevice;
|
| 2337 |
+
unsigned int dstPitch;
|
| 2338 |
+
unsigned short us;
|
| 2339 |
+
unsigned int Width;
|
| 2340 |
+
unsigned int Height;
|
| 2341 |
+
} cuMemsetD2D16_params;
|
| 2342 |
+
|
| 2343 |
+
typedef struct cuMemsetD2D32_params_st {
|
| 2344 |
+
CUdeviceptr_v1 dstDevice;
|
| 2345 |
+
unsigned int dstPitch;
|
| 2346 |
+
unsigned int ui;
|
| 2347 |
+
unsigned int Width;
|
| 2348 |
+
unsigned int Height;
|
| 2349 |
+
} cuMemsetD2D32_params;
|
| 2350 |
+
|
| 2351 |
+
typedef struct cuArrayCreate_params_st {
|
| 2352 |
+
CUarray *pHandle;
|
| 2353 |
+
const CUDA_ARRAY_DESCRIPTOR_v1 *pAllocateArray;
|
| 2354 |
+
} cuArrayCreate_params;
|
| 2355 |
+
|
| 2356 |
+
typedef struct cuArrayGetDescriptor_params_st {
|
| 2357 |
+
CUDA_ARRAY_DESCRIPTOR_v1 *pArrayDescriptor;
|
| 2358 |
+
CUarray hArray;
|
| 2359 |
+
} cuArrayGetDescriptor_params;
|
| 2360 |
+
|
| 2361 |
+
typedef struct cuArray3DCreate_params_st {
|
| 2362 |
+
CUarray *pHandle;
|
| 2363 |
+
const CUDA_ARRAY3D_DESCRIPTOR_v1 *pAllocateArray;
|
| 2364 |
+
} cuArray3DCreate_params;
|
| 2365 |
+
|
| 2366 |
+
typedef struct cuArray3DGetDescriptor_params_st {
|
| 2367 |
+
CUDA_ARRAY3D_DESCRIPTOR_v1 *pArrayDescriptor;
|
| 2368 |
+
CUarray hArray;
|
| 2369 |
+
} cuArray3DGetDescriptor_params;
|
| 2370 |
+
|
| 2371 |
+
typedef struct cuTexRefSetAddress_params_st {
|
| 2372 |
+
unsigned int *ByteOffset;
|
| 2373 |
+
CUtexref hTexRef;
|
| 2374 |
+
CUdeviceptr_v1 dptr;
|
| 2375 |
+
unsigned int bytes;
|
| 2376 |
+
} cuTexRefSetAddress_params;
|
| 2377 |
+
|
| 2378 |
+
typedef struct cuTexRefSetAddress2D_params_st {
|
| 2379 |
+
CUtexref hTexRef;
|
| 2380 |
+
const CUDA_ARRAY_DESCRIPTOR_v1 *desc;
|
| 2381 |
+
CUdeviceptr_v1 dptr;
|
| 2382 |
+
unsigned int Pitch;
|
| 2383 |
+
} cuTexRefSetAddress2D_params;
|
| 2384 |
+
|
| 2385 |
+
typedef struct cuTexRefGetAddress_params_st {
|
| 2386 |
+
CUdeviceptr_v1 *pdptr;
|
| 2387 |
+
CUtexref hTexRef;
|
| 2388 |
+
} cuTexRefGetAddress_params;
|
| 2389 |
+
|
| 2390 |
+
typedef struct cuGraphicsResourceGetMappedPointer_params_st {
|
| 2391 |
+
CUdeviceptr_v1 *pDevPtr;
|
| 2392 |
+
unsigned int *pSize;
|
| 2393 |
+
CUgraphicsResource resource;
|
| 2394 |
+
} cuGraphicsResourceGetMappedPointer_params;
|
| 2395 |
+
|
| 2396 |
+
typedef struct cuCtxDestroy_params_st {
|
| 2397 |
+
CUcontext ctx;
|
| 2398 |
+
} cuCtxDestroy_params;
|
| 2399 |
+
|
| 2400 |
+
typedef struct cuCtxPopCurrent_params_st {
|
| 2401 |
+
CUcontext *pctx;
|
| 2402 |
+
} cuCtxPopCurrent_params;
|
| 2403 |
+
|
| 2404 |
+
typedef struct cuCtxPushCurrent_params_st {
|
| 2405 |
+
CUcontext ctx;
|
| 2406 |
+
} cuCtxPushCurrent_params;
|
| 2407 |
+
|
| 2408 |
+
typedef struct cuStreamDestroy_params_st {
|
| 2409 |
+
CUstream hStream;
|
| 2410 |
+
} cuStreamDestroy_params;
|
| 2411 |
+
|
| 2412 |
+
typedef struct cuEventDestroy_params_st {
|
| 2413 |
+
CUevent hEvent;
|
| 2414 |
+
} cuEventDestroy_params;
|
| 2415 |
+
|
| 2416 |
+
typedef struct cuDevicePrimaryCtxRelease_params_st {
|
| 2417 |
+
CUdevice dev;
|
| 2418 |
+
} cuDevicePrimaryCtxRelease_params;
|
| 2419 |
+
|
| 2420 |
+
typedef struct cuDevicePrimaryCtxReset_params_st {
|
| 2421 |
+
CUdevice dev;
|
| 2422 |
+
} cuDevicePrimaryCtxReset_params;
|
| 2423 |
+
|
| 2424 |
+
typedef struct cuDevicePrimaryCtxSetFlags_params_st {
|
| 2425 |
+
CUdevice dev;
|
| 2426 |
+
unsigned int flags;
|
| 2427 |
+
} cuDevicePrimaryCtxSetFlags_params;
|
| 2428 |
+
|
| 2429 |
+
typedef struct cuMemcpyHtoD_v2_params_st {
|
| 2430 |
+
CUdeviceptr dstDevice;
|
| 2431 |
+
const void *srcHost;
|
| 2432 |
+
size_t ByteCount;
|
| 2433 |
+
} cuMemcpyHtoD_v2_params;
|
| 2434 |
+
|
| 2435 |
+
typedef struct cuMemcpyDtoH_v2_params_st {
|
| 2436 |
+
void *dstHost;
|
| 2437 |
+
CUdeviceptr srcDevice;
|
| 2438 |
+
size_t ByteCount;
|
| 2439 |
+
} cuMemcpyDtoH_v2_params;
|
| 2440 |
+
|
| 2441 |
+
typedef struct cuMemcpyDtoD_v2_params_st {
|
| 2442 |
+
CUdeviceptr dstDevice;
|
| 2443 |
+
CUdeviceptr srcDevice;
|
| 2444 |
+
size_t ByteCount;
|
| 2445 |
+
} cuMemcpyDtoD_v2_params;
|
| 2446 |
+
|
| 2447 |
+
typedef struct cuMemcpyDtoA_v2_params_st {
|
| 2448 |
+
CUarray dstArray;
|
| 2449 |
+
size_t dstOffset;
|
| 2450 |
+
CUdeviceptr srcDevice;
|
| 2451 |
+
size_t ByteCount;
|
| 2452 |
+
} cuMemcpyDtoA_v2_params;
|
| 2453 |
+
|
| 2454 |
+
typedef struct cuMemcpyAtoD_v2_params_st {
|
| 2455 |
+
CUdeviceptr dstDevice;
|
| 2456 |
+
CUarray srcArray;
|
| 2457 |
+
size_t srcOffset;
|
| 2458 |
+
size_t ByteCount;
|
| 2459 |
+
} cuMemcpyAtoD_v2_params;
|
| 2460 |
+
|
| 2461 |
+
typedef struct cuMemcpyHtoA_v2_params_st {
|
| 2462 |
+
CUarray dstArray;
|
| 2463 |
+
size_t dstOffset;
|
| 2464 |
+
const void *srcHost;
|
| 2465 |
+
size_t ByteCount;
|
| 2466 |
+
} cuMemcpyHtoA_v2_params;
|
| 2467 |
+
|
| 2468 |
+
typedef struct cuMemcpyAtoH_v2_params_st {
|
| 2469 |
+
void *dstHost;
|
| 2470 |
+
CUarray srcArray;
|
| 2471 |
+
size_t srcOffset;
|
| 2472 |
+
size_t ByteCount;
|
| 2473 |
+
} cuMemcpyAtoH_v2_params;
|
| 2474 |
+
|
| 2475 |
+
typedef struct cuMemcpyAtoA_v2_params_st {
|
| 2476 |
+
CUarray dstArray;
|
| 2477 |
+
size_t dstOffset;
|
| 2478 |
+
CUarray srcArray;
|
| 2479 |
+
size_t srcOffset;
|
| 2480 |
+
size_t ByteCount;
|
| 2481 |
+
} cuMemcpyAtoA_v2_params;
|
| 2482 |
+
|
| 2483 |
+
typedef struct cuMemcpyHtoAAsync_v2_params_st {
|
| 2484 |
+
CUarray dstArray;
|
| 2485 |
+
size_t dstOffset;
|
| 2486 |
+
const void *srcHost;
|
| 2487 |
+
size_t ByteCount;
|
| 2488 |
+
CUstream hStream;
|
| 2489 |
+
} cuMemcpyHtoAAsync_v2_params;
|
| 2490 |
+
|
| 2491 |
+
typedef struct cuMemcpyAtoHAsync_v2_params_st {
|
| 2492 |
+
void *dstHost;
|
| 2493 |
+
CUarray srcArray;
|
| 2494 |
+
size_t srcOffset;
|
| 2495 |
+
size_t ByteCount;
|
| 2496 |
+
CUstream hStream;
|
| 2497 |
+
} cuMemcpyAtoHAsync_v2_params;
|
| 2498 |
+
|
| 2499 |
+
typedef struct cuMemcpy2D_v2_params_st {
|
| 2500 |
+
const CUDA_MEMCPY2D *pCopy;
|
| 2501 |
+
} cuMemcpy2D_v2_params;
|
| 2502 |
+
|
| 2503 |
+
typedef struct cuMemcpy2DUnaligned_v2_params_st {
|
| 2504 |
+
const CUDA_MEMCPY2D *pCopy;
|
| 2505 |
+
} cuMemcpy2DUnaligned_v2_params;
|
| 2506 |
+
|
| 2507 |
+
typedef struct cuMemcpy3D_v2_params_st {
|
| 2508 |
+
const CUDA_MEMCPY3D *pCopy;
|
| 2509 |
+
} cuMemcpy3D_v2_params;
|
| 2510 |
+
|
| 2511 |
+
typedef struct cuMemcpyHtoDAsync_v2_params_st {
|
| 2512 |
+
CUdeviceptr dstDevice;
|
| 2513 |
+
const void *srcHost;
|
| 2514 |
+
size_t ByteCount;
|
| 2515 |
+
CUstream hStream;
|
| 2516 |
+
} cuMemcpyHtoDAsync_v2_params;
|
| 2517 |
+
|
| 2518 |
+
typedef struct cuMemcpyDtoHAsync_v2_params_st {
|
| 2519 |
+
void *dstHost;
|
| 2520 |
+
CUdeviceptr srcDevice;
|
| 2521 |
+
size_t ByteCount;
|
| 2522 |
+
CUstream hStream;
|
| 2523 |
+
} cuMemcpyDtoHAsync_v2_params;
|
| 2524 |
+
|
| 2525 |
+
typedef struct cuMemcpyDtoDAsync_v2_params_st {
|
| 2526 |
+
CUdeviceptr dstDevice;
|
| 2527 |
+
CUdeviceptr srcDevice;
|
| 2528 |
+
size_t ByteCount;
|
| 2529 |
+
CUstream hStream;
|
| 2530 |
+
} cuMemcpyDtoDAsync_v2_params;
|
| 2531 |
+
|
| 2532 |
+
typedef struct cuMemcpy2DAsync_v2_params_st {
|
| 2533 |
+
const CUDA_MEMCPY2D *pCopy;
|
| 2534 |
+
CUstream hStream;
|
| 2535 |
+
} cuMemcpy2DAsync_v2_params;
|
| 2536 |
+
|
| 2537 |
+
typedef struct cuMemcpy3DAsync_v2_params_st {
|
| 2538 |
+
const CUDA_MEMCPY3D *pCopy;
|
| 2539 |
+
CUstream hStream;
|
| 2540 |
+
} cuMemcpy3DAsync_v2_params;
|
| 2541 |
+
|
| 2542 |
+
typedef struct cuMemsetD8_v2_params_st {
|
| 2543 |
+
CUdeviceptr dstDevice;
|
| 2544 |
+
unsigned char uc;
|
| 2545 |
+
size_t N;
|
| 2546 |
+
} cuMemsetD8_v2_params;
|
| 2547 |
+
|
| 2548 |
+
typedef struct cuMemsetD16_v2_params_st {
|
| 2549 |
+
CUdeviceptr dstDevice;
|
| 2550 |
+
unsigned short us;
|
| 2551 |
+
size_t N;
|
| 2552 |
+
} cuMemsetD16_v2_params;
|
| 2553 |
+
|
| 2554 |
+
typedef struct cuMemsetD32_v2_params_st {
|
| 2555 |
+
CUdeviceptr dstDevice;
|
| 2556 |
+
unsigned int ui;
|
| 2557 |
+
size_t N;
|
| 2558 |
+
} cuMemsetD32_v2_params;
|
| 2559 |
+
|
| 2560 |
+
typedef struct cuMemsetD2D8_v2_params_st {
|
| 2561 |
+
CUdeviceptr dstDevice;
|
| 2562 |
+
size_t dstPitch;
|
| 2563 |
+
unsigned char uc;
|
| 2564 |
+
size_t Width;
|
| 2565 |
+
size_t Height;
|
| 2566 |
+
} cuMemsetD2D8_v2_params;
|
| 2567 |
+
|
| 2568 |
+
typedef struct cuMemsetD2D16_v2_params_st {
|
| 2569 |
+
CUdeviceptr dstDevice;
|
| 2570 |
+
size_t dstPitch;
|
| 2571 |
+
unsigned short us;
|
| 2572 |
+
size_t Width;
|
| 2573 |
+
size_t Height;
|
| 2574 |
+
} cuMemsetD2D16_v2_params;
|
| 2575 |
+
|
| 2576 |
+
typedef struct cuMemsetD2D32_v2_params_st {
|
| 2577 |
+
CUdeviceptr dstDevice;
|
| 2578 |
+
size_t dstPitch;
|
| 2579 |
+
unsigned int ui;
|
| 2580 |
+
size_t Width;
|
| 2581 |
+
size_t Height;
|
| 2582 |
+
} cuMemsetD2D32_v2_params;
|
| 2583 |
+
|
| 2584 |
+
typedef struct cuMemcpy_params_st {
|
| 2585 |
+
CUdeviceptr dst;
|
| 2586 |
+
CUdeviceptr src;
|
| 2587 |
+
size_t ByteCount;
|
| 2588 |
+
} cuMemcpy_params;
|
| 2589 |
+
|
| 2590 |
+
typedef struct cuMemcpyAsync_params_st {
|
| 2591 |
+
CUdeviceptr dst;
|
| 2592 |
+
CUdeviceptr src;
|
| 2593 |
+
size_t ByteCount;
|
| 2594 |
+
CUstream hStream;
|
| 2595 |
+
} cuMemcpyAsync_params;
|
| 2596 |
+
|
| 2597 |
+
typedef struct cuMemcpyPeer_params_st {
|
| 2598 |
+
CUdeviceptr dstDevice;
|
| 2599 |
+
CUcontext dstContext;
|
| 2600 |
+
CUdeviceptr srcDevice;
|
| 2601 |
+
CUcontext srcContext;
|
| 2602 |
+
size_t ByteCount;
|
| 2603 |
+
} cuMemcpyPeer_params;
|
| 2604 |
+
|
| 2605 |
+
typedef struct cuMemcpyPeerAsync_params_st {
|
| 2606 |
+
CUdeviceptr dstDevice;
|
| 2607 |
+
CUcontext dstContext;
|
| 2608 |
+
CUdeviceptr srcDevice;
|
| 2609 |
+
CUcontext srcContext;
|
| 2610 |
+
size_t ByteCount;
|
| 2611 |
+
CUstream hStream;
|
| 2612 |
+
} cuMemcpyPeerAsync_params;
|
| 2613 |
+
|
| 2614 |
+
typedef struct cuMemcpy3DPeer_params_st {
|
| 2615 |
+
const CUDA_MEMCPY3D_PEER *pCopy;
|
| 2616 |
+
} cuMemcpy3DPeer_params;
|
| 2617 |
+
|
| 2618 |
+
typedef struct cuMemcpy3DPeerAsync_params_st {
|
| 2619 |
+
const CUDA_MEMCPY3D_PEER *pCopy;
|
| 2620 |
+
CUstream hStream;
|
| 2621 |
+
} cuMemcpy3DPeerAsync_params;
|
| 2622 |
+
|
| 2623 |
+
typedef struct cuMemsetD8Async_params_st {
|
| 2624 |
+
CUdeviceptr dstDevice;
|
| 2625 |
+
unsigned char uc;
|
| 2626 |
+
size_t N;
|
| 2627 |
+
CUstream hStream;
|
| 2628 |
+
} cuMemsetD8Async_params;
|
| 2629 |
+
|
| 2630 |
+
typedef struct cuMemsetD16Async_params_st {
|
| 2631 |
+
CUdeviceptr dstDevice;
|
| 2632 |
+
unsigned short us;
|
| 2633 |
+
size_t N;
|
| 2634 |
+
CUstream hStream;
|
| 2635 |
+
} cuMemsetD16Async_params;
|
| 2636 |
+
|
| 2637 |
+
typedef struct cuMemsetD32Async_params_st {
|
| 2638 |
+
CUdeviceptr dstDevice;
|
| 2639 |
+
unsigned int ui;
|
| 2640 |
+
size_t N;
|
| 2641 |
+
CUstream hStream;
|
| 2642 |
+
} cuMemsetD32Async_params;
|
| 2643 |
+
|
| 2644 |
+
typedef struct cuMemsetD2D8Async_params_st {
|
| 2645 |
+
CUdeviceptr dstDevice;
|
| 2646 |
+
size_t dstPitch;
|
| 2647 |
+
unsigned char uc;
|
| 2648 |
+
size_t Width;
|
| 2649 |
+
size_t Height;
|
| 2650 |
+
CUstream hStream;
|
| 2651 |
+
} cuMemsetD2D8Async_params;
|
| 2652 |
+
|
| 2653 |
+
typedef struct cuMemsetD2D16Async_params_st {
|
| 2654 |
+
CUdeviceptr dstDevice;
|
| 2655 |
+
size_t dstPitch;
|
| 2656 |
+
unsigned short us;
|
| 2657 |
+
size_t Width;
|
| 2658 |
+
size_t Height;
|
| 2659 |
+
CUstream hStream;
|
| 2660 |
+
} cuMemsetD2D16Async_params;
|
| 2661 |
+
|
| 2662 |
+
typedef struct cuMemsetD2D32Async_params_st {
|
| 2663 |
+
CUdeviceptr dstDevice;
|
| 2664 |
+
size_t dstPitch;
|
| 2665 |
+
unsigned int ui;
|
| 2666 |
+
size_t Width;
|
| 2667 |
+
size_t Height;
|
| 2668 |
+
CUstream hStream;
|
| 2669 |
+
} cuMemsetD2D32Async_params;
|
| 2670 |
+
|
| 2671 |
+
typedef struct cuStreamGetPriority_params_st {
|
| 2672 |
+
CUstream hStream;
|
| 2673 |
+
int *priority;
|
| 2674 |
+
} cuStreamGetPriority_params;
|
| 2675 |
+
|
| 2676 |
+
typedef struct cuStreamGetFlags_params_st {
|
| 2677 |
+
CUstream hStream;
|
| 2678 |
+
unsigned int *flags;
|
| 2679 |
+
} cuStreamGetFlags_params;
|
| 2680 |
+
|
| 2681 |
+
typedef struct cuStreamGetCtx_params_st {
|
| 2682 |
+
CUstream hStream;
|
| 2683 |
+
CUcontext *pctx;
|
| 2684 |
+
} cuStreamGetCtx_params;
|
| 2685 |
+
|
| 2686 |
+
typedef struct cuStreamWaitEvent_params_st {
|
| 2687 |
+
CUstream hStream;
|
| 2688 |
+
CUevent hEvent;
|
| 2689 |
+
unsigned int Flags;
|
| 2690 |
+
} cuStreamWaitEvent_params;
|
| 2691 |
+
|
| 2692 |
+
typedef struct cuStreamAddCallback_params_st {
|
| 2693 |
+
CUstream hStream;
|
| 2694 |
+
CUstreamCallback callback;
|
| 2695 |
+
void *userData;
|
| 2696 |
+
unsigned int flags;
|
| 2697 |
+
} cuStreamAddCallback_params;
|
| 2698 |
+
|
| 2699 |
+
typedef struct cuStreamAttachMemAsync_params_st {
|
| 2700 |
+
CUstream hStream;
|
| 2701 |
+
CUdeviceptr dptr;
|
| 2702 |
+
size_t length;
|
| 2703 |
+
unsigned int flags;
|
| 2704 |
+
} cuStreamAttachMemAsync_params;
|
| 2705 |
+
|
| 2706 |
+
typedef struct cuStreamQuery_params_st {
|
| 2707 |
+
CUstream hStream;
|
| 2708 |
+
} cuStreamQuery_params;
|
| 2709 |
+
|
| 2710 |
+
typedef struct cuStreamSynchronize_params_st {
|
| 2711 |
+
CUstream hStream;
|
| 2712 |
+
} cuStreamSynchronize_params;
|
| 2713 |
+
|
| 2714 |
+
typedef struct cuEventRecord_params_st {
|
| 2715 |
+
CUevent hEvent;
|
| 2716 |
+
CUstream hStream;
|
| 2717 |
+
} cuEventRecord_params;
|
| 2718 |
+
|
| 2719 |
+
typedef struct cuEventRecordWithFlags_params_st {
|
| 2720 |
+
CUevent hEvent;
|
| 2721 |
+
CUstream hStream;
|
| 2722 |
+
unsigned int flags;
|
| 2723 |
+
} cuEventRecordWithFlags_params;
|
| 2724 |
+
|
| 2725 |
+
typedef struct cuLaunchKernel_params_st {
|
| 2726 |
+
CUfunction f;
|
| 2727 |
+
unsigned int gridDimX;
|
| 2728 |
+
unsigned int gridDimY;
|
| 2729 |
+
unsigned int gridDimZ;
|
| 2730 |
+
unsigned int blockDimX;
|
| 2731 |
+
unsigned int blockDimY;
|
| 2732 |
+
unsigned int blockDimZ;
|
| 2733 |
+
unsigned int sharedMemBytes;
|
| 2734 |
+
CUstream hStream;
|
| 2735 |
+
void **kernelParams;
|
| 2736 |
+
void **extra;
|
| 2737 |
+
} cuLaunchKernel_params;
|
| 2738 |
+
|
| 2739 |
+
typedef struct cuLaunchKernelEx_params_st {
|
| 2740 |
+
const CUlaunchConfig *config;
|
| 2741 |
+
CUfunction f;
|
| 2742 |
+
void **kernelParams;
|
| 2743 |
+
void **extra;
|
| 2744 |
+
} cuLaunchKernelEx_params;
|
| 2745 |
+
|
| 2746 |
+
typedef struct cuLaunchHostFunc_params_st {
|
| 2747 |
+
CUstream hStream;
|
| 2748 |
+
CUhostFn fn;
|
| 2749 |
+
void *userData;
|
| 2750 |
+
} cuLaunchHostFunc_params;
|
| 2751 |
+
|
| 2752 |
+
typedef struct cuGraphicsMapResources_params_st {
|
| 2753 |
+
unsigned int count;
|
| 2754 |
+
CUgraphicsResource *resources;
|
| 2755 |
+
CUstream hStream;
|
| 2756 |
+
} cuGraphicsMapResources_params;
|
| 2757 |
+
|
| 2758 |
+
typedef struct cuGraphicsUnmapResources_params_st {
|
| 2759 |
+
unsigned int count;
|
| 2760 |
+
CUgraphicsResource *resources;
|
| 2761 |
+
CUstream hStream;
|
| 2762 |
+
} cuGraphicsUnmapResources_params;
|
| 2763 |
+
|
| 2764 |
+
typedef struct cuStreamWriteValue32_params_st {
|
| 2765 |
+
CUstream stream;
|
| 2766 |
+
CUdeviceptr addr;
|
| 2767 |
+
cuuint32_t value;
|
| 2768 |
+
unsigned int flags;
|
| 2769 |
+
} cuStreamWriteValue32_params;
|
| 2770 |
+
|
| 2771 |
+
typedef struct cuStreamWaitValue32_params_st {
|
| 2772 |
+
CUstream stream;
|
| 2773 |
+
CUdeviceptr addr;
|
| 2774 |
+
cuuint32_t value;
|
| 2775 |
+
unsigned int flags;
|
| 2776 |
+
} cuStreamWaitValue32_params;
|
| 2777 |
+
|
| 2778 |
+
typedef struct cuStreamWriteValue64_params_st {
|
| 2779 |
+
CUstream stream;
|
| 2780 |
+
CUdeviceptr addr;
|
| 2781 |
+
cuuint64_t value;
|
| 2782 |
+
unsigned int flags;
|
| 2783 |
+
} cuStreamWriteValue64_params;
|
| 2784 |
+
|
| 2785 |
+
typedef struct cuStreamWaitValue64_params_st {
|
| 2786 |
+
CUstream stream;
|
| 2787 |
+
CUdeviceptr addr;
|
| 2788 |
+
cuuint64_t value;
|
| 2789 |
+
unsigned int flags;
|
| 2790 |
+
} cuStreamWaitValue64_params;
|
| 2791 |
+
|
| 2792 |
+
typedef struct cuStreamBatchMemOp_params_st {
|
| 2793 |
+
CUstream stream;
|
| 2794 |
+
unsigned int count;
|
| 2795 |
+
CUstreamBatchMemOpParams *paramArray;
|
| 2796 |
+
unsigned int flags;
|
| 2797 |
+
} cuStreamBatchMemOp_params;
|
| 2798 |
+
|
| 2799 |
+
typedef struct cuMemPrefetchAsync_params_st {
|
| 2800 |
+
CUdeviceptr devPtr;
|
| 2801 |
+
size_t count;
|
| 2802 |
+
CUdevice dstDevice;
|
| 2803 |
+
CUstream hStream;
|
| 2804 |
+
} cuMemPrefetchAsync_params;
|
| 2805 |
+
|
| 2806 |
+
typedef struct cuLaunchCooperativeKernel_params_st {
|
| 2807 |
+
CUfunction f;
|
| 2808 |
+
unsigned int gridDimX;
|
| 2809 |
+
unsigned int gridDimY;
|
| 2810 |
+
unsigned int gridDimZ;
|
| 2811 |
+
unsigned int blockDimX;
|
| 2812 |
+
unsigned int blockDimY;
|
| 2813 |
+
unsigned int blockDimZ;
|
| 2814 |
+
unsigned int sharedMemBytes;
|
| 2815 |
+
CUstream hStream;
|
| 2816 |
+
void **kernelParams;
|
| 2817 |
+
} cuLaunchCooperativeKernel_params;
|
| 2818 |
+
|
| 2819 |
+
typedef struct cuSignalExternalSemaphoresAsync_params_st {
|
| 2820 |
+
const CUexternalSemaphore *extSemArray;
|
| 2821 |
+
const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray;
|
| 2822 |
+
unsigned int numExtSems;
|
| 2823 |
+
CUstream stream;
|
| 2824 |
+
} cuSignalExternalSemaphoresAsync_params;
|
| 2825 |
+
|
| 2826 |
+
typedef struct cuWaitExternalSemaphoresAsync_params_st {
|
| 2827 |
+
const CUexternalSemaphore *extSemArray;
|
| 2828 |
+
const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray;
|
| 2829 |
+
unsigned int numExtSems;
|
| 2830 |
+
CUstream stream;
|
| 2831 |
+
} cuWaitExternalSemaphoresAsync_params;
|
| 2832 |
+
|
| 2833 |
+
typedef struct cuStreamBeginCapture_params_st {
|
| 2834 |
+
CUstream hStream;
|
| 2835 |
+
} cuStreamBeginCapture_params;
|
| 2836 |
+
|
| 2837 |
+
typedef struct cuStreamBeginCapture_ptsz_params_st {
|
| 2838 |
+
CUstream hStream;
|
| 2839 |
+
} cuStreamBeginCapture_ptsz_params;
|
| 2840 |
+
|
| 2841 |
+
typedef struct cuStreamBeginCapture_v2_params_st {
|
| 2842 |
+
CUstream hStream;
|
| 2843 |
+
CUstreamCaptureMode mode;
|
| 2844 |
+
} cuStreamBeginCapture_v2_params;
|
| 2845 |
+
|
| 2846 |
+
typedef struct cuStreamEndCapture_params_st {
|
| 2847 |
+
CUstream hStream;
|
| 2848 |
+
CUgraph *phGraph;
|
| 2849 |
+
} cuStreamEndCapture_params;
|
| 2850 |
+
|
| 2851 |
+
typedef struct cuStreamIsCapturing_params_st {
|
| 2852 |
+
CUstream hStream;
|
| 2853 |
+
CUstreamCaptureStatus *captureStatus;
|
| 2854 |
+
} cuStreamIsCapturing_params;
|
| 2855 |
+
|
| 2856 |
+
typedef struct cuStreamGetCaptureInfo_params_st {
|
| 2857 |
+
CUstream hStream;
|
| 2858 |
+
CUstreamCaptureStatus *captureStatus_out;
|
| 2859 |
+
cuuint64_t *id_out;
|
| 2860 |
+
} cuStreamGetCaptureInfo_params;
|
| 2861 |
+
|
| 2862 |
+
typedef struct cuStreamGetCaptureInfo_v2_params_st {
|
| 2863 |
+
CUstream hStream;
|
| 2864 |
+
CUstreamCaptureStatus *captureStatus_out;
|
| 2865 |
+
cuuint64_t *id_out;
|
| 2866 |
+
CUgraph *graph_out;
|
| 2867 |
+
const CUgraphNode **dependencies_out;
|
| 2868 |
+
size_t *numDependencies_out;
|
| 2869 |
+
} cuStreamGetCaptureInfo_v2_params;
|
| 2870 |
+
|
| 2871 |
+
typedef struct cuGraphUpload_params_st {
|
| 2872 |
+
CUgraphExec hGraph;
|
| 2873 |
+
CUstream hStream;
|
| 2874 |
+
} cuGraphUpload_params;
|
| 2875 |
+
|
| 2876 |
+
typedef struct cuGraphLaunch_params_st {
|
| 2877 |
+
CUgraphExec hGraph;
|
| 2878 |
+
CUstream hStream;
|
| 2879 |
+
} cuGraphLaunch_params;
|
| 2880 |
+
|
| 2881 |
+
typedef struct cuStreamCopyAttributes_params_st {
|
| 2882 |
+
CUstream dstStream;
|
| 2883 |
+
CUstream srcStream;
|
| 2884 |
+
} cuStreamCopyAttributes_params;
|
| 2885 |
+
|
| 2886 |
+
typedef struct cuStreamGetAttribute_params_st {
|
| 2887 |
+
CUstream hStream;
|
| 2888 |
+
CUstreamAttrID attr;
|
| 2889 |
+
CUstreamAttrValue *value;
|
| 2890 |
+
} cuStreamGetAttribute_params;
|
| 2891 |
+
|
| 2892 |
+
typedef struct cuStreamSetAttribute_params_st {
|
| 2893 |
+
CUstream hStream;
|
| 2894 |
+
CUstreamAttrID attr;
|
| 2895 |
+
const CUstreamAttrValue *param;
|
| 2896 |
+
} cuStreamSetAttribute_params;
|
| 2897 |
+
|
| 2898 |
+
typedef struct cuIpcOpenMemHandle_params_st {
|
| 2899 |
+
CUdeviceptr *pdptr;
|
| 2900 |
+
CUipcMemHandle handle;
|
| 2901 |
+
unsigned int Flags;
|
| 2902 |
+
} cuIpcOpenMemHandle_params;
|
| 2903 |
+
|
| 2904 |
+
typedef struct cuGraphInstantiate_params_st {
|
| 2905 |
+
CUgraphExec *phGraphExec;
|
| 2906 |
+
CUgraph hGraph;
|
| 2907 |
+
CUgraphNode *phErrorNode;
|
| 2908 |
+
char *logBuffer;
|
| 2909 |
+
size_t bufferSize;
|
| 2910 |
+
} cuGraphInstantiate_params;
|
| 2911 |
+
|
| 2912 |
+
typedef struct cuMemMapArrayAsync_params_st {
|
| 2913 |
+
CUarrayMapInfo *mapInfoList;
|
| 2914 |
+
unsigned int count;
|
| 2915 |
+
CUstream hStream;
|
| 2916 |
+
} cuMemMapArrayAsync_params;
|
| 2917 |
+
|
| 2918 |
+
typedef struct cuMemFreeAsync_params_st {
|
| 2919 |
+
CUdeviceptr dptr;
|
| 2920 |
+
CUstream hStream;
|
| 2921 |
+
} cuMemFreeAsync_params;
|
| 2922 |
+
|
| 2923 |
+
typedef struct cuMemAllocAsync_params_st {
|
| 2924 |
+
CUdeviceptr *dptr;
|
| 2925 |
+
size_t bytesize;
|
| 2926 |
+
CUstream hStream;
|
| 2927 |
+
} cuMemAllocAsync_params;
|
| 2928 |
+
|
| 2929 |
+
typedef struct cuMemAllocFromPoolAsync_params_st {
|
| 2930 |
+
CUdeviceptr *dptr;
|
| 2931 |
+
size_t bytesize;
|
| 2932 |
+
CUmemoryPool pool;
|
| 2933 |
+
CUstream hStream;
|
| 2934 |
+
} cuMemAllocFromPoolAsync_params;
|
| 2935 |
+
|
| 2936 |
+
typedef struct cuStreamUpdateCaptureDependencies_params_st {
|
| 2937 |
+
CUstream hStream;
|
| 2938 |
+
CUgraphNode *dependencies;
|
| 2939 |
+
size_t numDependencies;
|
| 2940 |
+
unsigned int flags;
|
| 2941 |
+
} cuStreamUpdateCaptureDependencies_params;
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_runtime_api_meta.h
ADDED
|
@@ -0,0 +1,2139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// This file is generated. Any changes you make will be lost during the next clean build.
|
| 2 |
+
|
| 3 |
+
// CUDA public interface, for type definitions and api function prototypes
|
| 4 |
+
#include "cuda_runtime_api.h"
|
| 5 |
+
|
| 6 |
+
// *************************************************************************
|
| 7 |
+
// Definitions of structs to hold parameters for each function
|
| 8 |
+
// *************************************************************************
|
| 9 |
+
|
| 10 |
+
// Currently used parameter trace structures
|
| 11 |
+
typedef struct cudaDeviceSetLimit_v3020_params_st {
|
| 12 |
+
enum cudaLimit limit;
|
| 13 |
+
size_t value;
|
| 14 |
+
} cudaDeviceSetLimit_v3020_params;
|
| 15 |
+
|
| 16 |
+
typedef struct cudaDeviceGetLimit_v3020_params_st {
|
| 17 |
+
size_t *pValue;
|
| 18 |
+
enum cudaLimit limit;
|
| 19 |
+
} cudaDeviceGetLimit_v3020_params;
|
| 20 |
+
|
| 21 |
+
typedef struct cudaDeviceGetTexture1DLinearMaxWidth_v11010_params_st {
|
| 22 |
+
size_t *maxWidthInElements;
|
| 23 |
+
const struct cudaChannelFormatDesc *fmtDesc;
|
| 24 |
+
int device;
|
| 25 |
+
} cudaDeviceGetTexture1DLinearMaxWidth_v11010_params;
|
| 26 |
+
|
| 27 |
+
typedef struct cudaDeviceGetCacheConfig_v3020_params_st {
|
| 28 |
+
enum cudaFuncCache *pCacheConfig;
|
| 29 |
+
} cudaDeviceGetCacheConfig_v3020_params;
|
| 30 |
+
|
| 31 |
+
typedef struct cudaDeviceGetStreamPriorityRange_v5050_params_st {
|
| 32 |
+
int *leastPriority;
|
| 33 |
+
int *greatestPriority;
|
| 34 |
+
} cudaDeviceGetStreamPriorityRange_v5050_params;
|
| 35 |
+
|
| 36 |
+
typedef struct cudaDeviceSetCacheConfig_v3020_params_st {
|
| 37 |
+
enum cudaFuncCache cacheConfig;
|
| 38 |
+
} cudaDeviceSetCacheConfig_v3020_params;
|
| 39 |
+
|
| 40 |
+
typedef struct cudaDeviceGetSharedMemConfig_v4020_params_st {
|
| 41 |
+
enum cudaSharedMemConfig *pConfig;
|
| 42 |
+
} cudaDeviceGetSharedMemConfig_v4020_params;
|
| 43 |
+
|
| 44 |
+
typedef struct cudaDeviceSetSharedMemConfig_v4020_params_st {
|
| 45 |
+
enum cudaSharedMemConfig config;
|
| 46 |
+
} cudaDeviceSetSharedMemConfig_v4020_params;
|
| 47 |
+
|
| 48 |
+
typedef struct cudaDeviceGetByPCIBusId_v4010_params_st {
|
| 49 |
+
int *device;
|
| 50 |
+
const char *pciBusId;
|
| 51 |
+
} cudaDeviceGetByPCIBusId_v4010_params;
|
| 52 |
+
|
| 53 |
+
typedef struct cudaDeviceGetPCIBusId_v4010_params_st {
|
| 54 |
+
char *pciBusId;
|
| 55 |
+
int len;
|
| 56 |
+
int device;
|
| 57 |
+
} cudaDeviceGetPCIBusId_v4010_params;
|
| 58 |
+
|
| 59 |
+
typedef struct cudaIpcGetEventHandle_v4010_params_st {
|
| 60 |
+
cudaIpcEventHandle_t *handle;
|
| 61 |
+
cudaEvent_t event;
|
| 62 |
+
} cudaIpcGetEventHandle_v4010_params;
|
| 63 |
+
|
| 64 |
+
typedef struct cudaIpcOpenEventHandle_v4010_params_st {
|
| 65 |
+
cudaEvent_t *event;
|
| 66 |
+
cudaIpcEventHandle_t handle;
|
| 67 |
+
} cudaIpcOpenEventHandle_v4010_params;
|
| 68 |
+
|
| 69 |
+
typedef struct cudaIpcGetMemHandle_v4010_params_st {
|
| 70 |
+
cudaIpcMemHandle_t *handle;
|
| 71 |
+
void *devPtr;
|
| 72 |
+
} cudaIpcGetMemHandle_v4010_params;
|
| 73 |
+
|
| 74 |
+
typedef struct cudaIpcOpenMemHandle_v4010_params_st {
|
| 75 |
+
void **devPtr;
|
| 76 |
+
cudaIpcMemHandle_t handle;
|
| 77 |
+
unsigned int flags;
|
| 78 |
+
} cudaIpcOpenMemHandle_v4010_params;
|
| 79 |
+
|
| 80 |
+
typedef struct cudaIpcCloseMemHandle_v4010_params_st {
|
| 81 |
+
void *devPtr;
|
| 82 |
+
} cudaIpcCloseMemHandle_v4010_params;
|
| 83 |
+
|
| 84 |
+
typedef struct cudaDeviceFlushGPUDirectRDMAWrites_v11030_params_st {
|
| 85 |
+
enum cudaFlushGPUDirectRDMAWritesTarget target;
|
| 86 |
+
enum cudaFlushGPUDirectRDMAWritesScope scope;
|
| 87 |
+
} cudaDeviceFlushGPUDirectRDMAWrites_v11030_params;
|
| 88 |
+
|
| 89 |
+
typedef struct cudaGetErrorName_v6050_params_st {
|
| 90 |
+
cudaError_t error;
|
| 91 |
+
} cudaGetErrorName_v6050_params;
|
| 92 |
+
|
| 93 |
+
typedef struct cudaGetErrorString_v3020_params_st {
|
| 94 |
+
cudaError_t error;
|
| 95 |
+
} cudaGetErrorString_v3020_params;
|
| 96 |
+
|
| 97 |
+
typedef struct cudaGetDeviceCount_v3020_params_st {
|
| 98 |
+
int *count;
|
| 99 |
+
} cudaGetDeviceCount_v3020_params;
|
| 100 |
+
|
| 101 |
+
typedef struct cudaGetDeviceProperties_v3020_params_st {
|
| 102 |
+
struct cudaDeviceProp *prop;
|
| 103 |
+
int device;
|
| 104 |
+
} cudaGetDeviceProperties_v3020_params;
|
| 105 |
+
|
| 106 |
+
typedef struct cudaDeviceGetAttribute_v5000_params_st {
|
| 107 |
+
int *value;
|
| 108 |
+
enum cudaDeviceAttr attr;
|
| 109 |
+
int device;
|
| 110 |
+
} cudaDeviceGetAttribute_v5000_params;
|
| 111 |
+
|
| 112 |
+
typedef struct cudaDeviceGetDefaultMemPool_v11020_params_st {
|
| 113 |
+
cudaMemPool_t *memPool;
|
| 114 |
+
int device;
|
| 115 |
+
} cudaDeviceGetDefaultMemPool_v11020_params;
|
| 116 |
+
|
| 117 |
+
typedef struct cudaDeviceSetMemPool_v11020_params_st {
|
| 118 |
+
int device;
|
| 119 |
+
cudaMemPool_t memPool;
|
| 120 |
+
} cudaDeviceSetMemPool_v11020_params;
|
| 121 |
+
|
| 122 |
+
typedef struct cudaDeviceGetMemPool_v11020_params_st {
|
| 123 |
+
cudaMemPool_t *memPool;
|
| 124 |
+
int device;
|
| 125 |
+
} cudaDeviceGetMemPool_v11020_params;
|
| 126 |
+
|
| 127 |
+
typedef struct cudaDeviceGetNvSciSyncAttributes_v10020_params_st {
|
| 128 |
+
void *nvSciSyncAttrList;
|
| 129 |
+
int device;
|
| 130 |
+
int flags;
|
| 131 |
+
} cudaDeviceGetNvSciSyncAttributes_v10020_params;
|
| 132 |
+
|
| 133 |
+
typedef struct cudaDeviceGetP2PAttribute_v8000_params_st {
|
| 134 |
+
int *value;
|
| 135 |
+
enum cudaDeviceP2PAttr attr;
|
| 136 |
+
int srcDevice;
|
| 137 |
+
int dstDevice;
|
| 138 |
+
} cudaDeviceGetP2PAttribute_v8000_params;
|
| 139 |
+
|
| 140 |
+
typedef struct cudaChooseDevice_v3020_params_st {
|
| 141 |
+
int *device;
|
| 142 |
+
const struct cudaDeviceProp *prop;
|
| 143 |
+
} cudaChooseDevice_v3020_params;
|
| 144 |
+
|
| 145 |
+
typedef struct cudaSetDevice_v3020_params_st {
|
| 146 |
+
int device;
|
| 147 |
+
} cudaSetDevice_v3020_params;
|
| 148 |
+
|
| 149 |
+
typedef struct cudaGetDevice_v3020_params_st {
|
| 150 |
+
int *device;
|
| 151 |
+
} cudaGetDevice_v3020_params;
|
| 152 |
+
|
| 153 |
+
typedef struct cudaSetValidDevices_v3020_params_st {
|
| 154 |
+
int *device_arr;
|
| 155 |
+
int len;
|
| 156 |
+
} cudaSetValidDevices_v3020_params;
|
| 157 |
+
|
| 158 |
+
typedef struct cudaSetDeviceFlags_v3020_params_st {
|
| 159 |
+
unsigned int flags;
|
| 160 |
+
} cudaSetDeviceFlags_v3020_params;
|
| 161 |
+
|
| 162 |
+
typedef struct cudaGetDeviceFlags_v7000_params_st {
|
| 163 |
+
unsigned int *flags;
|
| 164 |
+
} cudaGetDeviceFlags_v7000_params;
|
| 165 |
+
|
| 166 |
+
typedef struct cudaStreamCreate_v3020_params_st {
|
| 167 |
+
cudaStream_t *pStream;
|
| 168 |
+
} cudaStreamCreate_v3020_params;
|
| 169 |
+
|
| 170 |
+
typedef struct cudaStreamCreateWithFlags_v5000_params_st {
|
| 171 |
+
cudaStream_t *pStream;
|
| 172 |
+
unsigned int flags;
|
| 173 |
+
} cudaStreamCreateWithFlags_v5000_params;
|
| 174 |
+
|
| 175 |
+
typedef struct cudaStreamCreateWithPriority_v5050_params_st {
|
| 176 |
+
cudaStream_t *pStream;
|
| 177 |
+
unsigned int flags;
|
| 178 |
+
int priority;
|
| 179 |
+
} cudaStreamCreateWithPriority_v5050_params;
|
| 180 |
+
|
| 181 |
+
typedef struct cudaStreamGetPriority_ptsz_v7000_params_st {
|
| 182 |
+
cudaStream_t hStream;
|
| 183 |
+
int *priority;
|
| 184 |
+
} cudaStreamGetPriority_ptsz_v7000_params;
|
| 185 |
+
|
| 186 |
+
typedef struct cudaStreamGetFlags_ptsz_v7000_params_st {
|
| 187 |
+
cudaStream_t hStream;
|
| 188 |
+
unsigned int *flags;
|
| 189 |
+
} cudaStreamGetFlags_ptsz_v7000_params;
|
| 190 |
+
|
| 191 |
+
typedef struct cudaStreamCopyAttributes_ptsz_v11000_params_st {
|
| 192 |
+
cudaStream_t dst;
|
| 193 |
+
cudaStream_t src;
|
| 194 |
+
} cudaStreamCopyAttributes_ptsz_v11000_params;
|
| 195 |
+
|
| 196 |
+
typedef struct cudaStreamGetAttribute_ptsz_v11000_params_st {
|
| 197 |
+
cudaStream_t hStream;
|
| 198 |
+
cudaStreamAttrID attr;
|
| 199 |
+
cudaStreamAttrValue *value_out;
|
| 200 |
+
} cudaStreamGetAttribute_ptsz_v11000_params;
|
| 201 |
+
|
| 202 |
+
typedef struct cudaStreamSetAttribute_ptsz_v11000_params_st {
|
| 203 |
+
cudaStream_t hStream;
|
| 204 |
+
cudaStreamAttrID attr;
|
| 205 |
+
const cudaStreamAttrValue *value;
|
| 206 |
+
} cudaStreamSetAttribute_ptsz_v11000_params;
|
| 207 |
+
|
| 208 |
+
typedef struct cudaStreamDestroy_v5050_params_st {
|
| 209 |
+
cudaStream_t stream;
|
| 210 |
+
} cudaStreamDestroy_v5050_params;
|
| 211 |
+
|
| 212 |
+
typedef struct cudaStreamWaitEvent_ptsz_v7000_params_st {
|
| 213 |
+
cudaStream_t stream;
|
| 214 |
+
cudaEvent_t event;
|
| 215 |
+
unsigned int flags;
|
| 216 |
+
} cudaStreamWaitEvent_ptsz_v7000_params;
|
| 217 |
+
|
| 218 |
+
typedef struct cudaStreamAddCallback_ptsz_v7000_params_st {
|
| 219 |
+
cudaStream_t stream;
|
| 220 |
+
cudaStreamCallback_t callback;
|
| 221 |
+
void *userData;
|
| 222 |
+
unsigned int flags;
|
| 223 |
+
} cudaStreamAddCallback_ptsz_v7000_params;
|
| 224 |
+
|
| 225 |
+
typedef struct cudaStreamSynchronize_ptsz_v7000_params_st {
|
| 226 |
+
cudaStream_t stream;
|
| 227 |
+
} cudaStreamSynchronize_ptsz_v7000_params;
|
| 228 |
+
|
| 229 |
+
typedef struct cudaStreamQuery_ptsz_v7000_params_st {
|
| 230 |
+
cudaStream_t stream;
|
| 231 |
+
} cudaStreamQuery_ptsz_v7000_params;
|
| 232 |
+
|
| 233 |
+
typedef struct cudaStreamAttachMemAsync_ptsz_v7000_params_st {
|
| 234 |
+
cudaStream_t stream;
|
| 235 |
+
void *devPtr;
|
| 236 |
+
size_t length;
|
| 237 |
+
unsigned int flags;
|
| 238 |
+
} cudaStreamAttachMemAsync_ptsz_v7000_params;
|
| 239 |
+
|
| 240 |
+
typedef struct cudaStreamBeginCapture_ptsz_v10000_params_st {
|
| 241 |
+
cudaStream_t stream;
|
| 242 |
+
enum cudaStreamCaptureMode mode;
|
| 243 |
+
} cudaStreamBeginCapture_ptsz_v10000_params;
|
| 244 |
+
|
| 245 |
+
typedef struct cudaThreadExchangeStreamCaptureMode_v10010_params_st {
|
| 246 |
+
enum cudaStreamCaptureMode *mode;
|
| 247 |
+
} cudaThreadExchangeStreamCaptureMode_v10010_params;
|
| 248 |
+
|
| 249 |
+
typedef struct cudaStreamEndCapture_ptsz_v10000_params_st {
|
| 250 |
+
cudaStream_t stream;
|
| 251 |
+
cudaGraph_t *pGraph;
|
| 252 |
+
} cudaStreamEndCapture_ptsz_v10000_params;
|
| 253 |
+
|
| 254 |
+
typedef struct cudaStreamIsCapturing_ptsz_v10000_params_st {
|
| 255 |
+
cudaStream_t stream;
|
| 256 |
+
enum cudaStreamCaptureStatus *pCaptureStatus;
|
| 257 |
+
} cudaStreamIsCapturing_ptsz_v10000_params;
|
| 258 |
+
|
| 259 |
+
typedef struct cudaStreamGetCaptureInfo_ptsz_v10010_params_st {
|
| 260 |
+
cudaStream_t stream;
|
| 261 |
+
enum cudaStreamCaptureStatus *pCaptureStatus;
|
| 262 |
+
unsigned long long *pId;
|
| 263 |
+
} cudaStreamGetCaptureInfo_ptsz_v10010_params;
|
| 264 |
+
|
| 265 |
+
typedef struct cudaStreamGetCaptureInfo_v2_ptsz_v11030_params_st {
|
| 266 |
+
cudaStream_t stream;
|
| 267 |
+
enum cudaStreamCaptureStatus *captureStatus_out;
|
| 268 |
+
unsigned long long *id_out;
|
| 269 |
+
cudaGraph_t *graph_out;
|
| 270 |
+
const cudaGraphNode_t **dependencies_out;
|
| 271 |
+
size_t *numDependencies_out;
|
| 272 |
+
} cudaStreamGetCaptureInfo_v2_ptsz_v11030_params;
|
| 273 |
+
|
| 274 |
+
typedef struct cudaStreamUpdateCaptureDependencies_v11030_params_st {
|
| 275 |
+
cudaStream_t stream;
|
| 276 |
+
cudaGraphNode_t *dependencies;
|
| 277 |
+
size_t numDependencies;
|
| 278 |
+
unsigned int flags;
|
| 279 |
+
} cudaStreamUpdateCaptureDependencies_v11030_params;
|
| 280 |
+
|
| 281 |
+
typedef struct cudaEventCreate_v3020_params_st {
|
| 282 |
+
cudaEvent_t *event;
|
| 283 |
+
} cudaEventCreate_v3020_params;
|
| 284 |
+
|
| 285 |
+
typedef struct cudaEventCreateWithFlags_v3020_params_st {
|
| 286 |
+
cudaEvent_t *event;
|
| 287 |
+
unsigned int flags;
|
| 288 |
+
} cudaEventCreateWithFlags_v3020_params;
|
| 289 |
+
|
| 290 |
+
typedef struct cudaEventRecord_ptsz_v7000_params_st {
|
| 291 |
+
cudaEvent_t event;
|
| 292 |
+
cudaStream_t stream;
|
| 293 |
+
} cudaEventRecord_ptsz_v7000_params;
|
| 294 |
+
|
| 295 |
+
typedef struct cudaEventRecordWithFlags_ptsz_v11010_params_st {
|
| 296 |
+
cudaEvent_t event;
|
| 297 |
+
cudaStream_t stream;
|
| 298 |
+
unsigned int flags;
|
| 299 |
+
} cudaEventRecordWithFlags_ptsz_v11010_params;
|
| 300 |
+
|
| 301 |
+
typedef struct cudaEventQuery_v3020_params_st {
|
| 302 |
+
cudaEvent_t event;
|
| 303 |
+
} cudaEventQuery_v3020_params;
|
| 304 |
+
|
| 305 |
+
typedef struct cudaEventSynchronize_v3020_params_st {
|
| 306 |
+
cudaEvent_t event;
|
| 307 |
+
} cudaEventSynchronize_v3020_params;
|
| 308 |
+
|
| 309 |
+
typedef struct cudaEventDestroy_v3020_params_st {
|
| 310 |
+
cudaEvent_t event;
|
| 311 |
+
} cudaEventDestroy_v3020_params;
|
| 312 |
+
|
| 313 |
+
typedef struct cudaEventElapsedTime_v3020_params_st {
|
| 314 |
+
float *ms;
|
| 315 |
+
cudaEvent_t start;
|
| 316 |
+
cudaEvent_t end;
|
| 317 |
+
} cudaEventElapsedTime_v3020_params;
|
| 318 |
+
|
| 319 |
+
typedef struct cudaImportExternalMemory_v10000_params_st {
|
| 320 |
+
cudaExternalMemory_t *extMem_out;
|
| 321 |
+
const struct cudaExternalMemoryHandleDesc *memHandleDesc;
|
| 322 |
+
} cudaImportExternalMemory_v10000_params;
|
| 323 |
+
|
| 324 |
+
typedef struct cudaExternalMemoryGetMappedBuffer_v10000_params_st {
|
| 325 |
+
void **devPtr;
|
| 326 |
+
cudaExternalMemory_t extMem;
|
| 327 |
+
const struct cudaExternalMemoryBufferDesc *bufferDesc;
|
| 328 |
+
} cudaExternalMemoryGetMappedBuffer_v10000_params;
|
| 329 |
+
|
| 330 |
+
typedef struct cudaExternalMemoryGetMappedMipmappedArray_v10000_params_st {
|
| 331 |
+
cudaMipmappedArray_t *mipmap;
|
| 332 |
+
cudaExternalMemory_t extMem;
|
| 333 |
+
const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc;
|
| 334 |
+
} cudaExternalMemoryGetMappedMipmappedArray_v10000_params;
|
| 335 |
+
|
| 336 |
+
typedef struct cudaDestroyExternalMemory_v10000_params_st {
|
| 337 |
+
cudaExternalMemory_t extMem;
|
| 338 |
+
} cudaDestroyExternalMemory_v10000_params;
|
| 339 |
+
|
| 340 |
+
typedef struct cudaImportExternalSemaphore_v10000_params_st {
|
| 341 |
+
cudaExternalSemaphore_t *extSem_out;
|
| 342 |
+
const struct cudaExternalSemaphoreHandleDesc *semHandleDesc;
|
| 343 |
+
} cudaImportExternalSemaphore_v10000_params;
|
| 344 |
+
|
| 345 |
+
typedef struct cudaSignalExternalSemaphoresAsync_v2_ptsz_v11020_params_st {
|
| 346 |
+
const cudaExternalSemaphore_t *extSemArray;
|
| 347 |
+
const struct cudaExternalSemaphoreSignalParams *paramsArray;
|
| 348 |
+
unsigned int numExtSems;
|
| 349 |
+
cudaStream_t stream;
|
| 350 |
+
} cudaSignalExternalSemaphoresAsync_v2_ptsz_v11020_params;
|
| 351 |
+
|
| 352 |
+
typedef struct cudaWaitExternalSemaphoresAsync_v2_ptsz_v11020_params_st {
|
| 353 |
+
const cudaExternalSemaphore_t *extSemArray;
|
| 354 |
+
const struct cudaExternalSemaphoreWaitParams *paramsArray;
|
| 355 |
+
unsigned int numExtSems;
|
| 356 |
+
cudaStream_t stream;
|
| 357 |
+
} cudaWaitExternalSemaphoresAsync_v2_ptsz_v11020_params;
|
| 358 |
+
|
| 359 |
+
typedef struct cudaDestroyExternalSemaphore_v10000_params_st {
|
| 360 |
+
cudaExternalSemaphore_t extSem;
|
| 361 |
+
} cudaDestroyExternalSemaphore_v10000_params;
|
| 362 |
+
|
| 363 |
+
typedef struct cudaLaunchKernel_ptsz_v7000_params_st {
|
| 364 |
+
const void *func;
|
| 365 |
+
dim3 gridDim;
|
| 366 |
+
dim3 blockDim;
|
| 367 |
+
void **args;
|
| 368 |
+
size_t sharedMem;
|
| 369 |
+
cudaStream_t stream;
|
| 370 |
+
} cudaLaunchKernel_ptsz_v7000_params;
|
| 371 |
+
|
| 372 |
+
typedef struct cudaLaunchKernelExC_ptsz_v11060_params_st {
|
| 373 |
+
const cudaLaunchConfig_t *config;
|
| 374 |
+
const void *func;
|
| 375 |
+
void **args;
|
| 376 |
+
} cudaLaunchKernelExC_ptsz_v11060_params;
|
| 377 |
+
|
| 378 |
+
typedef struct cudaLaunchCooperativeKernel_ptsz_v9000_params_st {
|
| 379 |
+
const void *func;
|
| 380 |
+
dim3 gridDim;
|
| 381 |
+
dim3 blockDim;
|
| 382 |
+
void **args;
|
| 383 |
+
size_t sharedMem;
|
| 384 |
+
cudaStream_t stream;
|
| 385 |
+
} cudaLaunchCooperativeKernel_ptsz_v9000_params;
|
| 386 |
+
|
| 387 |
+
typedef struct cudaLaunchCooperativeKernelMultiDevice_v9000_params_st {
|
| 388 |
+
struct cudaLaunchParams *launchParamsList;
|
| 389 |
+
unsigned int numDevices;
|
| 390 |
+
unsigned int flags;
|
| 391 |
+
} cudaLaunchCooperativeKernelMultiDevice_v9000_params;
|
| 392 |
+
|
| 393 |
+
typedef struct cudaFuncSetCacheConfig_v3020_params_st {
|
| 394 |
+
const void *func;
|
| 395 |
+
enum cudaFuncCache cacheConfig;
|
| 396 |
+
} cudaFuncSetCacheConfig_v3020_params;
|
| 397 |
+
|
| 398 |
+
typedef struct cudaFuncSetSharedMemConfig_v4020_params_st {
|
| 399 |
+
const void *func;
|
| 400 |
+
enum cudaSharedMemConfig config;
|
| 401 |
+
} cudaFuncSetSharedMemConfig_v4020_params;
|
| 402 |
+
|
| 403 |
+
typedef struct cudaFuncGetAttributes_v3020_params_st {
|
| 404 |
+
struct cudaFuncAttributes *attr;
|
| 405 |
+
const void *func;
|
| 406 |
+
} cudaFuncGetAttributes_v3020_params;
|
| 407 |
+
|
| 408 |
+
typedef struct cudaFuncSetAttribute_v9000_params_st {
|
| 409 |
+
const void *func;
|
| 410 |
+
enum cudaFuncAttribute attr;
|
| 411 |
+
int value;
|
| 412 |
+
} cudaFuncSetAttribute_v9000_params;
|
| 413 |
+
|
| 414 |
+
typedef struct cudaLaunchHostFunc_ptsz_v10000_params_st {
|
| 415 |
+
cudaStream_t stream;
|
| 416 |
+
cudaHostFn_t fn;
|
| 417 |
+
void *userData;
|
| 418 |
+
} cudaLaunchHostFunc_ptsz_v10000_params;
|
| 419 |
+
|
| 420 |
+
typedef struct cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050_params_st {
|
| 421 |
+
int *numBlocks;
|
| 422 |
+
const void *func;
|
| 423 |
+
int blockSize;
|
| 424 |
+
size_t dynamicSMemSize;
|
| 425 |
+
} cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050_params;
|
| 426 |
+
|
| 427 |
+
typedef struct cudaOccupancyAvailableDynamicSMemPerBlock_v10200_params_st {
|
| 428 |
+
size_t *dynamicSmemSize;
|
| 429 |
+
const void *func;
|
| 430 |
+
int numBlocks;
|
| 431 |
+
int blockSize;
|
| 432 |
+
} cudaOccupancyAvailableDynamicSMemPerBlock_v10200_params;
|
| 433 |
+
|
| 434 |
+
typedef struct cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_v7000_params_st {
|
| 435 |
+
int *numBlocks;
|
| 436 |
+
const void *func;
|
| 437 |
+
int blockSize;
|
| 438 |
+
size_t dynamicSMemSize;
|
| 439 |
+
unsigned int flags;
|
| 440 |
+
} cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_v7000_params;
|
| 441 |
+
|
| 442 |
+
typedef struct cudaOccupancyMaxPotentialClusterSize_v11070_params_st {
|
| 443 |
+
int *clusterSize;
|
| 444 |
+
const void *func;
|
| 445 |
+
const cudaLaunchConfig_t *launchConfig;
|
| 446 |
+
} cudaOccupancyMaxPotentialClusterSize_v11070_params;
|
| 447 |
+
|
| 448 |
+
typedef struct cudaOccupancyMaxActiveClusters_v11070_params_st {
|
| 449 |
+
int *numClusters;
|
| 450 |
+
const void *func;
|
| 451 |
+
const cudaLaunchConfig_t *launchConfig;
|
| 452 |
+
} cudaOccupancyMaxActiveClusters_v11070_params;
|
| 453 |
+
|
| 454 |
+
typedef struct cudaMallocManaged_v6000_params_st {
|
| 455 |
+
void **devPtr;
|
| 456 |
+
size_t size;
|
| 457 |
+
unsigned int flags;
|
| 458 |
+
} cudaMallocManaged_v6000_params;
|
| 459 |
+
|
| 460 |
+
typedef struct cudaMalloc_v3020_params_st {
|
| 461 |
+
void **devPtr;
|
| 462 |
+
size_t size;
|
| 463 |
+
} cudaMalloc_v3020_params;
|
| 464 |
+
|
| 465 |
+
typedef struct cudaMallocHost_v3020_params_st {
|
| 466 |
+
void **ptr;
|
| 467 |
+
size_t size;
|
| 468 |
+
} cudaMallocHost_v3020_params;
|
| 469 |
+
|
| 470 |
+
typedef struct cudaMallocPitch_v3020_params_st {
|
| 471 |
+
void **devPtr;
|
| 472 |
+
size_t *pitch;
|
| 473 |
+
size_t width;
|
| 474 |
+
size_t height;
|
| 475 |
+
} cudaMallocPitch_v3020_params;
|
| 476 |
+
|
| 477 |
+
typedef struct cudaMallocArray_v3020_params_st {
|
| 478 |
+
cudaArray_t *array;
|
| 479 |
+
const struct cudaChannelFormatDesc *desc;
|
| 480 |
+
size_t width;
|
| 481 |
+
size_t height;
|
| 482 |
+
unsigned int flags;
|
| 483 |
+
} cudaMallocArray_v3020_params;
|
| 484 |
+
|
| 485 |
+
typedef struct cudaFree_v3020_params_st {
|
| 486 |
+
void *devPtr;
|
| 487 |
+
} cudaFree_v3020_params;
|
| 488 |
+
|
| 489 |
+
typedef struct cudaFreeHost_v3020_params_st {
|
| 490 |
+
void *ptr;
|
| 491 |
+
} cudaFreeHost_v3020_params;
|
| 492 |
+
|
| 493 |
+
typedef struct cudaFreeArray_v3020_params_st {
|
| 494 |
+
cudaArray_t array;
|
| 495 |
+
} cudaFreeArray_v3020_params;
|
| 496 |
+
|
| 497 |
+
typedef struct cudaFreeMipmappedArray_v5000_params_st {
|
| 498 |
+
cudaMipmappedArray_t mipmappedArray;
|
| 499 |
+
} cudaFreeMipmappedArray_v5000_params;
|
| 500 |
+
|
| 501 |
+
typedef struct cudaHostAlloc_v3020_params_st {
|
| 502 |
+
void **pHost;
|
| 503 |
+
size_t size;
|
| 504 |
+
unsigned int flags;
|
| 505 |
+
} cudaHostAlloc_v3020_params;
|
| 506 |
+
|
| 507 |
+
typedef struct cudaHostRegister_v4000_params_st {
|
| 508 |
+
void *ptr;
|
| 509 |
+
size_t size;
|
| 510 |
+
unsigned int flags;
|
| 511 |
+
} cudaHostRegister_v4000_params;
|
| 512 |
+
|
| 513 |
+
typedef struct cudaHostUnregister_v4000_params_st {
|
| 514 |
+
void *ptr;
|
| 515 |
+
} cudaHostUnregister_v4000_params;
|
| 516 |
+
|
| 517 |
+
typedef struct cudaHostGetDevicePointer_v3020_params_st {
|
| 518 |
+
void **pDevice;
|
| 519 |
+
void *pHost;
|
| 520 |
+
unsigned int flags;
|
| 521 |
+
} cudaHostGetDevicePointer_v3020_params;
|
| 522 |
+
|
| 523 |
+
typedef struct cudaHostGetFlags_v3020_params_st {
|
| 524 |
+
unsigned int *pFlags;
|
| 525 |
+
void *pHost;
|
| 526 |
+
} cudaHostGetFlags_v3020_params;
|
| 527 |
+
|
| 528 |
+
typedef struct cudaMalloc3D_v3020_params_st {
|
| 529 |
+
struct cudaPitchedPtr *pitchedDevPtr;
|
| 530 |
+
struct cudaExtent extent;
|
| 531 |
+
} cudaMalloc3D_v3020_params;
|
| 532 |
+
|
| 533 |
+
typedef struct cudaMalloc3DArray_v3020_params_st {
|
| 534 |
+
cudaArray_t *array;
|
| 535 |
+
const struct cudaChannelFormatDesc *desc;
|
| 536 |
+
struct cudaExtent extent;
|
| 537 |
+
unsigned int flags;
|
| 538 |
+
} cudaMalloc3DArray_v3020_params;
|
| 539 |
+
|
| 540 |
+
typedef struct cudaMallocMipmappedArray_v5000_params_st {
|
| 541 |
+
cudaMipmappedArray_t *mipmappedArray;
|
| 542 |
+
const struct cudaChannelFormatDesc *desc;
|
| 543 |
+
struct cudaExtent extent;
|
| 544 |
+
unsigned int numLevels;
|
| 545 |
+
unsigned int flags;
|
| 546 |
+
} cudaMallocMipmappedArray_v5000_params;
|
| 547 |
+
|
| 548 |
+
typedef struct cudaGetMipmappedArrayLevel_v5000_params_st {
|
| 549 |
+
cudaArray_t *levelArray;
|
| 550 |
+
cudaMipmappedArray_const_t mipmappedArray;
|
| 551 |
+
unsigned int level;
|
| 552 |
+
} cudaGetMipmappedArrayLevel_v5000_params;
|
| 553 |
+
|
| 554 |
+
typedef struct cudaMemcpy3D_ptds_v7000_params_st {
|
| 555 |
+
const struct cudaMemcpy3DParms *p;
|
| 556 |
+
} cudaMemcpy3D_ptds_v7000_params;
|
| 557 |
+
|
| 558 |
+
typedef struct cudaMemcpy3DPeer_ptds_v7000_params_st {
|
| 559 |
+
const struct cudaMemcpy3DPeerParms *p;
|
| 560 |
+
} cudaMemcpy3DPeer_ptds_v7000_params;
|
| 561 |
+
|
| 562 |
+
typedef struct cudaMemcpy3DAsync_ptsz_v7000_params_st {
|
| 563 |
+
const struct cudaMemcpy3DParms *p;
|
| 564 |
+
cudaStream_t stream;
|
| 565 |
+
} cudaMemcpy3DAsync_ptsz_v7000_params;
|
| 566 |
+
|
| 567 |
+
typedef struct cudaMemcpy3DPeerAsync_ptsz_v7000_params_st {
|
| 568 |
+
const struct cudaMemcpy3DPeerParms *p;
|
| 569 |
+
cudaStream_t stream;
|
| 570 |
+
} cudaMemcpy3DPeerAsync_ptsz_v7000_params;
|
| 571 |
+
|
| 572 |
+
typedef struct cudaMemGetInfo_v3020_params_st {
|
| 573 |
+
size_t *free;
|
| 574 |
+
size_t *total;
|
| 575 |
+
} cudaMemGetInfo_v3020_params;
|
| 576 |
+
|
| 577 |
+
typedef struct cudaArrayGetInfo_v4010_params_st {
|
| 578 |
+
struct cudaChannelFormatDesc *desc;
|
| 579 |
+
struct cudaExtent *extent;
|
| 580 |
+
unsigned int *flags;
|
| 581 |
+
cudaArray_t array;
|
| 582 |
+
} cudaArrayGetInfo_v4010_params;
|
| 583 |
+
|
| 584 |
+
typedef struct cudaArrayGetPlane_v11020_params_st {
|
| 585 |
+
cudaArray_t *pPlaneArray;
|
| 586 |
+
cudaArray_t hArray;
|
| 587 |
+
unsigned int planeIdx;
|
| 588 |
+
} cudaArrayGetPlane_v11020_params;
|
| 589 |
+
|
| 590 |
+
typedef struct cudaArrayGetMemoryRequirements_v11060_params_st {
|
| 591 |
+
struct cudaArrayMemoryRequirements *memoryRequirements;
|
| 592 |
+
cudaArray_t array;
|
| 593 |
+
int device;
|
| 594 |
+
} cudaArrayGetMemoryRequirements_v11060_params;
|
| 595 |
+
|
| 596 |
+
typedef struct cudaMipmappedArrayGetMemoryRequirements_v11060_params_st {
|
| 597 |
+
struct cudaArrayMemoryRequirements *memoryRequirements;
|
| 598 |
+
cudaMipmappedArray_t mipmap;
|
| 599 |
+
int device;
|
| 600 |
+
} cudaMipmappedArrayGetMemoryRequirements_v11060_params;
|
| 601 |
+
|
| 602 |
+
typedef struct cudaArrayGetSparseProperties_v11010_params_st {
|
| 603 |
+
struct cudaArraySparseProperties *sparseProperties;
|
| 604 |
+
cudaArray_t array;
|
| 605 |
+
} cudaArrayGetSparseProperties_v11010_params;
|
| 606 |
+
|
| 607 |
+
typedef struct cudaMipmappedArrayGetSparseProperties_v11010_params_st {
|
| 608 |
+
struct cudaArraySparseProperties *sparseProperties;
|
| 609 |
+
cudaMipmappedArray_t mipmap;
|
| 610 |
+
} cudaMipmappedArrayGetSparseProperties_v11010_params;
|
| 611 |
+
|
| 612 |
+
typedef struct cudaMemcpy_ptds_v7000_params_st {
|
| 613 |
+
void *dst;
|
| 614 |
+
const void *src;
|
| 615 |
+
size_t count;
|
| 616 |
+
enum cudaMemcpyKind kind;
|
| 617 |
+
} cudaMemcpy_ptds_v7000_params;
|
| 618 |
+
|
| 619 |
+
typedef struct cudaMemcpyPeer_v4000_params_st {
|
| 620 |
+
void *dst;
|
| 621 |
+
int dstDevice;
|
| 622 |
+
const void *src;
|
| 623 |
+
int srcDevice;
|
| 624 |
+
size_t count;
|
| 625 |
+
} cudaMemcpyPeer_v4000_params;
|
| 626 |
+
|
| 627 |
+
typedef struct cudaMemcpy2D_ptds_v7000_params_st {
|
| 628 |
+
void *dst;
|
| 629 |
+
size_t dpitch;
|
| 630 |
+
const void *src;
|
| 631 |
+
size_t spitch;
|
| 632 |
+
size_t width;
|
| 633 |
+
size_t height;
|
| 634 |
+
enum cudaMemcpyKind kind;
|
| 635 |
+
} cudaMemcpy2D_ptds_v7000_params;
|
| 636 |
+
|
| 637 |
+
typedef struct cudaMemcpy2DToArray_ptds_v7000_params_st {
|
| 638 |
+
cudaArray_t dst;
|
| 639 |
+
size_t wOffset;
|
| 640 |
+
size_t hOffset;
|
| 641 |
+
const void *src;
|
| 642 |
+
size_t spitch;
|
| 643 |
+
size_t width;
|
| 644 |
+
size_t height;
|
| 645 |
+
enum cudaMemcpyKind kind;
|
| 646 |
+
} cudaMemcpy2DToArray_ptds_v7000_params;
|
| 647 |
+
|
| 648 |
+
typedef struct cudaMemcpy2DFromArray_ptds_v7000_params_st {
|
| 649 |
+
void *dst;
|
| 650 |
+
size_t dpitch;
|
| 651 |
+
cudaArray_const_t src;
|
| 652 |
+
size_t wOffset;
|
| 653 |
+
size_t hOffset;
|
| 654 |
+
size_t width;
|
| 655 |
+
size_t height;
|
| 656 |
+
enum cudaMemcpyKind kind;
|
| 657 |
+
} cudaMemcpy2DFromArray_ptds_v7000_params;
|
| 658 |
+
|
| 659 |
+
typedef struct cudaMemcpy2DArrayToArray_ptds_v7000_params_st {
|
| 660 |
+
cudaArray_t dst;
|
| 661 |
+
size_t wOffsetDst;
|
| 662 |
+
size_t hOffsetDst;
|
| 663 |
+
cudaArray_const_t src;
|
| 664 |
+
size_t wOffsetSrc;
|
| 665 |
+
size_t hOffsetSrc;
|
| 666 |
+
size_t width;
|
| 667 |
+
size_t height;
|
| 668 |
+
enum cudaMemcpyKind kind;
|
| 669 |
+
} cudaMemcpy2DArrayToArray_ptds_v7000_params;
|
| 670 |
+
|
| 671 |
+
typedef struct cudaMemcpyToSymbol_ptds_v7000_params_st {
|
| 672 |
+
const void *symbol;
|
| 673 |
+
const void *src;
|
| 674 |
+
size_t count;
|
| 675 |
+
size_t offset;
|
| 676 |
+
enum cudaMemcpyKind kind;
|
| 677 |
+
} cudaMemcpyToSymbol_ptds_v7000_params;
|
| 678 |
+
|
| 679 |
+
typedef struct cudaMemcpyFromSymbol_ptds_v7000_params_st {
|
| 680 |
+
void *dst;
|
| 681 |
+
const void *symbol;
|
| 682 |
+
size_t count;
|
| 683 |
+
size_t offset;
|
| 684 |
+
enum cudaMemcpyKind kind;
|
| 685 |
+
} cudaMemcpyFromSymbol_ptds_v7000_params;
|
| 686 |
+
|
| 687 |
+
typedef struct cudaMemcpyAsync_ptsz_v7000_params_st {
|
| 688 |
+
void *dst;
|
| 689 |
+
const void *src;
|
| 690 |
+
size_t count;
|
| 691 |
+
enum cudaMemcpyKind kind;
|
| 692 |
+
cudaStream_t stream;
|
| 693 |
+
} cudaMemcpyAsync_ptsz_v7000_params;
|
| 694 |
+
|
| 695 |
+
typedef struct cudaMemcpyPeerAsync_v4000_params_st {
|
| 696 |
+
void *dst;
|
| 697 |
+
int dstDevice;
|
| 698 |
+
const void *src;
|
| 699 |
+
int srcDevice;
|
| 700 |
+
size_t count;
|
| 701 |
+
cudaStream_t stream;
|
| 702 |
+
} cudaMemcpyPeerAsync_v4000_params;
|
| 703 |
+
|
| 704 |
+
typedef struct cudaMemcpy2DAsync_ptsz_v7000_params_st {
|
| 705 |
+
void *dst;
|
| 706 |
+
size_t dpitch;
|
| 707 |
+
const void *src;
|
| 708 |
+
size_t spitch;
|
| 709 |
+
size_t width;
|
| 710 |
+
size_t height;
|
| 711 |
+
enum cudaMemcpyKind kind;
|
| 712 |
+
cudaStream_t stream;
|
| 713 |
+
} cudaMemcpy2DAsync_ptsz_v7000_params;
|
| 714 |
+
|
| 715 |
+
typedef struct cudaMemcpy2DToArrayAsync_ptsz_v7000_params_st {
|
| 716 |
+
cudaArray_t dst;
|
| 717 |
+
size_t wOffset;
|
| 718 |
+
size_t hOffset;
|
| 719 |
+
const void *src;
|
| 720 |
+
size_t spitch;
|
| 721 |
+
size_t width;
|
| 722 |
+
size_t height;
|
| 723 |
+
enum cudaMemcpyKind kind;
|
| 724 |
+
cudaStream_t stream;
|
| 725 |
+
} cudaMemcpy2DToArrayAsync_ptsz_v7000_params;
|
| 726 |
+
|
| 727 |
+
typedef struct cudaMemcpy2DFromArrayAsync_ptsz_v7000_params_st {
|
| 728 |
+
void *dst;
|
| 729 |
+
size_t dpitch;
|
| 730 |
+
cudaArray_const_t src;
|
| 731 |
+
size_t wOffset;
|
| 732 |
+
size_t hOffset;
|
| 733 |
+
size_t width;
|
| 734 |
+
size_t height;
|
| 735 |
+
enum cudaMemcpyKind kind;
|
| 736 |
+
cudaStream_t stream;
|
| 737 |
+
} cudaMemcpy2DFromArrayAsync_ptsz_v7000_params;
|
| 738 |
+
|
| 739 |
+
typedef struct cudaMemcpyToSymbolAsync_ptsz_v7000_params_st {
|
| 740 |
+
const void *symbol;
|
| 741 |
+
const void *src;
|
| 742 |
+
size_t count;
|
| 743 |
+
size_t offset;
|
| 744 |
+
enum cudaMemcpyKind kind;
|
| 745 |
+
cudaStream_t stream;
|
| 746 |
+
} cudaMemcpyToSymbolAsync_ptsz_v7000_params;
|
| 747 |
+
|
| 748 |
+
typedef struct cudaMemcpyFromSymbolAsync_ptsz_v7000_params_st {
|
| 749 |
+
void *dst;
|
| 750 |
+
const void *symbol;
|
| 751 |
+
size_t count;
|
| 752 |
+
size_t offset;
|
| 753 |
+
enum cudaMemcpyKind kind;
|
| 754 |
+
cudaStream_t stream;
|
| 755 |
+
} cudaMemcpyFromSymbolAsync_ptsz_v7000_params;
|
| 756 |
+
|
| 757 |
+
typedef struct cudaMemset_ptds_v7000_params_st {
|
| 758 |
+
void *devPtr;
|
| 759 |
+
int value;
|
| 760 |
+
size_t count;
|
| 761 |
+
} cudaMemset_ptds_v7000_params;
|
| 762 |
+
|
| 763 |
+
typedef struct cudaMemset2D_ptds_v7000_params_st {
|
| 764 |
+
void *devPtr;
|
| 765 |
+
size_t pitch;
|
| 766 |
+
int value;
|
| 767 |
+
size_t width;
|
| 768 |
+
size_t height;
|
| 769 |
+
} cudaMemset2D_ptds_v7000_params;
|
| 770 |
+
|
| 771 |
+
typedef struct cudaMemset3D_ptds_v7000_params_st {
|
| 772 |
+
struct cudaPitchedPtr pitchedDevPtr;
|
| 773 |
+
int value;
|
| 774 |
+
struct cudaExtent extent;
|
| 775 |
+
} cudaMemset3D_ptds_v7000_params;
|
| 776 |
+
|
| 777 |
+
typedef struct cudaMemsetAsync_ptsz_v7000_params_st {
|
| 778 |
+
void *devPtr;
|
| 779 |
+
int value;
|
| 780 |
+
size_t count;
|
| 781 |
+
cudaStream_t stream;
|
| 782 |
+
} cudaMemsetAsync_ptsz_v7000_params;
|
| 783 |
+
|
| 784 |
+
typedef struct cudaMemset2DAsync_ptsz_v7000_params_st {
|
| 785 |
+
void *devPtr;
|
| 786 |
+
size_t pitch;
|
| 787 |
+
int value;
|
| 788 |
+
size_t width;
|
| 789 |
+
size_t height;
|
| 790 |
+
cudaStream_t stream;
|
| 791 |
+
} cudaMemset2DAsync_ptsz_v7000_params;
|
| 792 |
+
|
| 793 |
+
typedef struct cudaMemset3DAsync_ptsz_v7000_params_st {
|
| 794 |
+
struct cudaPitchedPtr pitchedDevPtr;
|
| 795 |
+
int value;
|
| 796 |
+
struct cudaExtent extent;
|
| 797 |
+
cudaStream_t stream;
|
| 798 |
+
} cudaMemset3DAsync_ptsz_v7000_params;
|
| 799 |
+
|
| 800 |
+
typedef struct cudaGetSymbolAddress_v3020_params_st {
|
| 801 |
+
void **devPtr;
|
| 802 |
+
const void *symbol;
|
| 803 |
+
} cudaGetSymbolAddress_v3020_params;
|
| 804 |
+
|
| 805 |
+
typedef struct cudaGetSymbolSize_v3020_params_st {
|
| 806 |
+
size_t *size;
|
| 807 |
+
const void *symbol;
|
| 808 |
+
} cudaGetSymbolSize_v3020_params;
|
| 809 |
+
|
| 810 |
+
typedef struct cudaMemPrefetchAsync_ptsz_v8000_params_st {
|
| 811 |
+
const void *devPtr;
|
| 812 |
+
size_t count;
|
| 813 |
+
int dstDevice;
|
| 814 |
+
cudaStream_t stream;
|
| 815 |
+
} cudaMemPrefetchAsync_ptsz_v8000_params;
|
| 816 |
+
|
| 817 |
+
typedef struct cudaMemAdvise_v8000_params_st {
|
| 818 |
+
const void *devPtr;
|
| 819 |
+
size_t count;
|
| 820 |
+
enum cudaMemoryAdvise advice;
|
| 821 |
+
int device;
|
| 822 |
+
} cudaMemAdvise_v8000_params;
|
| 823 |
+
|
| 824 |
+
typedef struct cudaMemRangeGetAttribute_v8000_params_st {
|
| 825 |
+
void *data;
|
| 826 |
+
size_t dataSize;
|
| 827 |
+
enum cudaMemRangeAttribute attribute;
|
| 828 |
+
const void *devPtr;
|
| 829 |
+
size_t count;
|
| 830 |
+
} cudaMemRangeGetAttribute_v8000_params;
|
| 831 |
+
|
| 832 |
+
typedef struct cudaMemRangeGetAttributes_v8000_params_st {
|
| 833 |
+
void **data;
|
| 834 |
+
size_t *dataSizes;
|
| 835 |
+
enum cudaMemRangeAttribute *attributes;
|
| 836 |
+
size_t numAttributes;
|
| 837 |
+
const void *devPtr;
|
| 838 |
+
size_t count;
|
| 839 |
+
} cudaMemRangeGetAttributes_v8000_params;
|
| 840 |
+
|
| 841 |
+
typedef struct cudaMemcpyToArray_ptds_v7000_params_st {
|
| 842 |
+
cudaArray_t dst;
|
| 843 |
+
size_t wOffset;
|
| 844 |
+
size_t hOffset;
|
| 845 |
+
const void *src;
|
| 846 |
+
size_t count;
|
| 847 |
+
enum cudaMemcpyKind kind;
|
| 848 |
+
} cudaMemcpyToArray_ptds_v7000_params;
|
| 849 |
+
|
| 850 |
+
typedef struct cudaMemcpyFromArray_ptds_v7000_params_st {
|
| 851 |
+
void *dst;
|
| 852 |
+
cudaArray_const_t src;
|
| 853 |
+
size_t wOffset;
|
| 854 |
+
size_t hOffset;
|
| 855 |
+
size_t count;
|
| 856 |
+
enum cudaMemcpyKind kind;
|
| 857 |
+
} cudaMemcpyFromArray_ptds_v7000_params;
|
| 858 |
+
|
| 859 |
+
typedef struct cudaMemcpyArrayToArray_ptds_v7000_params_st {
|
| 860 |
+
cudaArray_t dst;
|
| 861 |
+
size_t wOffsetDst;
|
| 862 |
+
size_t hOffsetDst;
|
| 863 |
+
cudaArray_const_t src;
|
| 864 |
+
size_t wOffsetSrc;
|
| 865 |
+
size_t hOffsetSrc;
|
| 866 |
+
size_t count;
|
| 867 |
+
enum cudaMemcpyKind kind;
|
| 868 |
+
} cudaMemcpyArrayToArray_ptds_v7000_params;
|
| 869 |
+
|
| 870 |
+
typedef struct cudaMemcpyToArrayAsync_ptsz_v7000_params_st {
|
| 871 |
+
cudaArray_t dst;
|
| 872 |
+
size_t wOffset;
|
| 873 |
+
size_t hOffset;
|
| 874 |
+
const void *src;
|
| 875 |
+
size_t count;
|
| 876 |
+
enum cudaMemcpyKind kind;
|
| 877 |
+
cudaStream_t stream;
|
| 878 |
+
} cudaMemcpyToArrayAsync_ptsz_v7000_params;
|
| 879 |
+
|
| 880 |
+
typedef struct cudaMemcpyFromArrayAsync_ptsz_v7000_params_st {
|
| 881 |
+
void *dst;
|
| 882 |
+
cudaArray_const_t src;
|
| 883 |
+
size_t wOffset;
|
| 884 |
+
size_t hOffset;
|
| 885 |
+
size_t count;
|
| 886 |
+
enum cudaMemcpyKind kind;
|
| 887 |
+
cudaStream_t stream;
|
| 888 |
+
} cudaMemcpyFromArrayAsync_ptsz_v7000_params;
|
| 889 |
+
|
| 890 |
+
typedef struct cudaMallocAsync_ptsz_v11020_params_st {
|
| 891 |
+
void **devPtr;
|
| 892 |
+
size_t size;
|
| 893 |
+
cudaStream_t hStream;
|
| 894 |
+
} cudaMallocAsync_ptsz_v11020_params;
|
| 895 |
+
|
| 896 |
+
typedef struct cudaFreeAsync_ptsz_v11020_params_st {
|
| 897 |
+
void *devPtr;
|
| 898 |
+
cudaStream_t hStream;
|
| 899 |
+
} cudaFreeAsync_ptsz_v11020_params;
|
| 900 |
+
|
| 901 |
+
typedef struct cudaMemPoolTrimTo_v11020_params_st {
|
| 902 |
+
cudaMemPool_t memPool;
|
| 903 |
+
size_t minBytesToKeep;
|
| 904 |
+
} cudaMemPoolTrimTo_v11020_params;
|
| 905 |
+
|
| 906 |
+
typedef struct cudaMemPoolSetAttribute_v11020_params_st {
|
| 907 |
+
cudaMemPool_t memPool;
|
| 908 |
+
enum cudaMemPoolAttr attr;
|
| 909 |
+
void *value;
|
| 910 |
+
} cudaMemPoolSetAttribute_v11020_params;
|
| 911 |
+
|
| 912 |
+
typedef struct cudaMemPoolGetAttribute_v11020_params_st {
|
| 913 |
+
cudaMemPool_t memPool;
|
| 914 |
+
enum cudaMemPoolAttr attr;
|
| 915 |
+
void *value;
|
| 916 |
+
} cudaMemPoolGetAttribute_v11020_params;
|
| 917 |
+
|
| 918 |
+
typedef struct cudaMemPoolSetAccess_v11020_params_st {
|
| 919 |
+
cudaMemPool_t memPool;
|
| 920 |
+
const struct cudaMemAccessDesc *descList;
|
| 921 |
+
size_t count;
|
| 922 |
+
} cudaMemPoolSetAccess_v11020_params;
|
| 923 |
+
|
| 924 |
+
typedef struct cudaMemPoolGetAccess_v11020_params_st {
|
| 925 |
+
enum cudaMemAccessFlags *flags;
|
| 926 |
+
cudaMemPool_t memPool;
|
| 927 |
+
struct cudaMemLocation *location;
|
| 928 |
+
} cudaMemPoolGetAccess_v11020_params;
|
| 929 |
+
|
| 930 |
+
typedef struct cudaMemPoolCreate_v11020_params_st {
|
| 931 |
+
cudaMemPool_t *memPool;
|
| 932 |
+
const struct cudaMemPoolProps *poolProps;
|
| 933 |
+
} cudaMemPoolCreate_v11020_params;
|
| 934 |
+
|
| 935 |
+
typedef struct cudaMemPoolDestroy_v11020_params_st {
|
| 936 |
+
cudaMemPool_t memPool;
|
| 937 |
+
} cudaMemPoolDestroy_v11020_params;
|
| 938 |
+
|
| 939 |
+
typedef struct cudaMallocFromPoolAsync_ptsz_v11020_params_st {
|
| 940 |
+
void **ptr;
|
| 941 |
+
size_t size;
|
| 942 |
+
cudaMemPool_t memPool;
|
| 943 |
+
cudaStream_t stream;
|
| 944 |
+
} cudaMallocFromPoolAsync_ptsz_v11020_params;
|
| 945 |
+
|
| 946 |
+
typedef struct cudaMemPoolExportToShareableHandle_v11020_params_st {
|
| 947 |
+
void *shareableHandle;
|
| 948 |
+
cudaMemPool_t memPool;
|
| 949 |
+
enum cudaMemAllocationHandleType handleType;
|
| 950 |
+
unsigned int flags;
|
| 951 |
+
} cudaMemPoolExportToShareableHandle_v11020_params;
|
| 952 |
+
|
| 953 |
+
typedef struct cudaMemPoolImportFromShareableHandle_v11020_params_st {
|
| 954 |
+
cudaMemPool_t *memPool;
|
| 955 |
+
void *shareableHandle;
|
| 956 |
+
enum cudaMemAllocationHandleType handleType;
|
| 957 |
+
unsigned int flags;
|
| 958 |
+
} cudaMemPoolImportFromShareableHandle_v11020_params;
|
| 959 |
+
|
| 960 |
+
typedef struct cudaMemPoolExportPointer_v11020_params_st {
|
| 961 |
+
struct cudaMemPoolPtrExportData *exportData;
|
| 962 |
+
void *ptr;
|
| 963 |
+
} cudaMemPoolExportPointer_v11020_params;
|
| 964 |
+
|
| 965 |
+
typedef struct cudaMemPoolImportPointer_v11020_params_st {
|
| 966 |
+
void **ptr;
|
| 967 |
+
cudaMemPool_t memPool;
|
| 968 |
+
struct cudaMemPoolPtrExportData *exportData;
|
| 969 |
+
} cudaMemPoolImportPointer_v11020_params;
|
| 970 |
+
|
| 971 |
+
typedef struct cudaPointerGetAttributes_v4000_params_st {
|
| 972 |
+
struct cudaPointerAttributes *attributes;
|
| 973 |
+
const void *ptr;
|
| 974 |
+
} cudaPointerGetAttributes_v4000_params;
|
| 975 |
+
|
| 976 |
+
typedef struct cudaDeviceCanAccessPeer_v4000_params_st {
|
| 977 |
+
int *canAccessPeer;
|
| 978 |
+
int device;
|
| 979 |
+
int peerDevice;
|
| 980 |
+
} cudaDeviceCanAccessPeer_v4000_params;
|
| 981 |
+
|
| 982 |
+
typedef struct cudaDeviceEnablePeerAccess_v4000_params_st {
|
| 983 |
+
int peerDevice;
|
| 984 |
+
unsigned int flags;
|
| 985 |
+
} cudaDeviceEnablePeerAccess_v4000_params;
|
| 986 |
+
|
| 987 |
+
typedef struct cudaDeviceDisablePeerAccess_v4000_params_st {
|
| 988 |
+
int peerDevice;
|
| 989 |
+
} cudaDeviceDisablePeerAccess_v4000_params;
|
| 990 |
+
|
| 991 |
+
typedef struct cudaGraphicsUnregisterResource_v3020_params_st {
|
| 992 |
+
cudaGraphicsResource_t resource;
|
| 993 |
+
} cudaGraphicsUnregisterResource_v3020_params;
|
| 994 |
+
|
| 995 |
+
typedef struct cudaGraphicsResourceSetMapFlags_v3020_params_st {
|
| 996 |
+
cudaGraphicsResource_t resource;
|
| 997 |
+
unsigned int flags;
|
| 998 |
+
} cudaGraphicsResourceSetMapFlags_v3020_params;
|
| 999 |
+
|
| 1000 |
+
typedef struct cudaGraphicsMapResources_v3020_params_st {
|
| 1001 |
+
int count;
|
| 1002 |
+
cudaGraphicsResource_t *resources;
|
| 1003 |
+
cudaStream_t stream;
|
| 1004 |
+
} cudaGraphicsMapResources_v3020_params;
|
| 1005 |
+
|
| 1006 |
+
typedef struct cudaGraphicsUnmapResources_v3020_params_st {
|
| 1007 |
+
int count;
|
| 1008 |
+
cudaGraphicsResource_t *resources;
|
| 1009 |
+
cudaStream_t stream;
|
| 1010 |
+
} cudaGraphicsUnmapResources_v3020_params;
|
| 1011 |
+
|
| 1012 |
+
typedef struct cudaGraphicsResourceGetMappedPointer_v3020_params_st {
|
| 1013 |
+
void **devPtr;
|
| 1014 |
+
size_t *size;
|
| 1015 |
+
cudaGraphicsResource_t resource;
|
| 1016 |
+
} cudaGraphicsResourceGetMappedPointer_v3020_params;
|
| 1017 |
+
|
| 1018 |
+
typedef struct cudaGraphicsSubResourceGetMappedArray_v3020_params_st {
|
| 1019 |
+
cudaArray_t *array;
|
| 1020 |
+
cudaGraphicsResource_t resource;
|
| 1021 |
+
unsigned int arrayIndex;
|
| 1022 |
+
unsigned int mipLevel;
|
| 1023 |
+
} cudaGraphicsSubResourceGetMappedArray_v3020_params;
|
| 1024 |
+
|
| 1025 |
+
typedef struct cudaGraphicsResourceGetMappedMipmappedArray_v5000_params_st {
|
| 1026 |
+
cudaMipmappedArray_t *mipmappedArray;
|
| 1027 |
+
cudaGraphicsResource_t resource;
|
| 1028 |
+
} cudaGraphicsResourceGetMappedMipmappedArray_v5000_params;
|
| 1029 |
+
|
| 1030 |
+
typedef struct cudaBindTexture_v3020_params_st {
|
| 1031 |
+
size_t *offset;
|
| 1032 |
+
const struct textureReference *texref;
|
| 1033 |
+
const void *devPtr;
|
| 1034 |
+
const struct cudaChannelFormatDesc *desc;
|
| 1035 |
+
size_t size;
|
| 1036 |
+
} cudaBindTexture_v3020_params;
|
| 1037 |
+
|
| 1038 |
+
typedef struct cudaBindTexture2D_v3020_params_st {
|
| 1039 |
+
size_t *offset;
|
| 1040 |
+
const struct textureReference *texref;
|
| 1041 |
+
const void *devPtr;
|
| 1042 |
+
const struct cudaChannelFormatDesc *desc;
|
| 1043 |
+
size_t width;
|
| 1044 |
+
size_t height;
|
| 1045 |
+
size_t pitch;
|
| 1046 |
+
} cudaBindTexture2D_v3020_params;
|
| 1047 |
+
|
| 1048 |
+
typedef struct cudaBindTextureToArray_v3020_params_st {
|
| 1049 |
+
const struct textureReference *texref;
|
| 1050 |
+
cudaArray_const_t array;
|
| 1051 |
+
const struct cudaChannelFormatDesc *desc;
|
| 1052 |
+
} cudaBindTextureToArray_v3020_params;
|
| 1053 |
+
|
| 1054 |
+
typedef struct cudaBindTextureToMipmappedArray_v5000_params_st {
|
| 1055 |
+
const struct textureReference *texref;
|
| 1056 |
+
cudaMipmappedArray_const_t mipmappedArray;
|
| 1057 |
+
const struct cudaChannelFormatDesc *desc;
|
| 1058 |
+
} cudaBindTextureToMipmappedArray_v5000_params;
|
| 1059 |
+
|
| 1060 |
+
typedef struct cudaUnbindTexture_v3020_params_st {
|
| 1061 |
+
const struct textureReference *texref;
|
| 1062 |
+
} cudaUnbindTexture_v3020_params;
|
| 1063 |
+
|
| 1064 |
+
typedef struct cudaGetTextureAlignmentOffset_v3020_params_st {
|
| 1065 |
+
size_t *offset;
|
| 1066 |
+
const struct textureReference *texref;
|
| 1067 |
+
} cudaGetTextureAlignmentOffset_v3020_params;
|
| 1068 |
+
|
| 1069 |
+
typedef struct cudaGetTextureReference_v3020_params_st {
|
| 1070 |
+
const struct textureReference **texref;
|
| 1071 |
+
const void *symbol;
|
| 1072 |
+
} cudaGetTextureReference_v3020_params;
|
| 1073 |
+
|
| 1074 |
+
typedef struct cudaBindSurfaceToArray_v3020_params_st {
|
| 1075 |
+
const struct surfaceReference *surfref;
|
| 1076 |
+
cudaArray_const_t array;
|
| 1077 |
+
const struct cudaChannelFormatDesc *desc;
|
| 1078 |
+
} cudaBindSurfaceToArray_v3020_params;
|
| 1079 |
+
|
| 1080 |
+
typedef struct cudaGetSurfaceReference_v3020_params_st {
|
| 1081 |
+
const struct surfaceReference **surfref;
|
| 1082 |
+
const void *symbol;
|
| 1083 |
+
} cudaGetSurfaceReference_v3020_params;
|
| 1084 |
+
|
| 1085 |
+
typedef struct cudaGetChannelDesc_v3020_params_st {
|
| 1086 |
+
struct cudaChannelFormatDesc *desc;
|
| 1087 |
+
cudaArray_const_t array;
|
| 1088 |
+
} cudaGetChannelDesc_v3020_params;
|
| 1089 |
+
|
| 1090 |
+
typedef struct cudaCreateChannelDesc_v3020_params_st {
|
| 1091 |
+
int x;
|
| 1092 |
+
int y;
|
| 1093 |
+
int z;
|
| 1094 |
+
int w;
|
| 1095 |
+
enum cudaChannelFormatKind f;
|
| 1096 |
+
} cudaCreateChannelDesc_v3020_params;
|
| 1097 |
+
|
| 1098 |
+
typedef struct cudaCreateTextureObject_v5000_params_st {
|
| 1099 |
+
cudaTextureObject_t *pTexObject;
|
| 1100 |
+
const struct cudaResourceDesc *pResDesc;
|
| 1101 |
+
const struct cudaTextureDesc *pTexDesc;
|
| 1102 |
+
const struct cudaResourceViewDesc *pResViewDesc;
|
| 1103 |
+
} cudaCreateTextureObject_v5000_params;
|
| 1104 |
+
|
| 1105 |
+
typedef struct cudaDestroyTextureObject_v5000_params_st {
|
| 1106 |
+
cudaTextureObject_t texObject;
|
| 1107 |
+
} cudaDestroyTextureObject_v5000_params;
|
| 1108 |
+
|
| 1109 |
+
typedef struct cudaGetTextureObjectResourceDesc_v5000_params_st {
|
| 1110 |
+
struct cudaResourceDesc *pResDesc;
|
| 1111 |
+
cudaTextureObject_t texObject;
|
| 1112 |
+
} cudaGetTextureObjectResourceDesc_v5000_params;
|
| 1113 |
+
|
| 1114 |
+
typedef struct cudaGetTextureObjectTextureDesc_v5000_params_st {
|
| 1115 |
+
struct cudaTextureDesc *pTexDesc;
|
| 1116 |
+
cudaTextureObject_t texObject;
|
| 1117 |
+
} cudaGetTextureObjectTextureDesc_v5000_params;
|
| 1118 |
+
|
| 1119 |
+
typedef struct cudaGetTextureObjectResourceViewDesc_v5000_params_st {
|
| 1120 |
+
struct cudaResourceViewDesc *pResViewDesc;
|
| 1121 |
+
cudaTextureObject_t texObject;
|
| 1122 |
+
} cudaGetTextureObjectResourceViewDesc_v5000_params;
|
| 1123 |
+
|
| 1124 |
+
typedef struct cudaCreateSurfaceObject_v5000_params_st {
|
| 1125 |
+
cudaSurfaceObject_t *pSurfObject;
|
| 1126 |
+
const struct cudaResourceDesc *pResDesc;
|
| 1127 |
+
} cudaCreateSurfaceObject_v5000_params;
|
| 1128 |
+
|
| 1129 |
+
typedef struct cudaDestroySurfaceObject_v5000_params_st {
|
| 1130 |
+
cudaSurfaceObject_t surfObject;
|
| 1131 |
+
} cudaDestroySurfaceObject_v5000_params;
|
| 1132 |
+
|
| 1133 |
+
typedef struct cudaGetSurfaceObjectResourceDesc_v5000_params_st {
|
| 1134 |
+
struct cudaResourceDesc *pResDesc;
|
| 1135 |
+
cudaSurfaceObject_t surfObject;
|
| 1136 |
+
} cudaGetSurfaceObjectResourceDesc_v5000_params;
|
| 1137 |
+
|
| 1138 |
+
typedef struct cudaDriverGetVersion_v3020_params_st {
|
| 1139 |
+
int *driverVersion;
|
| 1140 |
+
} cudaDriverGetVersion_v3020_params;
|
| 1141 |
+
|
| 1142 |
+
typedef struct cudaRuntimeGetVersion_v3020_params_st {
|
| 1143 |
+
int *runtimeVersion;
|
| 1144 |
+
} cudaRuntimeGetVersion_v3020_params;
|
| 1145 |
+
|
| 1146 |
+
typedef struct cudaGraphCreate_v10000_params_st {
|
| 1147 |
+
cudaGraph_t *pGraph;
|
| 1148 |
+
unsigned int flags;
|
| 1149 |
+
} cudaGraphCreate_v10000_params;
|
| 1150 |
+
|
| 1151 |
+
typedef struct cudaGraphAddKernelNode_v10000_params_st {
|
| 1152 |
+
cudaGraphNode_t *pGraphNode;
|
| 1153 |
+
cudaGraph_t graph;
|
| 1154 |
+
const cudaGraphNode_t *pDependencies;
|
| 1155 |
+
size_t numDependencies;
|
| 1156 |
+
const struct cudaKernelNodeParams *pNodeParams;
|
| 1157 |
+
} cudaGraphAddKernelNode_v10000_params;
|
| 1158 |
+
|
| 1159 |
+
typedef struct cudaGraphKernelNodeGetParams_v10000_params_st {
|
| 1160 |
+
cudaGraphNode_t node;
|
| 1161 |
+
struct cudaKernelNodeParams *pNodeParams;
|
| 1162 |
+
} cudaGraphKernelNodeGetParams_v10000_params;
|
| 1163 |
+
|
| 1164 |
+
typedef struct cudaGraphKernelNodeSetParams_v10000_params_st {
|
| 1165 |
+
cudaGraphNode_t node;
|
| 1166 |
+
const struct cudaKernelNodeParams *pNodeParams;
|
| 1167 |
+
} cudaGraphKernelNodeSetParams_v10000_params;
|
| 1168 |
+
|
| 1169 |
+
typedef struct cudaGraphKernelNodeCopyAttributes_v11000_params_st {
|
| 1170 |
+
cudaGraphNode_t hSrc;
|
| 1171 |
+
cudaGraphNode_t hDst;
|
| 1172 |
+
} cudaGraphKernelNodeCopyAttributes_v11000_params;
|
| 1173 |
+
|
| 1174 |
+
typedef struct cudaGraphKernelNodeGetAttribute_v11000_params_st {
|
| 1175 |
+
cudaGraphNode_t hNode;
|
| 1176 |
+
cudaKernelNodeAttrID attr;
|
| 1177 |
+
cudaKernelNodeAttrValue *value_out;
|
| 1178 |
+
} cudaGraphKernelNodeGetAttribute_v11000_params;
|
| 1179 |
+
|
| 1180 |
+
typedef struct cudaGraphKernelNodeSetAttribute_v11000_params_st {
|
| 1181 |
+
cudaGraphNode_t hNode;
|
| 1182 |
+
cudaKernelNodeAttrID attr;
|
| 1183 |
+
const cudaKernelNodeAttrValue *value;
|
| 1184 |
+
} cudaGraphKernelNodeSetAttribute_v11000_params;
|
| 1185 |
+
|
| 1186 |
+
typedef struct cudaGraphAddMemcpyNode_v10000_params_st {
|
| 1187 |
+
cudaGraphNode_t *pGraphNode;
|
| 1188 |
+
cudaGraph_t graph;
|
| 1189 |
+
const cudaGraphNode_t *pDependencies;
|
| 1190 |
+
size_t numDependencies;
|
| 1191 |
+
const struct cudaMemcpy3DParms *pCopyParams;
|
| 1192 |
+
} cudaGraphAddMemcpyNode_v10000_params;
|
| 1193 |
+
|
| 1194 |
+
typedef struct cudaGraphAddMemcpyNodeToSymbol_v11010_params_st {
|
| 1195 |
+
cudaGraphNode_t *pGraphNode;
|
| 1196 |
+
cudaGraph_t graph;
|
| 1197 |
+
const cudaGraphNode_t *pDependencies;
|
| 1198 |
+
size_t numDependencies;
|
| 1199 |
+
const void *symbol;
|
| 1200 |
+
const void *src;
|
| 1201 |
+
size_t count;
|
| 1202 |
+
size_t offset;
|
| 1203 |
+
enum cudaMemcpyKind kind;
|
| 1204 |
+
} cudaGraphAddMemcpyNodeToSymbol_v11010_params;
|
| 1205 |
+
|
| 1206 |
+
typedef struct cudaGraphAddMemcpyNodeFromSymbol_v11010_params_st {
|
| 1207 |
+
cudaGraphNode_t *pGraphNode;
|
| 1208 |
+
cudaGraph_t graph;
|
| 1209 |
+
const cudaGraphNode_t *pDependencies;
|
| 1210 |
+
size_t numDependencies;
|
| 1211 |
+
void *dst;
|
| 1212 |
+
const void *symbol;
|
| 1213 |
+
size_t count;
|
| 1214 |
+
size_t offset;
|
| 1215 |
+
enum cudaMemcpyKind kind;
|
| 1216 |
+
} cudaGraphAddMemcpyNodeFromSymbol_v11010_params;
|
| 1217 |
+
|
| 1218 |
+
typedef struct cudaGraphAddMemcpyNode1D_v11010_params_st {
|
| 1219 |
+
cudaGraphNode_t *pGraphNode;
|
| 1220 |
+
cudaGraph_t graph;
|
| 1221 |
+
const cudaGraphNode_t *pDependencies;
|
| 1222 |
+
size_t numDependencies;
|
| 1223 |
+
void *dst;
|
| 1224 |
+
const void *src;
|
| 1225 |
+
size_t count;
|
| 1226 |
+
enum cudaMemcpyKind kind;
|
| 1227 |
+
} cudaGraphAddMemcpyNode1D_v11010_params;
|
| 1228 |
+
|
| 1229 |
+
typedef struct cudaGraphMemcpyNodeGetParams_v10000_params_st {
|
| 1230 |
+
cudaGraphNode_t node;
|
| 1231 |
+
struct cudaMemcpy3DParms *pNodeParams;
|
| 1232 |
+
} cudaGraphMemcpyNodeGetParams_v10000_params;
|
| 1233 |
+
|
| 1234 |
+
typedef struct cudaGraphMemcpyNodeSetParams_v10000_params_st {
|
| 1235 |
+
cudaGraphNode_t node;
|
| 1236 |
+
const struct cudaMemcpy3DParms *pNodeParams;
|
| 1237 |
+
} cudaGraphMemcpyNodeSetParams_v10000_params;
|
| 1238 |
+
|
| 1239 |
+
typedef struct cudaGraphMemcpyNodeSetParamsToSymbol_v11010_params_st {
|
| 1240 |
+
cudaGraphNode_t node;
|
| 1241 |
+
const void *symbol;
|
| 1242 |
+
const void *src;
|
| 1243 |
+
size_t count;
|
| 1244 |
+
size_t offset;
|
| 1245 |
+
enum cudaMemcpyKind kind;
|
| 1246 |
+
} cudaGraphMemcpyNodeSetParamsToSymbol_v11010_params;
|
| 1247 |
+
|
| 1248 |
+
typedef struct cudaGraphMemcpyNodeSetParamsFromSymbol_v11010_params_st {
|
| 1249 |
+
cudaGraphNode_t node;
|
| 1250 |
+
void *dst;
|
| 1251 |
+
const void *symbol;
|
| 1252 |
+
size_t count;
|
| 1253 |
+
size_t offset;
|
| 1254 |
+
enum cudaMemcpyKind kind;
|
| 1255 |
+
} cudaGraphMemcpyNodeSetParamsFromSymbol_v11010_params;
|
| 1256 |
+
|
| 1257 |
+
typedef struct cudaGraphMemcpyNodeSetParams1D_v11010_params_st {
|
| 1258 |
+
cudaGraphNode_t node;
|
| 1259 |
+
void *dst;
|
| 1260 |
+
const void *src;
|
| 1261 |
+
size_t count;
|
| 1262 |
+
enum cudaMemcpyKind kind;
|
| 1263 |
+
} cudaGraphMemcpyNodeSetParams1D_v11010_params;
|
| 1264 |
+
|
| 1265 |
+
typedef struct cudaGraphAddMemsetNode_v10000_params_st {
|
| 1266 |
+
cudaGraphNode_t *pGraphNode;
|
| 1267 |
+
cudaGraph_t graph;
|
| 1268 |
+
const cudaGraphNode_t *pDependencies;
|
| 1269 |
+
size_t numDependencies;
|
| 1270 |
+
const struct cudaMemsetParams *pMemsetParams;
|
| 1271 |
+
} cudaGraphAddMemsetNode_v10000_params;
|
| 1272 |
+
|
| 1273 |
+
typedef struct cudaGraphMemsetNodeGetParams_v10000_params_st {
|
| 1274 |
+
cudaGraphNode_t node;
|
| 1275 |
+
struct cudaMemsetParams *pNodeParams;
|
| 1276 |
+
} cudaGraphMemsetNodeGetParams_v10000_params;
|
| 1277 |
+
|
| 1278 |
+
typedef struct cudaGraphMemsetNodeSetParams_v10000_params_st {
|
| 1279 |
+
cudaGraphNode_t node;
|
| 1280 |
+
const struct cudaMemsetParams *pNodeParams;
|
| 1281 |
+
} cudaGraphMemsetNodeSetParams_v10000_params;
|
| 1282 |
+
|
| 1283 |
+
typedef struct cudaGraphAddHostNode_v10000_params_st {
|
| 1284 |
+
cudaGraphNode_t *pGraphNode;
|
| 1285 |
+
cudaGraph_t graph;
|
| 1286 |
+
const cudaGraphNode_t *pDependencies;
|
| 1287 |
+
size_t numDependencies;
|
| 1288 |
+
const struct cudaHostNodeParams *pNodeParams;
|
| 1289 |
+
} cudaGraphAddHostNode_v10000_params;
|
| 1290 |
+
|
| 1291 |
+
typedef struct cudaGraphHostNodeGetParams_v10000_params_st {
|
| 1292 |
+
cudaGraphNode_t node;
|
| 1293 |
+
struct cudaHostNodeParams *pNodeParams;
|
| 1294 |
+
} cudaGraphHostNodeGetParams_v10000_params;
|
| 1295 |
+
|
| 1296 |
+
typedef struct cudaGraphHostNodeSetParams_v10000_params_st {
|
| 1297 |
+
cudaGraphNode_t node;
|
| 1298 |
+
const struct cudaHostNodeParams *pNodeParams;
|
| 1299 |
+
} cudaGraphHostNodeSetParams_v10000_params;
|
| 1300 |
+
|
| 1301 |
+
typedef struct cudaGraphAddChildGraphNode_v10000_params_st {
|
| 1302 |
+
cudaGraphNode_t *pGraphNode;
|
| 1303 |
+
cudaGraph_t graph;
|
| 1304 |
+
const cudaGraphNode_t *pDependencies;
|
| 1305 |
+
size_t numDependencies;
|
| 1306 |
+
cudaGraph_t childGraph;
|
| 1307 |
+
} cudaGraphAddChildGraphNode_v10000_params;
|
| 1308 |
+
|
| 1309 |
+
typedef struct cudaGraphChildGraphNodeGetGraph_v10000_params_st {
|
| 1310 |
+
cudaGraphNode_t node;
|
| 1311 |
+
cudaGraph_t *pGraph;
|
| 1312 |
+
} cudaGraphChildGraphNodeGetGraph_v10000_params;
|
| 1313 |
+
|
| 1314 |
+
typedef struct cudaGraphAddEmptyNode_v10000_params_st {
|
| 1315 |
+
cudaGraphNode_t *pGraphNode;
|
| 1316 |
+
cudaGraph_t graph;
|
| 1317 |
+
const cudaGraphNode_t *pDependencies;
|
| 1318 |
+
size_t numDependencies;
|
| 1319 |
+
} cudaGraphAddEmptyNode_v10000_params;
|
| 1320 |
+
|
| 1321 |
+
typedef struct cudaGraphAddEventRecordNode_v11010_params_st {
|
| 1322 |
+
cudaGraphNode_t *pGraphNode;
|
| 1323 |
+
cudaGraph_t graph;
|
| 1324 |
+
const cudaGraphNode_t *pDependencies;
|
| 1325 |
+
size_t numDependencies;
|
| 1326 |
+
cudaEvent_t event;
|
| 1327 |
+
} cudaGraphAddEventRecordNode_v11010_params;
|
| 1328 |
+
|
| 1329 |
+
typedef struct cudaGraphEventRecordNodeGetEvent_v11010_params_st {
|
| 1330 |
+
cudaGraphNode_t node;
|
| 1331 |
+
cudaEvent_t *event_out;
|
| 1332 |
+
} cudaGraphEventRecordNodeGetEvent_v11010_params;
|
| 1333 |
+
|
| 1334 |
+
typedef struct cudaGraphEventRecordNodeSetEvent_v11010_params_st {
|
| 1335 |
+
cudaGraphNode_t node;
|
| 1336 |
+
cudaEvent_t event;
|
| 1337 |
+
} cudaGraphEventRecordNodeSetEvent_v11010_params;
|
| 1338 |
+
|
| 1339 |
+
typedef struct cudaGraphAddEventWaitNode_v11010_params_st {
|
| 1340 |
+
cudaGraphNode_t *pGraphNode;
|
| 1341 |
+
cudaGraph_t graph;
|
| 1342 |
+
const cudaGraphNode_t *pDependencies;
|
| 1343 |
+
size_t numDependencies;
|
| 1344 |
+
cudaEvent_t event;
|
| 1345 |
+
} cudaGraphAddEventWaitNode_v11010_params;
|
| 1346 |
+
|
| 1347 |
+
typedef struct cudaGraphEventWaitNodeGetEvent_v11010_params_st {
|
| 1348 |
+
cudaGraphNode_t node;
|
| 1349 |
+
cudaEvent_t *event_out;
|
| 1350 |
+
} cudaGraphEventWaitNodeGetEvent_v11010_params;
|
| 1351 |
+
|
| 1352 |
+
typedef struct cudaGraphEventWaitNodeSetEvent_v11010_params_st {
|
| 1353 |
+
cudaGraphNode_t node;
|
| 1354 |
+
cudaEvent_t event;
|
| 1355 |
+
} cudaGraphEventWaitNodeSetEvent_v11010_params;
|
| 1356 |
+
|
| 1357 |
+
typedef struct cudaGraphAddExternalSemaphoresSignalNode_v11020_params_st {
|
| 1358 |
+
cudaGraphNode_t *pGraphNode;
|
| 1359 |
+
cudaGraph_t graph;
|
| 1360 |
+
const cudaGraphNode_t *pDependencies;
|
| 1361 |
+
size_t numDependencies;
|
| 1362 |
+
const struct cudaExternalSemaphoreSignalNodeParams *nodeParams;
|
| 1363 |
+
} cudaGraphAddExternalSemaphoresSignalNode_v11020_params;
|
| 1364 |
+
|
| 1365 |
+
typedef struct cudaGraphExternalSemaphoresSignalNodeGetParams_v11020_params_st {
|
| 1366 |
+
cudaGraphNode_t hNode;
|
| 1367 |
+
struct cudaExternalSemaphoreSignalNodeParams *params_out;
|
| 1368 |
+
} cudaGraphExternalSemaphoresSignalNodeGetParams_v11020_params;
|
| 1369 |
+
|
| 1370 |
+
typedef struct cudaGraphExternalSemaphoresSignalNodeSetParams_v11020_params_st {
|
| 1371 |
+
cudaGraphNode_t hNode;
|
| 1372 |
+
const struct cudaExternalSemaphoreSignalNodeParams *nodeParams;
|
| 1373 |
+
} cudaGraphExternalSemaphoresSignalNodeSetParams_v11020_params;
|
| 1374 |
+
|
| 1375 |
+
typedef struct cudaGraphAddExternalSemaphoresWaitNode_v11020_params_st {
|
| 1376 |
+
cudaGraphNode_t *pGraphNode;
|
| 1377 |
+
cudaGraph_t graph;
|
| 1378 |
+
const cudaGraphNode_t *pDependencies;
|
| 1379 |
+
size_t numDependencies;
|
| 1380 |
+
const struct cudaExternalSemaphoreWaitNodeParams *nodeParams;
|
| 1381 |
+
} cudaGraphAddExternalSemaphoresWaitNode_v11020_params;
|
| 1382 |
+
|
| 1383 |
+
typedef struct cudaGraphExternalSemaphoresWaitNodeGetParams_v11020_params_st {
|
| 1384 |
+
cudaGraphNode_t hNode;
|
| 1385 |
+
struct cudaExternalSemaphoreWaitNodeParams *params_out;
|
| 1386 |
+
} cudaGraphExternalSemaphoresWaitNodeGetParams_v11020_params;
|
| 1387 |
+
|
| 1388 |
+
typedef struct cudaGraphExternalSemaphoresWaitNodeSetParams_v11020_params_st {
|
| 1389 |
+
cudaGraphNode_t hNode;
|
| 1390 |
+
const struct cudaExternalSemaphoreWaitNodeParams *nodeParams;
|
| 1391 |
+
} cudaGraphExternalSemaphoresWaitNodeSetParams_v11020_params;
|
| 1392 |
+
|
| 1393 |
+
typedef struct cudaGraphAddMemAllocNode_v11040_params_st {
|
| 1394 |
+
cudaGraphNode_t *pGraphNode;
|
| 1395 |
+
cudaGraph_t graph;
|
| 1396 |
+
const cudaGraphNode_t *pDependencies;
|
| 1397 |
+
size_t numDependencies;
|
| 1398 |
+
struct cudaMemAllocNodeParams *nodeParams;
|
| 1399 |
+
} cudaGraphAddMemAllocNode_v11040_params;
|
| 1400 |
+
|
| 1401 |
+
typedef struct cudaGraphMemAllocNodeGetParams_v11040_params_st {
|
| 1402 |
+
cudaGraphNode_t node;
|
| 1403 |
+
struct cudaMemAllocNodeParams *params_out;
|
| 1404 |
+
} cudaGraphMemAllocNodeGetParams_v11040_params;
|
| 1405 |
+
|
| 1406 |
+
typedef struct cudaGraphAddMemFreeNode_v11040_params_st {
|
| 1407 |
+
cudaGraphNode_t *pGraphNode;
|
| 1408 |
+
cudaGraph_t graph;
|
| 1409 |
+
const cudaGraphNode_t *pDependencies;
|
| 1410 |
+
size_t numDependencies;
|
| 1411 |
+
void *dptr;
|
| 1412 |
+
} cudaGraphAddMemFreeNode_v11040_params;
|
| 1413 |
+
|
| 1414 |
+
typedef struct cudaGraphMemFreeNodeGetParams_v11040_params_st {
|
| 1415 |
+
cudaGraphNode_t node;
|
| 1416 |
+
void *dptr_out;
|
| 1417 |
+
} cudaGraphMemFreeNodeGetParams_v11040_params;
|
| 1418 |
+
|
| 1419 |
+
typedef struct cudaDeviceGraphMemTrim_v11040_params_st {
|
| 1420 |
+
int device;
|
| 1421 |
+
} cudaDeviceGraphMemTrim_v11040_params;
|
| 1422 |
+
|
| 1423 |
+
typedef struct cudaDeviceGetGraphMemAttribute_v11040_params_st {
|
| 1424 |
+
int device;
|
| 1425 |
+
enum cudaGraphMemAttributeType attr;
|
| 1426 |
+
void *value;
|
| 1427 |
+
} cudaDeviceGetGraphMemAttribute_v11040_params;
|
| 1428 |
+
|
| 1429 |
+
typedef struct cudaDeviceSetGraphMemAttribute_v11040_params_st {
|
| 1430 |
+
int device;
|
| 1431 |
+
enum cudaGraphMemAttributeType attr;
|
| 1432 |
+
void *value;
|
| 1433 |
+
} cudaDeviceSetGraphMemAttribute_v11040_params;
|
| 1434 |
+
|
| 1435 |
+
typedef struct cudaGraphClone_v10000_params_st {
|
| 1436 |
+
cudaGraph_t *pGraphClone;
|
| 1437 |
+
cudaGraph_t originalGraph;
|
| 1438 |
+
} cudaGraphClone_v10000_params;
|
| 1439 |
+
|
| 1440 |
+
typedef struct cudaGraphNodeFindInClone_v10000_params_st {
|
| 1441 |
+
cudaGraphNode_t *pNode;
|
| 1442 |
+
cudaGraphNode_t originalNode;
|
| 1443 |
+
cudaGraph_t clonedGraph;
|
| 1444 |
+
} cudaGraphNodeFindInClone_v10000_params;
|
| 1445 |
+
|
| 1446 |
+
typedef struct cudaGraphNodeGetType_v10000_params_st {
|
| 1447 |
+
cudaGraphNode_t node;
|
| 1448 |
+
enum cudaGraphNodeType *pType;
|
| 1449 |
+
} cudaGraphNodeGetType_v10000_params;
|
| 1450 |
+
|
| 1451 |
+
typedef struct cudaGraphGetNodes_v10000_params_st {
|
| 1452 |
+
cudaGraph_t graph;
|
| 1453 |
+
cudaGraphNode_t *nodes;
|
| 1454 |
+
size_t *numNodes;
|
| 1455 |
+
} cudaGraphGetNodes_v10000_params;
|
| 1456 |
+
|
| 1457 |
+
typedef struct cudaGraphGetRootNodes_v10000_params_st {
|
| 1458 |
+
cudaGraph_t graph;
|
| 1459 |
+
cudaGraphNode_t *pRootNodes;
|
| 1460 |
+
size_t *pNumRootNodes;
|
| 1461 |
+
} cudaGraphGetRootNodes_v10000_params;
|
| 1462 |
+
|
| 1463 |
+
typedef struct cudaGraphGetEdges_v10000_params_st {
|
| 1464 |
+
cudaGraph_t graph;
|
| 1465 |
+
cudaGraphNode_t *from;
|
| 1466 |
+
cudaGraphNode_t *to;
|
| 1467 |
+
size_t *numEdges;
|
| 1468 |
+
} cudaGraphGetEdges_v10000_params;
|
| 1469 |
+
|
| 1470 |
+
typedef struct cudaGraphNodeGetDependencies_v10000_params_st {
|
| 1471 |
+
cudaGraphNode_t node;
|
| 1472 |
+
cudaGraphNode_t *pDependencies;
|
| 1473 |
+
size_t *pNumDependencies;
|
| 1474 |
+
} cudaGraphNodeGetDependencies_v10000_params;
|
| 1475 |
+
|
| 1476 |
+
typedef struct cudaGraphNodeGetDependentNodes_v10000_params_st {
|
| 1477 |
+
cudaGraphNode_t node;
|
| 1478 |
+
cudaGraphNode_t *pDependentNodes;
|
| 1479 |
+
size_t *pNumDependentNodes;
|
| 1480 |
+
} cudaGraphNodeGetDependentNodes_v10000_params;
|
| 1481 |
+
|
| 1482 |
+
typedef struct cudaGraphAddDependencies_v10000_params_st {
|
| 1483 |
+
cudaGraph_t graph;
|
| 1484 |
+
const cudaGraphNode_t *from;
|
| 1485 |
+
const cudaGraphNode_t *to;
|
| 1486 |
+
size_t numDependencies;
|
| 1487 |
+
} cudaGraphAddDependencies_v10000_params;
|
| 1488 |
+
|
| 1489 |
+
typedef struct cudaGraphRemoveDependencies_v10000_params_st {
|
| 1490 |
+
cudaGraph_t graph;
|
| 1491 |
+
const cudaGraphNode_t *from;
|
| 1492 |
+
const cudaGraphNode_t *to;
|
| 1493 |
+
size_t numDependencies;
|
| 1494 |
+
} cudaGraphRemoveDependencies_v10000_params;
|
| 1495 |
+
|
| 1496 |
+
typedef struct cudaGraphDestroyNode_v10000_params_st {
|
| 1497 |
+
cudaGraphNode_t node;
|
| 1498 |
+
} cudaGraphDestroyNode_v10000_params;
|
| 1499 |
+
|
| 1500 |
+
typedef struct cudaGraphInstantiate_v10000_params_st {
|
| 1501 |
+
cudaGraphExec_t *pGraphExec;
|
| 1502 |
+
cudaGraph_t graph;
|
| 1503 |
+
cudaGraphNode_t *pErrorNode;
|
| 1504 |
+
char *pLogBuffer;
|
| 1505 |
+
size_t bufferSize;
|
| 1506 |
+
} cudaGraphInstantiate_v10000_params;
|
| 1507 |
+
|
| 1508 |
+
typedef struct cudaGraphInstantiateWithFlags_v11040_params_st {
|
| 1509 |
+
cudaGraphExec_t *pGraphExec;
|
| 1510 |
+
cudaGraph_t graph;
|
| 1511 |
+
unsigned long long flags;
|
| 1512 |
+
} cudaGraphInstantiateWithFlags_v11040_params;
|
| 1513 |
+
|
| 1514 |
+
typedef struct cudaGraphExecKernelNodeSetParams_v10010_params_st {
|
| 1515 |
+
cudaGraphExec_t hGraphExec;
|
| 1516 |
+
cudaGraphNode_t node;
|
| 1517 |
+
const struct cudaKernelNodeParams *pNodeParams;
|
| 1518 |
+
} cudaGraphExecKernelNodeSetParams_v10010_params;
|
| 1519 |
+
|
| 1520 |
+
typedef struct cudaGraphExecMemcpyNodeSetParams_v10020_params_st {
|
| 1521 |
+
cudaGraphExec_t hGraphExec;
|
| 1522 |
+
cudaGraphNode_t node;
|
| 1523 |
+
const struct cudaMemcpy3DParms *pNodeParams;
|
| 1524 |
+
} cudaGraphExecMemcpyNodeSetParams_v10020_params;
|
| 1525 |
+
|
| 1526 |
+
typedef struct cudaGraphExecMemcpyNodeSetParamsToSymbol_v11010_params_st {
|
| 1527 |
+
cudaGraphExec_t hGraphExec;
|
| 1528 |
+
cudaGraphNode_t node;
|
| 1529 |
+
const void *symbol;
|
| 1530 |
+
const void *src;
|
| 1531 |
+
size_t count;
|
| 1532 |
+
size_t offset;
|
| 1533 |
+
enum cudaMemcpyKind kind;
|
| 1534 |
+
} cudaGraphExecMemcpyNodeSetParamsToSymbol_v11010_params;
|
| 1535 |
+
|
| 1536 |
+
typedef struct cudaGraphExecMemcpyNodeSetParamsFromSymbol_v11010_params_st {
|
| 1537 |
+
cudaGraphExec_t hGraphExec;
|
| 1538 |
+
cudaGraphNode_t node;
|
| 1539 |
+
void *dst;
|
| 1540 |
+
const void *symbol;
|
| 1541 |
+
size_t count;
|
| 1542 |
+
size_t offset;
|
| 1543 |
+
enum cudaMemcpyKind kind;
|
| 1544 |
+
} cudaGraphExecMemcpyNodeSetParamsFromSymbol_v11010_params;
|
| 1545 |
+
|
| 1546 |
+
typedef struct cudaGraphExecMemcpyNodeSetParams1D_v11010_params_st {
|
| 1547 |
+
cudaGraphExec_t hGraphExec;
|
| 1548 |
+
cudaGraphNode_t node;
|
| 1549 |
+
void *dst;
|
| 1550 |
+
const void *src;
|
| 1551 |
+
size_t count;
|
| 1552 |
+
enum cudaMemcpyKind kind;
|
| 1553 |
+
} cudaGraphExecMemcpyNodeSetParams1D_v11010_params;
|
| 1554 |
+
|
| 1555 |
+
typedef struct cudaGraphExecMemsetNodeSetParams_v10020_params_st {
|
| 1556 |
+
cudaGraphExec_t hGraphExec;
|
| 1557 |
+
cudaGraphNode_t node;
|
| 1558 |
+
const struct cudaMemsetParams *pNodeParams;
|
| 1559 |
+
} cudaGraphExecMemsetNodeSetParams_v10020_params;
|
| 1560 |
+
|
| 1561 |
+
typedef struct cudaGraphExecHostNodeSetParams_v10020_params_st {
|
| 1562 |
+
cudaGraphExec_t hGraphExec;
|
| 1563 |
+
cudaGraphNode_t node;
|
| 1564 |
+
const struct cudaHostNodeParams *pNodeParams;
|
| 1565 |
+
} cudaGraphExecHostNodeSetParams_v10020_params;
|
| 1566 |
+
|
| 1567 |
+
typedef struct cudaGraphExecChildGraphNodeSetParams_v11010_params_st {
|
| 1568 |
+
cudaGraphExec_t hGraphExec;
|
| 1569 |
+
cudaGraphNode_t node;
|
| 1570 |
+
cudaGraph_t childGraph;
|
| 1571 |
+
} cudaGraphExecChildGraphNodeSetParams_v11010_params;
|
| 1572 |
+
|
| 1573 |
+
typedef struct cudaGraphExecEventRecordNodeSetEvent_v11010_params_st {
|
| 1574 |
+
cudaGraphExec_t hGraphExec;
|
| 1575 |
+
cudaGraphNode_t hNode;
|
| 1576 |
+
cudaEvent_t event;
|
| 1577 |
+
} cudaGraphExecEventRecordNodeSetEvent_v11010_params;
|
| 1578 |
+
|
| 1579 |
+
typedef struct cudaGraphExecEventWaitNodeSetEvent_v11010_params_st {
|
| 1580 |
+
cudaGraphExec_t hGraphExec;
|
| 1581 |
+
cudaGraphNode_t hNode;
|
| 1582 |
+
cudaEvent_t event;
|
| 1583 |
+
} cudaGraphExecEventWaitNodeSetEvent_v11010_params;
|
| 1584 |
+
|
| 1585 |
+
typedef struct cudaGraphExecExternalSemaphoresSignalNodeSetParams_v11020_params_st {
|
| 1586 |
+
cudaGraphExec_t hGraphExec;
|
| 1587 |
+
cudaGraphNode_t hNode;
|
| 1588 |
+
const struct cudaExternalSemaphoreSignalNodeParams *nodeParams;
|
| 1589 |
+
} cudaGraphExecExternalSemaphoresSignalNodeSetParams_v11020_params;
|
| 1590 |
+
|
| 1591 |
+
typedef struct cudaGraphExecExternalSemaphoresWaitNodeSetParams_v11020_params_st {
|
| 1592 |
+
cudaGraphExec_t hGraphExec;
|
| 1593 |
+
cudaGraphNode_t hNode;
|
| 1594 |
+
const struct cudaExternalSemaphoreWaitNodeParams *nodeParams;
|
| 1595 |
+
} cudaGraphExecExternalSemaphoresWaitNodeSetParams_v11020_params;
|
| 1596 |
+
|
| 1597 |
+
typedef struct cudaGraphNodeSetEnabled_v11060_params_st {
|
| 1598 |
+
cudaGraphExec_t hGraphExec;
|
| 1599 |
+
cudaGraphNode_t hNode;
|
| 1600 |
+
unsigned int isEnabled;
|
| 1601 |
+
} cudaGraphNodeSetEnabled_v11060_params;
|
| 1602 |
+
|
| 1603 |
+
typedef struct cudaGraphNodeGetEnabled_v11060_params_st {
|
| 1604 |
+
cudaGraphExec_t hGraphExec;
|
| 1605 |
+
cudaGraphNode_t hNode;
|
| 1606 |
+
unsigned int *isEnabled;
|
| 1607 |
+
} cudaGraphNodeGetEnabled_v11060_params;
|
| 1608 |
+
|
| 1609 |
+
typedef struct cudaGraphExecUpdate_v10020_params_st {
|
| 1610 |
+
cudaGraphExec_t hGraphExec;
|
| 1611 |
+
cudaGraph_t hGraph;
|
| 1612 |
+
cudaGraphNode_t *hErrorNode_out;
|
| 1613 |
+
enum cudaGraphExecUpdateResult *updateResult_out;
|
| 1614 |
+
} cudaGraphExecUpdate_v10020_params;
|
| 1615 |
+
|
| 1616 |
+
typedef struct cudaGraphUpload_ptsz_v10000_params_st {
|
| 1617 |
+
cudaGraphExec_t graphExec;
|
| 1618 |
+
cudaStream_t stream;
|
| 1619 |
+
} cudaGraphUpload_ptsz_v10000_params;
|
| 1620 |
+
|
| 1621 |
+
typedef struct cudaGraphLaunch_ptsz_v10000_params_st {
|
| 1622 |
+
cudaGraphExec_t graphExec;
|
| 1623 |
+
cudaStream_t stream;
|
| 1624 |
+
} cudaGraphLaunch_ptsz_v10000_params;
|
| 1625 |
+
|
| 1626 |
+
typedef struct cudaGraphExecDestroy_v10000_params_st {
|
| 1627 |
+
cudaGraphExec_t graphExec;
|
| 1628 |
+
} cudaGraphExecDestroy_v10000_params;
|
| 1629 |
+
|
| 1630 |
+
typedef struct cudaGraphDestroy_v10000_params_st {
|
| 1631 |
+
cudaGraph_t graph;
|
| 1632 |
+
} cudaGraphDestroy_v10000_params;
|
| 1633 |
+
|
| 1634 |
+
typedef struct cudaGraphDebugDotPrint_v11030_params_st {
|
| 1635 |
+
cudaGraph_t graph;
|
| 1636 |
+
const char *path;
|
| 1637 |
+
unsigned int flags;
|
| 1638 |
+
} cudaGraphDebugDotPrint_v11030_params;
|
| 1639 |
+
|
| 1640 |
+
typedef struct cudaUserObjectCreate_v11030_params_st {
|
| 1641 |
+
cudaUserObject_t *object_out;
|
| 1642 |
+
void *ptr;
|
| 1643 |
+
cudaHostFn_t destroy;
|
| 1644 |
+
unsigned int initialRefcount;
|
| 1645 |
+
unsigned int flags;
|
| 1646 |
+
} cudaUserObjectCreate_v11030_params;
|
| 1647 |
+
|
| 1648 |
+
typedef struct cudaUserObjectRetain_v11030_params_st {
|
| 1649 |
+
cudaUserObject_t object;
|
| 1650 |
+
unsigned int count;
|
| 1651 |
+
} cudaUserObjectRetain_v11030_params;
|
| 1652 |
+
|
| 1653 |
+
typedef struct cudaUserObjectRelease_v11030_params_st {
|
| 1654 |
+
cudaUserObject_t object;
|
| 1655 |
+
unsigned int count;
|
| 1656 |
+
} cudaUserObjectRelease_v11030_params;
|
| 1657 |
+
|
| 1658 |
+
typedef struct cudaGraphRetainUserObject_v11030_params_st {
|
| 1659 |
+
cudaGraph_t graph;
|
| 1660 |
+
cudaUserObject_t object;
|
| 1661 |
+
unsigned int count;
|
| 1662 |
+
unsigned int flags;
|
| 1663 |
+
} cudaGraphRetainUserObject_v11030_params;
|
| 1664 |
+
|
| 1665 |
+
typedef struct cudaGraphReleaseUserObject_v11030_params_st {
|
| 1666 |
+
cudaGraph_t graph;
|
| 1667 |
+
cudaUserObject_t object;
|
| 1668 |
+
unsigned int count;
|
| 1669 |
+
} cudaGraphReleaseUserObject_v11030_params;
|
| 1670 |
+
|
| 1671 |
+
typedef struct cudaGetDriverEntryPoint_ptsz_v11030_params_st {
|
| 1672 |
+
const char *symbol;
|
| 1673 |
+
void **funcPtr;
|
| 1674 |
+
unsigned long long flags;
|
| 1675 |
+
} cudaGetDriverEntryPoint_ptsz_v11030_params;
|
| 1676 |
+
|
| 1677 |
+
typedef struct cudaGetFuncBySymbol_v11000_params_st {
|
| 1678 |
+
cudaFunction_t *functionPtr;
|
| 1679 |
+
const void *symbolPtr;
|
| 1680 |
+
} cudaGetFuncBySymbol_v11000_params;
|
| 1681 |
+
|
| 1682 |
+
typedef struct cudaMemcpy_v3020_params_st {
|
| 1683 |
+
void *dst;
|
| 1684 |
+
const void *src;
|
| 1685 |
+
size_t count;
|
| 1686 |
+
enum cudaMemcpyKind kind;
|
| 1687 |
+
} cudaMemcpy_v3020_params;
|
| 1688 |
+
|
| 1689 |
+
typedef struct cudaMemcpyToSymbol_v3020_params_st {
|
| 1690 |
+
const void *symbol;
|
| 1691 |
+
const void *src;
|
| 1692 |
+
size_t count;
|
| 1693 |
+
size_t offset;
|
| 1694 |
+
enum cudaMemcpyKind kind;
|
| 1695 |
+
} cudaMemcpyToSymbol_v3020_params;
|
| 1696 |
+
|
| 1697 |
+
typedef struct cudaMemcpyFromSymbol_v3020_params_st {
|
| 1698 |
+
void *dst;
|
| 1699 |
+
const void *symbol;
|
| 1700 |
+
size_t count;
|
| 1701 |
+
size_t offset;
|
| 1702 |
+
enum cudaMemcpyKind kind;
|
| 1703 |
+
} cudaMemcpyFromSymbol_v3020_params;
|
| 1704 |
+
|
| 1705 |
+
typedef struct cudaMemcpy2D_v3020_params_st {
|
| 1706 |
+
void *dst;
|
| 1707 |
+
size_t dpitch;
|
| 1708 |
+
const void *src;
|
| 1709 |
+
size_t spitch;
|
| 1710 |
+
size_t width;
|
| 1711 |
+
size_t height;
|
| 1712 |
+
enum cudaMemcpyKind kind;
|
| 1713 |
+
} cudaMemcpy2D_v3020_params;
|
| 1714 |
+
|
| 1715 |
+
typedef struct cudaMemcpyToArray_v3020_params_st {
|
| 1716 |
+
cudaArray_t dst;
|
| 1717 |
+
size_t wOffset;
|
| 1718 |
+
size_t hOffset;
|
| 1719 |
+
const void *src;
|
| 1720 |
+
size_t count;
|
| 1721 |
+
enum cudaMemcpyKind kind;
|
| 1722 |
+
} cudaMemcpyToArray_v3020_params;
|
| 1723 |
+
|
| 1724 |
+
typedef struct cudaMemcpy2DToArray_v3020_params_st {
|
| 1725 |
+
cudaArray_t dst;
|
| 1726 |
+
size_t wOffset;
|
| 1727 |
+
size_t hOffset;
|
| 1728 |
+
const void *src;
|
| 1729 |
+
size_t spitch;
|
| 1730 |
+
size_t width;
|
| 1731 |
+
size_t height;
|
| 1732 |
+
enum cudaMemcpyKind kind;
|
| 1733 |
+
} cudaMemcpy2DToArray_v3020_params;
|
| 1734 |
+
|
| 1735 |
+
typedef struct cudaMemcpyFromArray_v3020_params_st {
|
| 1736 |
+
void *dst;
|
| 1737 |
+
cudaArray_const_t src;
|
| 1738 |
+
size_t wOffset;
|
| 1739 |
+
size_t hOffset;
|
| 1740 |
+
size_t count;
|
| 1741 |
+
enum cudaMemcpyKind kind;
|
| 1742 |
+
} cudaMemcpyFromArray_v3020_params;
|
| 1743 |
+
|
| 1744 |
+
typedef struct cudaMemcpy2DFromArray_v3020_params_st {
|
| 1745 |
+
void *dst;
|
| 1746 |
+
size_t dpitch;
|
| 1747 |
+
cudaArray_const_t src;
|
| 1748 |
+
size_t wOffset;
|
| 1749 |
+
size_t hOffset;
|
| 1750 |
+
size_t width;
|
| 1751 |
+
size_t height;
|
| 1752 |
+
enum cudaMemcpyKind kind;
|
| 1753 |
+
} cudaMemcpy2DFromArray_v3020_params;
|
| 1754 |
+
|
| 1755 |
+
typedef struct cudaMemcpyArrayToArray_v3020_params_st {
|
| 1756 |
+
cudaArray_t dst;
|
| 1757 |
+
size_t wOffsetDst;
|
| 1758 |
+
size_t hOffsetDst;
|
| 1759 |
+
cudaArray_const_t src;
|
| 1760 |
+
size_t wOffsetSrc;
|
| 1761 |
+
size_t hOffsetSrc;
|
| 1762 |
+
size_t count;
|
| 1763 |
+
enum cudaMemcpyKind kind;
|
| 1764 |
+
} cudaMemcpyArrayToArray_v3020_params;
|
| 1765 |
+
|
| 1766 |
+
typedef struct cudaMemcpy2DArrayToArray_v3020_params_st {
|
| 1767 |
+
cudaArray_t dst;
|
| 1768 |
+
size_t wOffsetDst;
|
| 1769 |
+
size_t hOffsetDst;
|
| 1770 |
+
cudaArray_const_t src;
|
| 1771 |
+
size_t wOffsetSrc;
|
| 1772 |
+
size_t hOffsetSrc;
|
| 1773 |
+
size_t width;
|
| 1774 |
+
size_t height;
|
| 1775 |
+
enum cudaMemcpyKind kind;
|
| 1776 |
+
} cudaMemcpy2DArrayToArray_v3020_params;
|
| 1777 |
+
|
| 1778 |
+
typedef struct cudaMemcpy3D_v3020_params_st {
|
| 1779 |
+
const struct cudaMemcpy3DParms *p;
|
| 1780 |
+
} cudaMemcpy3D_v3020_params;
|
| 1781 |
+
|
| 1782 |
+
typedef struct cudaMemcpy3DPeer_v4000_params_st {
|
| 1783 |
+
const struct cudaMemcpy3DPeerParms *p;
|
| 1784 |
+
} cudaMemcpy3DPeer_v4000_params;
|
| 1785 |
+
|
| 1786 |
+
typedef struct cudaMemset_v3020_params_st {
|
| 1787 |
+
void *devPtr;
|
| 1788 |
+
int value;
|
| 1789 |
+
size_t count;
|
| 1790 |
+
} cudaMemset_v3020_params;
|
| 1791 |
+
|
| 1792 |
+
typedef struct cudaMemset2D_v3020_params_st {
|
| 1793 |
+
void *devPtr;
|
| 1794 |
+
size_t pitch;
|
| 1795 |
+
int value;
|
| 1796 |
+
size_t width;
|
| 1797 |
+
size_t height;
|
| 1798 |
+
} cudaMemset2D_v3020_params;
|
| 1799 |
+
|
| 1800 |
+
typedef struct cudaMemset3D_v3020_params_st {
|
| 1801 |
+
struct cudaPitchedPtr pitchedDevPtr;
|
| 1802 |
+
int value;
|
| 1803 |
+
struct cudaExtent extent;
|
| 1804 |
+
} cudaMemset3D_v3020_params;
|
| 1805 |
+
|
| 1806 |
+
typedef struct cudaMemcpyAsync_v3020_params_st {
|
| 1807 |
+
void *dst;
|
| 1808 |
+
const void *src;
|
| 1809 |
+
size_t count;
|
| 1810 |
+
enum cudaMemcpyKind kind;
|
| 1811 |
+
cudaStream_t stream;
|
| 1812 |
+
} cudaMemcpyAsync_v3020_params;
|
| 1813 |
+
|
| 1814 |
+
typedef struct cudaMemcpyToSymbolAsync_v3020_params_st {
|
| 1815 |
+
const void *symbol;
|
| 1816 |
+
const void *src;
|
| 1817 |
+
size_t count;
|
| 1818 |
+
size_t offset;
|
| 1819 |
+
enum cudaMemcpyKind kind;
|
| 1820 |
+
cudaStream_t stream;
|
| 1821 |
+
} cudaMemcpyToSymbolAsync_v3020_params;
|
| 1822 |
+
|
| 1823 |
+
typedef struct cudaMemcpyFromSymbolAsync_v3020_params_st {
|
| 1824 |
+
void *dst;
|
| 1825 |
+
const void *symbol;
|
| 1826 |
+
size_t count;
|
| 1827 |
+
size_t offset;
|
| 1828 |
+
enum cudaMemcpyKind kind;
|
| 1829 |
+
cudaStream_t stream;
|
| 1830 |
+
} cudaMemcpyFromSymbolAsync_v3020_params;
|
| 1831 |
+
|
| 1832 |
+
typedef struct cudaMemcpy2DAsync_v3020_params_st {
|
| 1833 |
+
void *dst;
|
| 1834 |
+
size_t dpitch;
|
| 1835 |
+
const void *src;
|
| 1836 |
+
size_t spitch;
|
| 1837 |
+
size_t width;
|
| 1838 |
+
size_t height;
|
| 1839 |
+
enum cudaMemcpyKind kind;
|
| 1840 |
+
cudaStream_t stream;
|
| 1841 |
+
} cudaMemcpy2DAsync_v3020_params;
|
| 1842 |
+
|
| 1843 |
+
typedef struct cudaMemcpyToArrayAsync_v3020_params_st {
|
| 1844 |
+
cudaArray_t dst;
|
| 1845 |
+
size_t wOffset;
|
| 1846 |
+
size_t hOffset;
|
| 1847 |
+
const void *src;
|
| 1848 |
+
size_t count;
|
| 1849 |
+
enum cudaMemcpyKind kind;
|
| 1850 |
+
cudaStream_t stream;
|
| 1851 |
+
} cudaMemcpyToArrayAsync_v3020_params;
|
| 1852 |
+
|
| 1853 |
+
typedef struct cudaMemcpy2DToArrayAsync_v3020_params_st {
|
| 1854 |
+
cudaArray_t dst;
|
| 1855 |
+
size_t wOffset;
|
| 1856 |
+
size_t hOffset;
|
| 1857 |
+
const void *src;
|
| 1858 |
+
size_t spitch;
|
| 1859 |
+
size_t width;
|
| 1860 |
+
size_t height;
|
| 1861 |
+
enum cudaMemcpyKind kind;
|
| 1862 |
+
cudaStream_t stream;
|
| 1863 |
+
} cudaMemcpy2DToArrayAsync_v3020_params;
|
| 1864 |
+
|
| 1865 |
+
typedef struct cudaMemcpyFromArrayAsync_v3020_params_st {
|
| 1866 |
+
void *dst;
|
| 1867 |
+
cudaArray_const_t src;
|
| 1868 |
+
size_t wOffset;
|
| 1869 |
+
size_t hOffset;
|
| 1870 |
+
size_t count;
|
| 1871 |
+
enum cudaMemcpyKind kind;
|
| 1872 |
+
cudaStream_t stream;
|
| 1873 |
+
} cudaMemcpyFromArrayAsync_v3020_params;
|
| 1874 |
+
|
| 1875 |
+
typedef struct cudaMemcpy2DFromArrayAsync_v3020_params_st {
|
| 1876 |
+
void *dst;
|
| 1877 |
+
size_t dpitch;
|
| 1878 |
+
cudaArray_const_t src;
|
| 1879 |
+
size_t wOffset;
|
| 1880 |
+
size_t hOffset;
|
| 1881 |
+
size_t width;
|
| 1882 |
+
size_t height;
|
| 1883 |
+
enum cudaMemcpyKind kind;
|
| 1884 |
+
cudaStream_t stream;
|
| 1885 |
+
} cudaMemcpy2DFromArrayAsync_v3020_params;
|
| 1886 |
+
|
| 1887 |
+
typedef struct cudaMemcpy3DAsync_v3020_params_st {
|
| 1888 |
+
const struct cudaMemcpy3DParms *p;
|
| 1889 |
+
cudaStream_t stream;
|
| 1890 |
+
} cudaMemcpy3DAsync_v3020_params;
|
| 1891 |
+
|
| 1892 |
+
typedef struct cudaMemcpy3DPeerAsync_v4000_params_st {
|
| 1893 |
+
const struct cudaMemcpy3DPeerParms *p;
|
| 1894 |
+
cudaStream_t stream;
|
| 1895 |
+
} cudaMemcpy3DPeerAsync_v4000_params;
|
| 1896 |
+
|
| 1897 |
+
typedef struct cudaMemsetAsync_v3020_params_st {
|
| 1898 |
+
void *devPtr;
|
| 1899 |
+
int value;
|
| 1900 |
+
size_t count;
|
| 1901 |
+
cudaStream_t stream;
|
| 1902 |
+
} cudaMemsetAsync_v3020_params;
|
| 1903 |
+
|
| 1904 |
+
typedef struct cudaMemset2DAsync_v3020_params_st {
|
| 1905 |
+
void *devPtr;
|
| 1906 |
+
size_t pitch;
|
| 1907 |
+
int value;
|
| 1908 |
+
size_t width;
|
| 1909 |
+
size_t height;
|
| 1910 |
+
cudaStream_t stream;
|
| 1911 |
+
} cudaMemset2DAsync_v3020_params;
|
| 1912 |
+
|
| 1913 |
+
typedef struct cudaMemset3DAsync_v3020_params_st {
|
| 1914 |
+
struct cudaPitchedPtr pitchedDevPtr;
|
| 1915 |
+
int value;
|
| 1916 |
+
struct cudaExtent extent;
|
| 1917 |
+
cudaStream_t stream;
|
| 1918 |
+
} cudaMemset3DAsync_v3020_params;
|
| 1919 |
+
|
| 1920 |
+
typedef struct cudaStreamQuery_v3020_params_st {
|
| 1921 |
+
cudaStream_t stream;
|
| 1922 |
+
} cudaStreamQuery_v3020_params;
|
| 1923 |
+
|
| 1924 |
+
typedef struct cudaStreamGetFlags_v5050_params_st {
|
| 1925 |
+
cudaStream_t hStream;
|
| 1926 |
+
unsigned int *flags;
|
| 1927 |
+
} cudaStreamGetFlags_v5050_params;
|
| 1928 |
+
|
| 1929 |
+
typedef struct cudaStreamGetPriority_v5050_params_st {
|
| 1930 |
+
cudaStream_t hStream;
|
| 1931 |
+
int *priority;
|
| 1932 |
+
} cudaStreamGetPriority_v5050_params;
|
| 1933 |
+
|
| 1934 |
+
typedef struct cudaEventRecord_v3020_params_st {
|
| 1935 |
+
cudaEvent_t event;
|
| 1936 |
+
cudaStream_t stream;
|
| 1937 |
+
} cudaEventRecord_v3020_params;
|
| 1938 |
+
|
| 1939 |
+
typedef struct cudaEventRecordWithFlags_v11010_params_st {
|
| 1940 |
+
cudaEvent_t event;
|
| 1941 |
+
cudaStream_t stream;
|
| 1942 |
+
unsigned int flags;
|
| 1943 |
+
} cudaEventRecordWithFlags_v11010_params;
|
| 1944 |
+
|
| 1945 |
+
typedef struct cudaStreamWaitEvent_v3020_params_st {
|
| 1946 |
+
cudaStream_t stream;
|
| 1947 |
+
cudaEvent_t event;
|
| 1948 |
+
unsigned int flags;
|
| 1949 |
+
} cudaStreamWaitEvent_v3020_params;
|
| 1950 |
+
|
| 1951 |
+
typedef struct cudaStreamAddCallback_v5000_params_st {
|
| 1952 |
+
cudaStream_t stream;
|
| 1953 |
+
cudaStreamCallback_t callback;
|
| 1954 |
+
void *userData;
|
| 1955 |
+
unsigned int flags;
|
| 1956 |
+
} cudaStreamAddCallback_v5000_params;
|
| 1957 |
+
|
| 1958 |
+
typedef struct cudaStreamAttachMemAsync_v6000_params_st {
|
| 1959 |
+
cudaStream_t stream;
|
| 1960 |
+
void *devPtr;
|
| 1961 |
+
size_t length;
|
| 1962 |
+
unsigned int flags;
|
| 1963 |
+
} cudaStreamAttachMemAsync_v6000_params;
|
| 1964 |
+
|
| 1965 |
+
typedef struct cudaStreamSynchronize_v3020_params_st {
|
| 1966 |
+
cudaStream_t stream;
|
| 1967 |
+
} cudaStreamSynchronize_v3020_params;
|
| 1968 |
+
|
| 1969 |
+
typedef struct cudaLaunchKernel_v7000_params_st {
|
| 1970 |
+
const void *func;
|
| 1971 |
+
dim3 gridDim;
|
| 1972 |
+
dim3 blockDim;
|
| 1973 |
+
void **args;
|
| 1974 |
+
size_t sharedMem;
|
| 1975 |
+
cudaStream_t stream;
|
| 1976 |
+
} cudaLaunchKernel_v7000_params;
|
| 1977 |
+
|
| 1978 |
+
typedef struct cudaLaunchKernelExC_v11060_params_st {
|
| 1979 |
+
const cudaLaunchConfig_t *config;
|
| 1980 |
+
const void *func;
|
| 1981 |
+
void **args;
|
| 1982 |
+
} cudaLaunchKernelExC_v11060_params;
|
| 1983 |
+
|
| 1984 |
+
typedef struct cudaLaunchCooperativeKernel_v9000_params_st {
|
| 1985 |
+
const void *func;
|
| 1986 |
+
dim3 gridDim;
|
| 1987 |
+
dim3 blockDim;
|
| 1988 |
+
void **args;
|
| 1989 |
+
size_t sharedMem;
|
| 1990 |
+
cudaStream_t stream;
|
| 1991 |
+
} cudaLaunchCooperativeKernel_v9000_params;
|
| 1992 |
+
|
| 1993 |
+
typedef struct cudaLaunchHostFunc_v10000_params_st {
|
| 1994 |
+
cudaStream_t stream;
|
| 1995 |
+
cudaHostFn_t fn;
|
| 1996 |
+
void *userData;
|
| 1997 |
+
} cudaLaunchHostFunc_v10000_params;
|
| 1998 |
+
|
| 1999 |
+
typedef struct cudaMemPrefetchAsync_v8000_params_st {
|
| 2000 |
+
const void *devPtr;
|
| 2001 |
+
size_t count;
|
| 2002 |
+
int dstDevice;
|
| 2003 |
+
cudaStream_t stream;
|
| 2004 |
+
} cudaMemPrefetchAsync_v8000_params;
|
| 2005 |
+
|
| 2006 |
+
typedef struct cudaSignalExternalSemaphoresAsync_v10000_params_st {
|
| 2007 |
+
const cudaExternalSemaphore_t *extSemArray;
|
| 2008 |
+
const struct cudaExternalSemaphoreSignalParams_v1 *paramsArray;
|
| 2009 |
+
unsigned int numExtSems;
|
| 2010 |
+
cudaStream_t stream;
|
| 2011 |
+
} cudaSignalExternalSemaphoresAsync_v10000_params;
|
| 2012 |
+
|
| 2013 |
+
typedef struct cudaSignalExternalSemaphoresAsync_ptsz_v10000_params_st {
|
| 2014 |
+
const cudaExternalSemaphore_t *extSemArray;
|
| 2015 |
+
const struct cudaExternalSemaphoreSignalParams_v1 *paramsArray;
|
| 2016 |
+
unsigned int numExtSems;
|
| 2017 |
+
cudaStream_t stream;
|
| 2018 |
+
} cudaSignalExternalSemaphoresAsync_ptsz_v10000_params;
|
| 2019 |
+
|
| 2020 |
+
typedef struct cudaSignalExternalSemaphoresAsync_v2_v11020_params_st {
|
| 2021 |
+
const cudaExternalSemaphore_t *extSemArray;
|
| 2022 |
+
const struct cudaExternalSemaphoreSignalParams *paramsArray;
|
| 2023 |
+
unsigned int numExtSems;
|
| 2024 |
+
cudaStream_t stream;
|
| 2025 |
+
} cudaSignalExternalSemaphoresAsync_v2_v11020_params;
|
| 2026 |
+
|
| 2027 |
+
typedef struct cudaWaitExternalSemaphoresAsync_v10000_params_st {
|
| 2028 |
+
const cudaExternalSemaphore_t *extSemArray;
|
| 2029 |
+
const struct cudaExternalSemaphoreWaitParams_v1 *paramsArray;
|
| 2030 |
+
unsigned int numExtSems;
|
| 2031 |
+
cudaStream_t stream;
|
| 2032 |
+
} cudaWaitExternalSemaphoresAsync_v10000_params;
|
| 2033 |
+
|
| 2034 |
+
typedef struct cudaWaitExternalSemaphoresAsync_ptsz_v10000_params_st {
|
| 2035 |
+
const cudaExternalSemaphore_t *extSemArray;
|
| 2036 |
+
const struct cudaExternalSemaphoreWaitParams_v1 *paramsArray;
|
| 2037 |
+
unsigned int numExtSems;
|
| 2038 |
+
cudaStream_t stream;
|
| 2039 |
+
} cudaWaitExternalSemaphoresAsync_ptsz_v10000_params;
|
| 2040 |
+
|
| 2041 |
+
typedef struct cudaWaitExternalSemaphoresAsync_v2_v11020_params_st {
|
| 2042 |
+
const cudaExternalSemaphore_t *extSemArray;
|
| 2043 |
+
const struct cudaExternalSemaphoreWaitParams *paramsArray;
|
| 2044 |
+
unsigned int numExtSems;
|
| 2045 |
+
cudaStream_t stream;
|
| 2046 |
+
} cudaWaitExternalSemaphoresAsync_v2_v11020_params;
|
| 2047 |
+
|
| 2048 |
+
typedef struct cudaGraphUpload_v10000_params_st {
|
| 2049 |
+
cudaGraphExec_t graphExec;
|
| 2050 |
+
cudaStream_t stream;
|
| 2051 |
+
} cudaGraphUpload_v10000_params;
|
| 2052 |
+
|
| 2053 |
+
typedef struct cudaGraphLaunch_v10000_params_st {
|
| 2054 |
+
cudaGraphExec_t graphExec;
|
| 2055 |
+
cudaStream_t stream;
|
| 2056 |
+
} cudaGraphLaunch_v10000_params;
|
| 2057 |
+
|
| 2058 |
+
typedef struct cudaStreamBeginCapture_v10000_params_st {
|
| 2059 |
+
cudaStream_t stream;
|
| 2060 |
+
enum cudaStreamCaptureMode mode;
|
| 2061 |
+
} cudaStreamBeginCapture_v10000_params;
|
| 2062 |
+
|
| 2063 |
+
typedef struct cudaStreamEndCapture_v10000_params_st {
|
| 2064 |
+
cudaStream_t stream;
|
| 2065 |
+
cudaGraph_t *pGraph;
|
| 2066 |
+
} cudaStreamEndCapture_v10000_params;
|
| 2067 |
+
|
| 2068 |
+
typedef struct cudaStreamIsCapturing_v10000_params_st {
|
| 2069 |
+
cudaStream_t stream;
|
| 2070 |
+
enum cudaStreamCaptureStatus *pCaptureStatus;
|
| 2071 |
+
} cudaStreamIsCapturing_v10000_params;
|
| 2072 |
+
|
| 2073 |
+
typedef struct cudaStreamGetCaptureInfo_v10010_params_st {
|
| 2074 |
+
cudaStream_t stream;
|
| 2075 |
+
enum cudaStreamCaptureStatus *captureStatus_out;
|
| 2076 |
+
unsigned long long *id_out;
|
| 2077 |
+
} cudaStreamGetCaptureInfo_v10010_params;
|
| 2078 |
+
|
| 2079 |
+
typedef struct cudaStreamGetCaptureInfo_v2_v11030_params_st {
|
| 2080 |
+
cudaStream_t stream;
|
| 2081 |
+
enum cudaStreamCaptureStatus *captureStatus_out;
|
| 2082 |
+
unsigned long long *id_out;
|
| 2083 |
+
cudaGraph_t *graph_out;
|
| 2084 |
+
const cudaGraphNode_t **dependencies_out;
|
| 2085 |
+
size_t *numDependencies_out;
|
| 2086 |
+
} cudaStreamGetCaptureInfo_v2_v11030_params;
|
| 2087 |
+
|
| 2088 |
+
typedef struct cudaStreamUpdateCaptureDependencies_ptsz_v11030_params_st {
|
| 2089 |
+
cudaStream_t stream;
|
| 2090 |
+
cudaGraphNode_t *dependencies;
|
| 2091 |
+
size_t numDependencies;
|
| 2092 |
+
unsigned int flags;
|
| 2093 |
+
} cudaStreamUpdateCaptureDependencies_ptsz_v11030_params;
|
| 2094 |
+
|
| 2095 |
+
typedef struct cudaStreamCopyAttributes_v11000_params_st {
|
| 2096 |
+
cudaStream_t dstStream;
|
| 2097 |
+
cudaStream_t srcStream;
|
| 2098 |
+
} cudaStreamCopyAttributes_v11000_params;
|
| 2099 |
+
|
| 2100 |
+
typedef struct cudaStreamGetAttribute_v11000_params_st {
|
| 2101 |
+
cudaStream_t stream;
|
| 2102 |
+
cudaStreamAttrID attr;
|
| 2103 |
+
cudaStreamAttrValue *value;
|
| 2104 |
+
} cudaStreamGetAttribute_v11000_params;
|
| 2105 |
+
|
| 2106 |
+
typedef struct cudaStreamSetAttribute_v11000_params_st {
|
| 2107 |
+
cudaStream_t stream;
|
| 2108 |
+
cudaStreamAttrID attr;
|
| 2109 |
+
const cudaStreamAttrValue *param;
|
| 2110 |
+
} cudaStreamSetAttribute_v11000_params;
|
| 2111 |
+
|
| 2112 |
+
typedef struct cudaMallocAsync_v11020_params_st {
|
| 2113 |
+
void **devPtr;
|
| 2114 |
+
size_t size;
|
| 2115 |
+
cudaStream_t hStream;
|
| 2116 |
+
} cudaMallocAsync_v11020_params;
|
| 2117 |
+
|
| 2118 |
+
typedef struct cudaFreeAsync_v11020_params_st {
|
| 2119 |
+
void *devPtr;
|
| 2120 |
+
cudaStream_t hStream;
|
| 2121 |
+
} cudaFreeAsync_v11020_params;
|
| 2122 |
+
|
| 2123 |
+
typedef struct cudaMallocFromPoolAsync_v11020_params_st {
|
| 2124 |
+
void **ptr;
|
| 2125 |
+
size_t size;
|
| 2126 |
+
cudaMemPool_t memPool;
|
| 2127 |
+
cudaStream_t stream;
|
| 2128 |
+
} cudaMallocFromPoolAsync_v11020_params;
|
| 2129 |
+
|
| 2130 |
+
typedef struct cudaGetDriverEntryPoint_v11030_params_st {
|
| 2131 |
+
const char *symbol;
|
| 2132 |
+
void **funcPtr;
|
| 2133 |
+
unsigned long long flags;
|
| 2134 |
+
} cudaGetDriverEntryPoint_v11030_params;
|
| 2135 |
+
|
| 2136 |
+
// Parameter trace structures for removed functions
|
| 2137 |
+
|
| 2138 |
+
|
| 2139 |
+
// End of parameter trace structures
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_vdpau_interop_meta.h
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// This file is generated. Any changes you make will be lost during the next clean build.
|
| 2 |
+
|
| 3 |
+
// CUDA public interface, for type definitions and api function prototypes
|
| 4 |
+
#include "cuda_vdpau_interop.h"
|
| 5 |
+
|
| 6 |
+
// *************************************************************************
|
| 7 |
+
// Definitions of structs to hold parameters for each function
|
| 8 |
+
// *************************************************************************
|
| 9 |
+
|
| 10 |
+
// Currently used parameter trace structures
|
| 11 |
+
// Parameter trace record for cudaVDPAUGetDevice (one field per traced parameter).
// NOTE(review): the _v3020 suffix looks like an API version tag - confirm.
typedef struct cudaVDPAUGetDevice_v3020_params_st {
|
| 12 |
+
int *device;
|
| 13 |
+
VdpDevice vdpDevice;
|
| 14 |
+
VdpGetProcAddress *vdpGetProcAddress;
|
| 15 |
+
} cudaVDPAUGetDevice_v3020_params;
|
| 16 |
+
|
| 17 |
+
// Parameter trace record for cudaVDPAUSetVDPAUDevice (one field per traced parameter).
// Unlike the cudaVDPAUGetDevice record, `device` is held by value here, not by pointer.
typedef struct cudaVDPAUSetVDPAUDevice_v3020_params_st {
|
| 18 |
+
int device;
|
| 19 |
+
VdpDevice vdpDevice;
|
| 20 |
+
VdpGetProcAddress *vdpGetProcAddress;
|
| 21 |
+
} cudaVDPAUSetVDPAUDevice_v3020_params;
|
| 22 |
+
|
| 23 |
+
// Parameter trace record for cudaGraphicsVDPAURegisterVideoSurface
// (one field per traced parameter; the surface is a VdpVideoSurface).
typedef struct cudaGraphicsVDPAURegisterVideoSurface_v3020_params_st {
|
| 24 |
+
struct cudaGraphicsResource **resource;
|
| 25 |
+
VdpVideoSurface vdpSurface;
|
| 26 |
+
unsigned int flags;
|
| 27 |
+
} cudaGraphicsVDPAURegisterVideoSurface_v3020_params;
|
| 28 |
+
|
| 29 |
+
// Parameter trace record for cudaGraphicsVDPAURegisterOutputSurface
// (one field per traced parameter; the surface is a VdpOutputSurface).
typedef struct cudaGraphicsVDPAURegisterOutputSurface_v3020_params_st {
|
| 30 |
+
struct cudaGraphicsResource **resource;
|
| 31 |
+
VdpOutputSurface vdpSurface;
|
| 32 |
+
unsigned int flags;
|
| 33 |
+
} cudaGraphicsVDPAURegisterOutputSurface_v3020_params;
|
| 34 |
+
|
| 35 |
+
// Parameter trace structures for removed functions
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
// End of parameter trace structures
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/include/cudnn_cnn_infer.h
ADDED
|
@@ -0,0 +1,571 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Copyright 2017-2022 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* This source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
/*
|
| 51 |
+
* cudnn_cnn_infer : cuDNN's basic definitions and inference CNN functions.
|
| 52 |
+
*/
|
| 53 |
+
|
| 54 |
+
#if !defined(CUDNN_CNN_INFER_H_)
|
| 55 |
+
#define CUDNN_CNN_INFER_H_
|
| 56 |
+
|
| 57 |
+
#pragma once
|
| 58 |
+
#include <cuda_runtime.h>
|
| 59 |
+
#include <stdint.h>
|
| 60 |
+
|
| 61 |
+
#include "cudnn_version.h"
|
| 62 |
+
#include "cudnn_ops_infer.h"
|
| 63 |
+
|
| 64 |
+
/* These version numbers are autogenerated, do not edit manually. */
|
| 65 |
+
#define CUDNN_CNN_INFER_MAJOR 8
|
| 66 |
+
#define CUDNN_CNN_INFER_MINOR 7
|
| 67 |
+
#define CUDNN_CNN_INFER_PATCH 0
|
| 68 |
+
|
| 69 |
+
#if (CUDNN_CNN_INFER_MAJOR != CUDNN_MAJOR) || (CUDNN_CNN_INFER_MINOR != CUDNN_MINOR) || \
|
| 70 |
+
(CUDNN_CNN_INFER_PATCH != CUDNN_PATCHLEVEL)
|
| 71 |
+
#error Version mismatch in cuDNN CNN INFER!!!
|
| 72 |
+
#endif
|
| 73 |
+
|
| 74 |
+
#if defined(__cplusplus)
|
| 75 |
+
extern "C" {
|
| 76 |
+
#endif
|
| 77 |
+
|
| 78 |
+
typedef struct cudnnConvolutionStruct *cudnnConvolutionDescriptor_t;
|
| 79 |
+
|
| 80 |
+
/*
|
| 81 |
+
* convolution mode
|
| 82 |
+
*/
|
| 83 |
+
typedef enum { CUDNN_CONVOLUTION = 0, CUDNN_CROSS_CORRELATION = 1 } cudnnConvolutionMode_t;
|
| 84 |
+
|
| 85 |
+
/*
|
| 86 |
+
* CUDNN Reorder
|
| 87 |
+
*/
|
| 88 |
+
typedef enum {
|
| 89 |
+
CUDNN_DEFAULT_REORDER = 0,
|
| 90 |
+
CUDNN_NO_REORDER = 1,
|
| 91 |
+
} cudnnReorderType_t;
|
| 92 |
+
|
| 93 |
+
/*
 * Result record describing one forward-convolution algorithm. The
 * cudnnGetConvolutionForwardAlgorithm_v7, cudnnFindConvolutionForwardAlgorithm
 * and cudnnFindConvolutionForwardAlgorithmEx prototypes in this header each
 * take a pointer to these records as their perfResults parameter.
 * NOTE(review): units of `time` and `memory` are not stated in this header - confirm
 * against the cuDNN API reference before relying on them.
 */
typedef struct cudnnConvolutionFwdAlgoPerfStruct {
|
| 94 |
+
cudnnConvolutionFwdAlgo_t algo;
|
| 95 |
+
cudnnStatus_t status;
|
| 96 |
+
float time;
|
| 97 |
+
size_t memory;
|
| 98 |
+
cudnnDeterminism_t determinism;
|
| 99 |
+
cudnnMathType_t mathType;
|
| 100 |
+
int reserved[3];
|
| 101 |
+
} cudnnConvolutionFwdAlgoPerf_t;
|
| 102 |
+
|
| 103 |
+
/* Create an instance of convolution descriptor */
|
| 104 |
+
cudnnStatus_t CUDNNWINAPI
|
| 105 |
+
cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc);
|
| 106 |
+
|
| 107 |
+
/* Destroy an instance of convolution descriptor */
|
| 108 |
+
cudnnStatus_t CUDNNWINAPI
|
| 109 |
+
cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc);
|
| 110 |
+
|
| 111 |
+
cudnnStatus_t CUDNNWINAPI
|
| 112 |
+
cudnnSetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType);
|
| 113 |
+
|
| 114 |
+
cudnnStatus_t CUDNNWINAPI
|
| 115 |
+
cudnnGetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType);
|
| 116 |
+
|
| 117 |
+
cudnnStatus_t CUDNNWINAPI
|
| 118 |
+
cudnnSetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int groupCount);
|
| 119 |
+
|
| 120 |
+
cudnnStatus_t CUDNNWINAPI
|
| 121 |
+
cudnnGetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int *groupCount);
|
| 122 |
+
|
| 123 |
+
cudnnStatus_t CUDNNWINAPI
|
| 124 |
+
cudnnSetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType);
|
| 125 |
+
|
| 126 |
+
cudnnStatus_t CUDNNWINAPI
|
| 127 |
+
cudnnGetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType);
|
| 128 |
+
|
| 129 |
+
cudnnStatus_t CUDNNWINAPI
|
| 130 |
+
cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc,
|
| 131 |
+
int pad_h, /* zero-padding height */
|
| 132 |
+
int pad_w, /* zero-padding width */
|
| 133 |
+
int u, /* vertical filter stride */
|
| 134 |
+
int v, /* horizontal filter stride */
|
| 135 |
+
int dilation_h, /* filter dilation in the vertical dimension */
|
| 136 |
+
int dilation_w, /* filter dilation in the horizontal dimension */
|
| 137 |
+
cudnnConvolutionMode_t mode,
|
| 138 |
+
cudnnDataType_t computeType);
|
| 139 |
+
|
| 140 |
+
cudnnStatus_t CUDNNWINAPI
|
| 141 |
+
cudnnGetConvolution2dDescriptor(const cudnnConvolutionDescriptor_t convDesc,
|
| 142 |
+
int *pad_h, /* zero-padding height */
|
| 143 |
+
int *pad_w, /* zero-padding width */
|
| 144 |
+
int *u, /* vertical filter stride */
|
| 145 |
+
int *v, /* horizontal filter stride */
|
| 146 |
+
int *dilation_h, /* filter dilation in the vertical dimension */
|
| 147 |
+
int *dilation_w, /* filter dilation in the horizontal dimension */
|
| 148 |
+
cudnnConvolutionMode_t *mode,
|
| 149 |
+
cudnnDataType_t *computeType);
|
| 150 |
+
|
| 151 |
+
cudnnStatus_t CUDNNWINAPI
|
| 152 |
+
cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc,
|
| 153 |
+
int arrayLength, /* nbDims-2 size */
|
| 154 |
+
const int padA[],
|
| 155 |
+
const int filterStrideA[],
|
| 156 |
+
const int dilationA[],
|
| 157 |
+
cudnnConvolutionMode_t mode,
|
| 158 |
+
cudnnDataType_t computeType); /* convolution data type */
|
| 159 |
+
|
| 160 |
+
/* Query an Nd convolution descriptor: reads back the padding, stride, dilation, mode and compute type that cudnnSetConvolutionNdDescriptor takes as inputs */
|
| 161 |
+
cudnnStatus_t CUDNNWINAPI
|
| 162 |
+
cudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc,
|
| 163 |
+
int arrayLengthRequested,
|
| 164 |
+
int *arrayLength,
|
| 165 |
+
int padA[],
|
| 166 |
+
int strideA[],
|
| 167 |
+
int dilationA[],
|
| 168 |
+
cudnnConvolutionMode_t *mode,
|
| 169 |
+
cudnnDataType_t *computeType); /* convolution data type */
|
| 170 |
+
|
| 171 |
+
cudnnStatus_t CUDNNWINAPI
|
| 172 |
+
cudnnGetConvolution2dForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
|
| 173 |
+
const cudnnTensorDescriptor_t inputTensorDesc,
|
| 174 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 175 |
+
int *n,
|
| 176 |
+
int *c,
|
| 177 |
+
int *h,
|
| 178 |
+
int *w);
|
| 179 |
+
|
| 180 |
+
/* Helper function to return the dimensions of the output tensor given a convolution descriptor */
|
| 181 |
+
cudnnStatus_t CUDNNWINAPI
|
| 182 |
+
cudnnGetConvolutionNdForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
|
| 183 |
+
const cudnnTensorDescriptor_t inputTensorDesc,
|
| 184 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 185 |
+
int nbDims,
|
| 186 |
+
int tensorOuputDimA[]);
|
| 187 |
+
|
| 188 |
+
/* Helper functions to provide the convolution forward algo that best fits the requirement */
|
| 189 |
+
cudnnStatus_t CUDNNWINAPI
|
| 190 |
+
cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count);
|
| 191 |
+
|
| 192 |
+
cudnnStatus_t CUDNNWINAPI
|
| 193 |
+
cudnnGetConvolutionForwardAlgorithm_v7(cudnnHandle_t handle,
|
| 194 |
+
const cudnnTensorDescriptor_t srcDesc,
|
| 195 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 196 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 197 |
+
const cudnnTensorDescriptor_t destDesc,
|
| 198 |
+
const int requestedAlgoCount,
|
| 199 |
+
int *returnedAlgoCount,
|
| 200 |
+
cudnnConvolutionFwdAlgoPerf_t *perfResults);
|
| 201 |
+
|
| 202 |
+
cudnnStatus_t CUDNNWINAPI
|
| 203 |
+
cudnnFindConvolutionForwardAlgorithm(cudnnHandle_t handle,
|
| 204 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 205 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 206 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 207 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 208 |
+
const int requestedAlgoCount,
|
| 209 |
+
int *returnedAlgoCount,
|
| 210 |
+
cudnnConvolutionFwdAlgoPerf_t *perfResults);
|
| 211 |
+
|
| 212 |
+
cudnnStatus_t CUDNNWINAPI
|
| 213 |
+
cudnnFindConvolutionForwardAlgorithmEx(cudnnHandle_t handle,
|
| 214 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 215 |
+
const void *x,
|
| 216 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 217 |
+
const void *w,
|
| 218 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 219 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 220 |
+
void *y,
|
| 221 |
+
const int requestedAlgoCount,
|
| 222 |
+
int *returnedAlgoCount,
|
| 223 |
+
cudnnConvolutionFwdAlgoPerf_t *perfResults,
|
| 224 |
+
void *workSpace,
|
| 225 |
+
size_t workSpaceSizeInBytes);
|
| 226 |
+
|
| 227 |
+
cudnnStatus_t CUDNNWINAPI
|
| 228 |
+
cudnnIm2Col(cudnnHandle_t handle,
|
| 229 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 230 |
+
const void *x,
|
| 231 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 232 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 233 |
+
void *colBuffer);
|
| 234 |
+
|
| 235 |
+
cudnnStatus_t CUDNNWINAPI
|
| 236 |
+
cudnnReorderFilterAndBias(cudnnHandle_t handle,
|
| 237 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 238 |
+
cudnnReorderType_t reorderType,
|
| 239 |
+
const void *filterData,
|
| 240 |
+
void *reorderedFilterData,
|
| 241 |
+
int reorderBias,
|
| 242 |
+
const void *biasData,
|
| 243 |
+
void *reorderedBiasData);
|
| 244 |
+
|
| 245 |
+
/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo */
|
| 246 |
+
cudnnStatus_t CUDNNWINAPI
|
| 247 |
+
cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle,
|
| 248 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 249 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 250 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 251 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 252 |
+
cudnnConvolutionFwdAlgo_t algo,
|
| 253 |
+
size_t *sizeInBytes);
|
| 254 |
+
|
| 255 |
+
/* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */
|
| 256 |
+
|
| 257 |
+
/* Function to perform the forward pass for batch convolution */
|
| 258 |
+
cudnnStatus_t CUDNNWINAPI
|
| 259 |
+
cudnnConvolutionForward(cudnnHandle_t handle,
|
| 260 |
+
const void *alpha,
|
| 261 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 262 |
+
const void *x,
|
| 263 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 264 |
+
const void *w,
|
| 265 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 266 |
+
cudnnConvolutionFwdAlgo_t algo,
|
| 267 |
+
void *workSpace,
|
| 268 |
+
size_t workSpaceSizeInBytes,
|
| 269 |
+
const void *beta,
|
| 270 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 271 |
+
void *y);
|
| 272 |
+
|
| 273 |
+
/* Fused conv/bias/activation operation : y = Act( alpha1 * conv(x) + alpha2 * z + bias ) */
|
| 274 |
+
cudnnStatus_t CUDNNWINAPI
|
| 275 |
+
cudnnConvolutionBiasActivationForward(cudnnHandle_t handle,
|
| 276 |
+
const void *alpha1,
|
| 277 |
+
const cudnnTensorDescriptor_t xDesc,
|
| 278 |
+
const void *x,
|
| 279 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 280 |
+
const void *w,
|
| 281 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 282 |
+
cudnnConvolutionFwdAlgo_t algo,
|
| 283 |
+
void *workSpace,
|
| 284 |
+
size_t workSpaceSizeInBytes,
|
| 285 |
+
const void *alpha2,
|
| 286 |
+
const cudnnTensorDescriptor_t zDesc,
|
| 287 |
+
const void *z,
|
| 288 |
+
const cudnnTensorDescriptor_t biasDesc,
|
| 289 |
+
const void *bias,
|
| 290 |
+
const cudnnActivationDescriptor_t activationDesc,
|
| 291 |
+
const cudnnTensorDescriptor_t yDesc,
|
| 292 |
+
void *y);
|
| 293 |
+
|
| 294 |
+
/* Helper function to provide the convolution backward data algorithm that best fits the requirements */
|
| 295 |
+
|
| 296 |
+
typedef struct cudnnConvolutionBwdDataAlgoPerfStruct {
|
| 297 |
+
cudnnConvolutionBwdDataAlgo_t algo;
|
| 298 |
+
cudnnStatus_t status;
|
| 299 |
+
float time;
|
| 300 |
+
size_t memory;
|
| 301 |
+
cudnnDeterminism_t determinism;
|
| 302 |
+
cudnnMathType_t mathType;
|
| 303 |
+
int reserved[3];
|
| 304 |
+
} cudnnConvolutionBwdDataAlgoPerf_t;
|
| 305 |
+
|
| 306 |
+
cudnnStatus_t CUDNNWINAPI
|
| 307 |
+
cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, int *count);
|
| 308 |
+
|
| 309 |
+
cudnnStatus_t CUDNNWINAPI
|
| 310 |
+
cudnnFindConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
|
| 311 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 312 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 313 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 314 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 315 |
+
const int requestedAlgoCount,
|
| 316 |
+
int *returnedAlgoCount,
|
| 317 |
+
cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
|
| 318 |
+
|
| 319 |
+
cudnnStatus_t CUDNNWINAPI
|
| 320 |
+
cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnHandle_t handle,
|
| 321 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 322 |
+
const void *w,
|
| 323 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 324 |
+
const void *dy,
|
| 325 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 326 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 327 |
+
void *dx,
|
| 328 |
+
const int requestedAlgoCount,
|
| 329 |
+
int *returnedAlgoCount,
|
| 330 |
+
cudnnConvolutionBwdDataAlgoPerf_t *perfResults,
|
| 331 |
+
void *workSpace,
|
| 332 |
+
size_t workSpaceSizeInBytes);
|
| 333 |
+
|
| 334 |
+
cudnnStatus_t CUDNNWINAPI
|
| 335 |
+
cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle,
|
| 336 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 337 |
+
const cudnnTensorDescriptor_t diffDesc,
|
| 338 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 339 |
+
const cudnnTensorDescriptor_t gradDesc,
|
| 340 |
+
const int requestedAlgoCount,
|
| 341 |
+
int *returnedAlgoCount,
|
| 342 |
+
cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
|
| 343 |
+
|
| 344 |
+
/*
|
| 345 |
+
* convolution algorithm (which potentially requires some workspace)
|
| 346 |
+
*/
|
| 347 |
+
|
| 348 |
+
/* Helper function to return the minimum workspace size, in bytes, that must be passed to the convolution for a given algorithm */
|
| 349 |
+
cudnnStatus_t CUDNNWINAPI
|
| 350 |
+
cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
|
| 351 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 352 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 353 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 354 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 355 |
+
cudnnConvolutionBwdDataAlgo_t algo,
|
| 356 |
+
size_t *sizeInBytes);
|
| 357 |
+
|
| 358 |
+
cudnnStatus_t CUDNNWINAPI
|
| 359 |
+
cudnnConvolutionBackwardData(cudnnHandle_t handle,
|
| 360 |
+
const void *alpha,
|
| 361 |
+
const cudnnFilterDescriptor_t wDesc,
|
| 362 |
+
const void *w,
|
| 363 |
+
const cudnnTensorDescriptor_t dyDesc,
|
| 364 |
+
const void *dy,
|
| 365 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 366 |
+
cudnnConvolutionBwdDataAlgo_t algo,
|
| 367 |
+
void *workSpace,
|
| 368 |
+
size_t workSpaceSizeInBytes,
|
| 369 |
+
const void *beta,
|
| 370 |
+
const cudnnTensorDescriptor_t dxDesc,
|
| 371 |
+
void *dx);
|
| 372 |
+
|
| 373 |
+
/* Helper function to calculate folding descriptors for dgrad */
|
| 374 |
+
cudnnStatus_t CUDNNWINAPI
|
| 375 |
+
cudnnGetFoldedConvBackwardDataDescriptors(const cudnnHandle_t handle,
|
| 376 |
+
const cudnnFilterDescriptor_t filterDesc,
|
| 377 |
+
const cudnnTensorDescriptor_t diffDesc,
|
| 378 |
+
const cudnnConvolutionDescriptor_t convDesc,
|
| 379 |
+
const cudnnTensorDescriptor_t gradDesc,
|
| 380 |
+
const cudnnTensorFormat_t transformFormat,
|
| 381 |
+
cudnnFilterDescriptor_t foldedFilterDesc,
|
| 382 |
+
cudnnTensorDescriptor_t paddedDiffDesc,
|
| 383 |
+
cudnnConvolutionDescriptor_t foldedConvDesc,
|
| 384 |
+
cudnnTensorDescriptor_t foldedGradDesc,
|
| 385 |
+
cudnnTensorTransformDescriptor_t filterFoldTransDesc,
|
| 386 |
+
cudnnTensorTransformDescriptor_t diffPadTransDesc,
|
| 387 |
+
cudnnTensorTransformDescriptor_t gradFoldTransDesc,
|
| 388 |
+
cudnnTensorTransformDescriptor_t gradUnfoldTransDesc);
|
| 389 |
+
|
| 390 |
+
/* cudnnFusedOps... */
|
| 391 |
+
struct cudnnFusedOpsConstParamStruct;
|
| 392 |
+
typedef struct cudnnFusedOpsConstParamStruct *cudnnFusedOpsConstParamPack_t;
|
| 393 |
+
|
| 394 |
+
struct cudnnFusedOpsVariantParamStruct;
|
| 395 |
+
typedef struct cudnnFusedOpsVariantParamStruct *cudnnFusedOpsVariantParamPack_t;
|
| 396 |
+
|
| 397 |
+
struct cudnnFusedOpsPlanStruct;
|
| 398 |
+
typedef struct cudnnFusedOpsPlanStruct *cudnnFusedOpsPlan_t;
|
| 399 |
+
|
| 400 |
+
typedef enum {
|
| 401 |
+
/* each op in [ ] can be disabled by passing NULL ptr */
|
| 402 |
+
/* [per channel scale], [per channel bias], [activation], convolution, [generate BN stats] */
|
| 403 |
+
CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS = 0,
|
| 404 |
+
/* [per channel scale], [per channel bias], [activation], convolutionBackwardWeights */
|
| 405 |
+
CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD = 1,
|
| 406 |
+
/* utility for BN training in BN-conv fusion */
|
| 407 |
+
/* computes the equivalent scale and bias from ySum, ySqSum and the learned scale and bias */
|
| 408 |
+
/* optionally update running stats and generate saved stats */
|
| 409 |
+
CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING = 2,
|
| 410 |
+
/* utility for BN inference in BN-conv fusion */
|
| 411 |
+
/* computes the equivalent scale and bias from learned running stats and learned scale, bias */
|
| 412 |
+
CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE = 3,
|
| 413 |
+
/* reserved for future use: convolution, [per channel scale], [per channel bias], [residual add], [activation] */
|
| 414 |
+
CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION = 4,
|
| 415 |
+
/* reserved for future use: [per channel scale], [per channel bias], [residual add], activation, bitmask */
|
| 416 |
+
CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK = 5,
|
| 417 |
+
/* reserved for future use */
|
| 418 |
+
CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM = 6,
|
| 419 |
+
} cudnnFusedOps_t;
|
| 420 |
+
|
| 421 |
+
typedef enum {
|
| 422 |
+
/* set XDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 423 |
+
/* get XDESC: pass previously created cudnnTensorDescriptor_t */
|
| 424 |
+
CUDNN_PARAM_XDESC = 0,
|
| 425 |
+
/* set/get XDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 426 |
+
CUDNN_PARAM_XDATA_PLACEHOLDER = 1,
|
| 427 |
+
/* set/get BN_MODE: pass cudnnBatchNormMode_t* */
|
| 428 |
+
CUDNN_PARAM_BN_MODE = 2,
|
| 429 |
+
/* set CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 430 |
+
/* get CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
|
| 431 |
+
CUDNN_PARAM_BN_EQSCALEBIAS_DESC = 3,
|
| 432 |
+
/* set/get BN_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 433 |
+
CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER = 4,
|
| 434 |
+
/* set/get BN_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 435 |
+
CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER = 5,
|
| 436 |
+
/* set ACTIVATION_DESC: pass previously initialized cudnnActivationDescriptor_t */
|
| 437 |
+
/* get ACTIVATION_DESC: pass previously created cudnnActivationDescriptor_t */
|
| 438 |
+
CUDNN_PARAM_ACTIVATION_DESC = 6,
|
| 439 |
+
/* set CONV_DESC: pass previously initialized cudnnConvolutionDescriptor_t */
|
| 440 |
+
/* get CONV_DESC: pass previously created cudnnConvolutionDescriptor_t */
|
| 441 |
+
CUDNN_PARAM_CONV_DESC = 7,
|
| 442 |
+
/* set WDESC: pass previously initialized cudnnFilterDescriptor_t */
|
| 443 |
+
/* get WDESC: pass previously created cudnnFilterDescriptor_t */
|
| 444 |
+
CUDNN_PARAM_WDESC = 8,
|
| 445 |
+
/* set/get WDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 446 |
+
CUDNN_PARAM_WDATA_PLACEHOLDER = 9,
|
| 447 |
+
/* set DWDESC: pass previously initialized cudnnFilterDescriptor_t */
|
| 448 |
+
/* get DWDESC: pass previously created cudnnFilterDescriptor_t */
|
| 449 |
+
CUDNN_PARAM_DWDESC = 10,
|
| 450 |
+
/* set/get DWDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 451 |
+
CUDNN_PARAM_DWDATA_PLACEHOLDER = 11,
|
| 452 |
+
/* set YDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 453 |
+
/* get YDESC: pass previously created cudnnTensorDescriptor_t */
|
| 454 |
+
CUDNN_PARAM_YDESC = 12,
|
| 455 |
+
/* set/get YDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 456 |
+
CUDNN_PARAM_YDATA_PLACEHOLDER = 13,
|
| 457 |
+
/* set DYDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 458 |
+
/* get DYDESC: pass previously created cudnnTensorDescriptor_t */
|
| 459 |
+
CUDNN_PARAM_DYDESC = 14,
|
| 460 |
+
/* set/get DYDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 461 |
+
CUDNN_PARAM_DYDATA_PLACEHOLDER = 15,
|
| 462 |
+
/* set YSTATS_DESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 463 |
+
/* get YSTATS_DESC: pass previously created cudnnTensorDescriptor_t */
|
| 464 |
+
CUDNN_PARAM_YSTATS_DESC = 16,
|
| 465 |
+
/* set/get YSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 466 |
+
CUDNN_PARAM_YSUM_PLACEHOLDER = 17,
|
| 467 |
+
/* set/get YSQSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 468 |
+
CUDNN_PARAM_YSQSUM_PLACEHOLDER = 18,
|
| 469 |
+
/* set CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 470 |
+
/* get CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously created cudnnTensorDescriptor_t */
|
| 471 |
+
CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC = 19,
|
| 472 |
+
/* set/get CUDNN_PARAM_BN_SCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 473 |
+
CUDNN_PARAM_BN_SCALE_PLACEHOLDER = 20,
|
| 474 |
+
/* set/get CUDNN_PARAM_BN_BIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 475 |
+
CUDNN_PARAM_BN_BIAS_PLACEHOLDER = 21,
|
| 476 |
+
/* set/get CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 477 |
+
CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER = 22,
|
| 478 |
+
/* set/get CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 479 |
+
CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER = 23,
|
| 480 |
+
/* set/get CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 481 |
+
CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER = 24,
|
| 482 |
+
/* set/get CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 483 |
+
CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER = 25,
|
| 484 |
+
|
| 485 |
+
/* set ZDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 486 |
+
/* get ZDESC: pass previously created cudnnTensorDescriptor_t */
|
| 487 |
+
CUDNN_PARAM_ZDESC = 26,
|
| 488 |
+
/* set/get ZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 489 |
+
CUDNN_PARAM_ZDATA_PLACEHOLDER = 27,
|
| 490 |
+
/* set BN_Z_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 491 |
+
/* get BN_Z_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
|
| 492 |
+
CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC = 28,
|
| 493 |
+
/* set/get BN_Z_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 494 |
+
CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER = 29,
|
| 495 |
+
/* set/get BN_Z_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 496 |
+
CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER = 30,
|
| 497 |
+
|
| 498 |
+
/* set ACTIVATION_BITMASK_DESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 499 |
+
/* get ACTIVATION_BITMASK_DESC: pass previously created cudnnTensorDescriptor_t */
|
| 500 |
+
CUDNN_PARAM_ACTIVATION_BITMASK_DESC = 31,
|
| 501 |
+
/* set/get ACTIVATION_BITMASK_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 502 |
+
CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER = 32,
|
| 503 |
+
|
| 504 |
+
/* set DXDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 505 |
+
/* get DXDESC: pass previously created cudnnTensorDescriptor_t */
|
| 506 |
+
CUDNN_PARAM_DXDESC = 33,
|
| 507 |
+
/* set/get DXDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 508 |
+
CUDNN_PARAM_DXDATA_PLACEHOLDER = 34,
|
| 509 |
+
/* set DZDESC: pass previously initialized cudnnTensorDescriptor_t */
|
| 510 |
+
/* get DZDESC: pass previously created cudnnTensorDescriptor_t */
|
| 511 |
+
CUDNN_PARAM_DZDESC = 35,
|
| 512 |
+
/* set/get DZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 513 |
+
CUDNN_PARAM_DZDATA_PLACEHOLDER = 36,
|
| 514 |
+
/* set/get CUDNN_PARAM_BN_DSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 515 |
+
CUDNN_PARAM_BN_DSCALE_PLACEHOLDER = 37,
|
| 516 |
+
/* set/get CUDNN_PARAM_BN_DBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
|
| 517 |
+
CUDNN_PARAM_BN_DBIAS_PLACEHOLDER = 38,
|
| 518 |
+
} cudnnFusedOpsConstParamLabel_t;
|
| 519 |
+
|
| 520 |
+
typedef enum {
|
| 521 |
+
CUDNN_PTR_NULL = 0,
|
| 522 |
+
CUDNN_PTR_ELEM_ALIGNED = 1,
|
| 523 |
+
CUDNN_PTR_16B_ALIGNED = 2,
|
| 524 |
+
} cudnnFusedOpsPointerPlaceHolder_t;
|
| 525 |
+
|
| 526 |
+
typedef enum {
|
| 527 |
+
/* set: pass void* pointing to dev memory */
|
| 528 |
+
/* get: pass void** pointing to host memory */
|
| 529 |
+
CUDNN_PTR_XDATA = 0,
|
| 530 |
+
CUDNN_PTR_BN_EQSCALE = 1,
|
| 531 |
+
CUDNN_PTR_BN_EQBIAS = 2,
|
| 532 |
+
CUDNN_PTR_WDATA = 3,
|
| 533 |
+
CUDNN_PTR_DWDATA = 4,
|
| 534 |
+
CUDNN_PTR_YDATA = 5,
|
| 535 |
+
CUDNN_PTR_DYDATA = 6,
|
| 536 |
+
CUDNN_PTR_YSUM = 7,
|
| 537 |
+
CUDNN_PTR_YSQSUM = 8,
|
| 538 |
+
CUDNN_PTR_WORKSPACE = 9,
|
| 539 |
+
CUDNN_PTR_BN_SCALE = 10,
|
| 540 |
+
CUDNN_PTR_BN_BIAS = 11,
|
| 541 |
+
CUDNN_PTR_BN_SAVED_MEAN = 12,
|
| 542 |
+
CUDNN_PTR_BN_SAVED_INVSTD = 13,
|
| 543 |
+
CUDNN_PTR_BN_RUNNING_MEAN = 14,
|
| 544 |
+
CUDNN_PTR_BN_RUNNING_VAR = 15,
|
| 545 |
+
CUDNN_PTR_ZDATA = 16,
|
| 546 |
+
CUDNN_PTR_BN_Z_EQSCALE = 17,
|
| 547 |
+
CUDNN_PTR_BN_Z_EQBIAS = 18,
|
| 548 |
+
CUDNN_PTR_ACTIVATION_BITMASK = 19,
|
| 549 |
+
CUDNN_PTR_DXDATA = 20,
|
| 550 |
+
CUDNN_PTR_DZDATA = 21,
|
| 551 |
+
CUDNN_PTR_BN_DSCALE = 22,
|
| 552 |
+
CUDNN_PTR_BN_DBIAS = 23,
|
| 553 |
+
|
| 554 |
+
/* set/get: pass size_t* pointing to host memory */
|
| 555 |
+
CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES = 100,
|
| 556 |
+
/* set/get: pass int64_t* pointing to host memory */
|
| 557 |
+
CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT = 101,
|
| 558 |
+
/* set/get: pass double* pointing to host memory */
|
| 559 |
+
CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR = 102,
|
| 560 |
+
/* set/get: pass double* pointing to host memory */
|
| 561 |
+
CUDNN_SCALAR_DOUBLE_BN_EPSILON = 103,
|
| 562 |
+
} cudnnFusedOpsVariantParamLabel_t;
|
| 563 |
+
|
| 564 |
+
cudnnStatus_t CUDNNWINAPI
|
| 565 |
+
cudnnCnnInferVersionCheck(void);
|
| 566 |
+
|
| 567 |
+
#if defined(__cplusplus)
|
| 568 |
+
}
|
| 569 |
+
#endif
|
| 570 |
+
|
| 571 |
+
#endif /* CUDNN_CNN_INFER_H_ */
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (221 Bytes). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/cufftw.h
ADDED
|
@@ -0,0 +1,454 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
/* Copyright 2005-2014 NVIDIA Corporation. All rights reserved.
|
| 3 |
+
*
|
| 4 |
+
* NOTICE TO LICENSEE:
|
| 5 |
+
*
|
| 6 |
+
* The source code and/or documentation ("Licensed Deliverables") are
|
| 7 |
+
* subject to NVIDIA intellectual property rights under U.S. and
|
| 8 |
+
* international Copyright laws.
|
| 9 |
+
*
|
| 10 |
+
* The Licensed Deliverables contained herein are PROPRIETARY and
|
| 11 |
+
* CONFIDENTIAL to NVIDIA and are being provided under the terms and
|
| 12 |
+
* conditions of a form of NVIDIA software license agreement by and
|
| 13 |
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
| 14 |
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
| 15 |
+
* the contrary in the License Agreement, reproduction or disclosure
|
| 16 |
+
* of the Licensed Deliverables to any third party without the express
|
| 17 |
+
* written consent of NVIDIA is prohibited.
|
| 18 |
+
*
|
| 19 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 20 |
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
| 21 |
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. THEY ARE
|
| 22 |
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
| 23 |
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
| 24 |
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
| 25 |
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
| 26 |
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
| 27 |
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
| 28 |
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
| 29 |
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
| 30 |
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
| 31 |
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
| 32 |
+
* OF THESE LICENSED DELIVERABLES.
|
| 33 |
+
*
|
| 34 |
+
* U.S. Government End Users. These Licensed Deliverables are a
|
| 35 |
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
| 36 |
+
* 1995), consisting of "commercial computer software" and "commercial
|
| 37 |
+
* computer software documentation" as such terms are used in 48
|
| 38 |
+
* C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
|
| 39 |
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
| 40 |
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
| 41 |
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
| 42 |
+
* only those rights set forth herein.
|
| 43 |
+
*
|
| 44 |
+
* Any use of the Licensed Deliverables in individual and commercial
|
| 45 |
+
* software must include, in the user documentation and internal
|
| 46 |
+
* comments to the code, the above Disclaimer and U.S. Government End
|
| 47 |
+
* Users Notice.
|
| 48 |
+
*/
|
| 49 |
+
|
| 50 |
+
/*!
|
| 51 |
+
* \file cufftw.h
|
| 52 |
+
* \brief Public header file for the NVIDIA CUDA FFTW library (CUFFTW)
|
| 53 |
+
*/
|
| 54 |
+
|
| 55 |
+
#ifndef _CUFFTW_H_
|
| 56 |
+
#define _CUFFTW_H_
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
#include <stdio.h>
|
| 60 |
+
#include "cufft.h"
|
| 61 |
+
|
| 62 |
+
#ifdef __cplusplus
|
| 63 |
+
extern "C" {
|
| 64 |
+
#endif
|
| 65 |
+
|
| 66 |
+
// transform direction
|
| 67 |
+
#define FFTW_FORWARD -1
|
| 68 |
+
#define FFTW_INVERSE 1
|
| 69 |
+
#define FFTW_BACKWARD 1
|
| 70 |
+
|
| 71 |
+
// Planner flags
|
| 72 |
+
|
| 73 |
+
#define FFTW_ESTIMATE 0x01
|
| 74 |
+
#define FFTW_MEASURE 0x02
|
| 75 |
+
#define FFTW_PATIENT 0x03
|
| 76 |
+
#define FFTW_EXHAUSTIVE 0x04
|
| 77 |
+
#define FFTW_WISDOM_ONLY 0x05
|
| 78 |
+
|
| 79 |
+
// Algorithm restriction flags
|
| 80 |
+
|
| 81 |
+
#define FFTW_DESTROY_INPUT 0x08
|
| 82 |
+
#define FFTW_PRESERVE_INPUT 0x0C
|
| 83 |
+
#define FFTW_UNALIGNED 0x10
|
| 84 |
+
|
| 85 |
+
// CUFFTW defines and supports the following data types
|
| 86 |
+
|
| 87 |
+
// Note: if complex.h has been included, we use the C99 complex types
|
| 88 |
+
#if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined (complex)
|
| 89 |
+
typedef double _Complex fftw_complex;
|
| 90 |
+
typedef float _Complex fftwf_complex;
|
| 91 |
+
#else
|
| 92 |
+
typedef double fftw_complex[2];
|
| 93 |
+
typedef float fftwf_complex[2];
|
| 94 |
+
#endif
|
| 95 |
+
|
| 96 |
+
typedef void *fftw_plan;
|
| 97 |
+
|
| 98 |
+
typedef void *fftwf_plan;
|
| 99 |
+
|
| 100 |
+
typedef struct {
|
| 101 |
+
int n;
|
| 102 |
+
int is;
|
| 103 |
+
int os;
|
| 104 |
+
} fftw_iodim;
|
| 105 |
+
|
| 106 |
+
typedef fftw_iodim fftwf_iodim;
|
| 107 |
+
|
| 108 |
+
typedef struct {
|
| 109 |
+
ptrdiff_t n;
|
| 110 |
+
ptrdiff_t is;
|
| 111 |
+
ptrdiff_t os;
|
| 112 |
+
} fftw_iodim64;
|
| 113 |
+
|
| 114 |
+
typedef fftw_iodim64 fftwf_iodim64;
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
// CUFFTW defines and supports the following double precision APIs
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
fftw_plan CUFFTAPI fftw_plan_dft_1d(int n,
|
| 121 |
+
fftw_complex *in,
|
| 122 |
+
fftw_complex *out,
|
| 123 |
+
int sign,
|
| 124 |
+
unsigned flags);
|
| 125 |
+
|
| 126 |
+
fftw_plan CUFFTAPI fftw_plan_dft_2d(int n0,
|
| 127 |
+
int n1,
|
| 128 |
+
fftw_complex *in,
|
| 129 |
+
fftw_complex *out,
|
| 130 |
+
int sign,
|
| 131 |
+
unsigned flags);
|
| 132 |
+
|
| 133 |
+
fftw_plan CUFFTAPI fftw_plan_dft_3d(int n0,
|
| 134 |
+
int n1,
|
| 135 |
+
int n2,
|
| 136 |
+
fftw_complex *in,
|
| 137 |
+
fftw_complex *out,
|
| 138 |
+
int sign,
|
| 139 |
+
unsigned flags);
|
| 140 |
+
|
| 141 |
+
fftw_plan CUFFTAPI fftw_plan_dft(int rank,
|
| 142 |
+
const int *n,
|
| 143 |
+
fftw_complex *in,
|
| 144 |
+
fftw_complex *out,
|
| 145 |
+
int sign,
|
| 146 |
+
unsigned flags);
|
| 147 |
+
|
| 148 |
+
fftw_plan CUFFTAPI fftw_plan_dft_r2c_1d(int n,
|
| 149 |
+
double *in,
|
| 150 |
+
fftw_complex *out,
|
| 151 |
+
unsigned flags);
|
| 152 |
+
|
| 153 |
+
fftw_plan CUFFTAPI fftw_plan_dft_r2c_2d(int n0,
|
| 154 |
+
int n1,
|
| 155 |
+
double *in,
|
| 156 |
+
fftw_complex *out,
|
| 157 |
+
unsigned flags);
|
| 158 |
+
|
| 159 |
+
fftw_plan CUFFTAPI fftw_plan_dft_r2c_3d(int n0,
|
| 160 |
+
int n1,
|
| 161 |
+
int n2,
|
| 162 |
+
double *in,
|
| 163 |
+
fftw_complex *out,
|
| 164 |
+
unsigned flags);
|
| 165 |
+
|
| 166 |
+
fftw_plan CUFFTAPI fftw_plan_dft_r2c(int rank,
|
| 167 |
+
const int *n,
|
| 168 |
+
double *in,
|
| 169 |
+
fftw_complex *out,
|
| 170 |
+
unsigned flags);
|
| 171 |
+
|
| 172 |
+
fftw_plan CUFFTAPI fftw_plan_dft_c2r_1d(int n,
|
| 173 |
+
fftw_complex *in,
|
| 174 |
+
double *out,
|
| 175 |
+
unsigned flags);
|
| 176 |
+
|
| 177 |
+
fftw_plan CUFFTAPI fftw_plan_dft_c2r_2d(int n0,
|
| 178 |
+
int n1,
|
| 179 |
+
fftw_complex *in,
|
| 180 |
+
double *out,
|
| 181 |
+
unsigned flags);
|
| 182 |
+
|
| 183 |
+
fftw_plan CUFFTAPI fftw_plan_dft_c2r_3d(int n0,
|
| 184 |
+
int n1,
|
| 185 |
+
int n2,
|
| 186 |
+
fftw_complex *in,
|
| 187 |
+
double *out,
|
| 188 |
+
unsigned flags);
|
| 189 |
+
|
| 190 |
+
fftw_plan CUFFTAPI fftw_plan_dft_c2r(int rank,
|
| 191 |
+
const int *n,
|
| 192 |
+
fftw_complex *in,
|
| 193 |
+
double *out,
|
| 194 |
+
unsigned flags);
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
fftw_plan CUFFTAPI fftw_plan_many_dft(int rank,
|
| 198 |
+
const int *n,
|
| 199 |
+
int batch,
|
| 200 |
+
fftw_complex *in,
|
| 201 |
+
const int *inembed, int istride, int idist,
|
| 202 |
+
fftw_complex *out,
|
| 203 |
+
const int *onembed, int ostride, int odist,
|
| 204 |
+
int sign, unsigned flags);
|
| 205 |
+
|
| 206 |
+
fftw_plan CUFFTAPI fftw_plan_many_dft_r2c(int rank,
|
| 207 |
+
const int *n,
|
| 208 |
+
int batch,
|
| 209 |
+
double *in,
|
| 210 |
+
const int *inembed, int istride, int idist,
|
| 211 |
+
fftw_complex *out,
|
| 212 |
+
const int *onembed, int ostride, int odist,
|
| 213 |
+
unsigned flags);
|
| 214 |
+
|
| 215 |
+
fftw_plan CUFFTAPI fftw_plan_many_dft_c2r(int rank,
|
| 216 |
+
const int *n,
|
| 217 |
+
int batch,
|
| 218 |
+
fftw_complex *in,
|
| 219 |
+
const int *inembed, int istride, int idist,
|
| 220 |
+
double *out,
|
| 221 |
+
const int *onembed, int ostride, int odist,
|
| 222 |
+
unsigned flags);
|
| 223 |
+
|
| 224 |
+
fftw_plan CUFFTAPI fftw_plan_guru_dft(int rank, const fftw_iodim *dims,
|
| 225 |
+
int batch_rank, const fftw_iodim *batch_dims,
|
| 226 |
+
fftw_complex *in, fftw_complex *out,
|
| 227 |
+
int sign, unsigned flags);
|
| 228 |
+
|
| 229 |
+
fftw_plan CUFFTAPI fftw_plan_guru_dft_r2c(int rank, const fftw_iodim *dims,
|
| 230 |
+
int batch_rank, const fftw_iodim *batch_dims,
|
| 231 |
+
double *in, fftw_complex *out,
|
| 232 |
+
unsigned flags);
|
| 233 |
+
|
| 234 |
+
fftw_plan CUFFTAPI fftw_plan_guru_dft_c2r(int rank, const fftw_iodim *dims,
|
| 235 |
+
int batch_rank, const fftw_iodim *batch_dims,
|
| 236 |
+
fftw_complex *in, double *out,
|
| 237 |
+
unsigned flags);
|
| 238 |
+
|
| 239 |
+
void CUFFTAPI fftw_execute(const fftw_plan plan);
|
| 240 |
+
|
| 241 |
+
void CUFFTAPI fftw_execute_dft(const fftw_plan plan,
|
| 242 |
+
fftw_complex *idata,
|
| 243 |
+
fftw_complex *odata);
|
| 244 |
+
|
| 245 |
+
void CUFFTAPI fftw_execute_dft_r2c(const fftw_plan plan,
|
| 246 |
+
double *idata,
|
| 247 |
+
fftw_complex *odata);
|
| 248 |
+
|
| 249 |
+
void CUFFTAPI fftw_execute_dft_c2r(const fftw_plan plan,
|
| 250 |
+
fftw_complex *idata,
|
| 251 |
+
double *odata);
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
// CUFFTW defines and supports the following single precision APIs
|
| 255 |
+
|
| 256 |
+
fftwf_plan CUFFTAPI fftwf_plan_dft_1d(int n,
|
| 257 |
+
fftwf_complex *in,
|
| 258 |
+
fftwf_complex *out,
|
| 259 |
+
int sign,
|
| 260 |
+
unsigned flags);
|
| 261 |
+
|
| 262 |
+
fftwf_plan CUFFTAPI fftwf_plan_dft_2d(int n0,
|
| 263 |
+
int n1,
|
| 264 |
+
fftwf_complex *in,
|
| 265 |
+
fftwf_complex *out,
|
| 266 |
+
int sign,
|
| 267 |
+
unsigned flags);
|
| 268 |
+
|
| 269 |
+
fftwf_plan CUFFTAPI fftwf_plan_dft_3d(int n0,
|
| 270 |
+
int n1,
|
| 271 |
+
int n2,
|
| 272 |
+
fftwf_complex *in,
|
| 273 |
+
fftwf_complex *out,
|
| 274 |
+
int sign,
|
| 275 |
+
unsigned flags);
|
| 276 |
+
|
| 277 |
+
fftwf_plan CUFFTAPI fftwf_plan_dft(int rank,
|
| 278 |
+
const int *n,
|
| 279 |
+
fftwf_complex *in,
|
| 280 |
+
fftwf_complex *out,
|
| 281 |
+
int sign,
|
| 282 |
+
unsigned flags);
|
| 283 |
+
|
| 284 |
+
fftwf_plan CUFFTAPI fftwf_plan_dft_r2c_1d(int n,
|
| 285 |
+
float *in,
|
| 286 |
+
fftwf_complex *out,
|
| 287 |
+
unsigned flags);
|
| 288 |
+
|
| 289 |
+
fftwf_plan CUFFTAPI fftwf_plan_dft_r2c_2d(int n0,
|
| 290 |
+
int n1,
|
| 291 |
+
float *in,
|
| 292 |
+
fftwf_complex *out,
|
| 293 |
+
unsigned flags);
|
| 294 |
+
|
| 295 |
+
fftwf_plan CUFFTAPI fftwf_plan_dft_r2c_3d(int n0,
|
| 296 |
+
int n1,
|
| 297 |
+
int n2,
|
| 298 |
+
float *in,
|
| 299 |
+
fftwf_complex *out,
|
| 300 |
+
unsigned flags);
|
| 301 |
+
|
| 302 |
+
fftwf_plan CUFFTAPI fftwf_plan_dft_r2c(int rank,
|
| 303 |
+
const int *n,
|
| 304 |
+
float *in,
|
| 305 |
+
fftwf_complex *out,
|
| 306 |
+
unsigned flags);
|
| 307 |
+
|
| 308 |
+
fftwf_plan CUFFTAPI fftwf_plan_dft_c2r_1d(int n,
|
| 309 |
+
fftwf_complex *in,
|
| 310 |
+
float *out,
|
| 311 |
+
unsigned flags);
|
| 312 |
+
|
| 313 |
+
fftwf_plan CUFFTAPI fftwf_plan_dft_c2r_2d(int n0,
|
| 314 |
+
int n1,
|
| 315 |
+
fftwf_complex *in,
|
| 316 |
+
float *out,
|
| 317 |
+
unsigned flags);
|
| 318 |
+
|
| 319 |
+
fftwf_plan CUFFTAPI fftwf_plan_dft_c2r_3d(int n0,
|
| 320 |
+
int n1,
|
| 321 |
+
int n2,
|
| 322 |
+
fftwf_complex *in,
|
| 323 |
+
float *out,
|
| 324 |
+
unsigned flags);
|
| 325 |
+
|
| 326 |
+
fftwf_plan CUFFTAPI fftwf_plan_dft_c2r(int rank,
|
| 327 |
+
const int *n,
|
| 328 |
+
fftwf_complex *in,
|
| 329 |
+
float *out,
|
| 330 |
+
unsigned flags);
|
| 331 |
+
|
| 332 |
+
fftwf_plan CUFFTAPI fftwf_plan_many_dft(int rank,
|
| 333 |
+
const int *n,
|
| 334 |
+
int batch,
|
| 335 |
+
fftwf_complex *in,
|
| 336 |
+
const int *inembed, int istride, int idist,
|
| 337 |
+
fftwf_complex *out,
|
| 338 |
+
const int *onembed, int ostride, int odist,
|
| 339 |
+
int sign, unsigned flags);
|
| 340 |
+
|
| 341 |
+
fftwf_plan CUFFTAPI fftwf_plan_many_dft_r2c(int rank,
|
| 342 |
+
const int *n,
|
| 343 |
+
int batch,
|
| 344 |
+
float *in,
|
| 345 |
+
const int *inembed, int istride, int idist,
|
| 346 |
+
fftwf_complex *out,
|
| 347 |
+
const int *onembed, int ostride, int odist,
|
| 348 |
+
unsigned flags);
|
| 349 |
+
|
| 350 |
+
fftwf_plan CUFFTAPI fftwf_plan_many_dft_c2r(int rank,
|
| 351 |
+
const int *n,
|
| 352 |
+
int batch,
|
| 353 |
+
fftwf_complex *in,
|
| 354 |
+
const int *inembed, int istride, int idist,
|
| 355 |
+
float *out,
|
| 356 |
+
const int *onembed, int ostride, int odist,
|
| 357 |
+
unsigned flags);
|
| 358 |
+
|
| 359 |
+
fftwf_plan CUFFTAPI fftwf_plan_guru_dft(int rank, const fftwf_iodim *dims,
|
| 360 |
+
int batch_rank, const fftwf_iodim *batch_dims,
|
| 361 |
+
fftwf_complex *in, fftwf_complex *out,
|
| 362 |
+
int sign, unsigned flags);
|
| 363 |
+
|
| 364 |
+
fftwf_plan CUFFTAPI fftwf_plan_guru_dft_r2c(int rank, const fftwf_iodim *dims,
|
| 365 |
+
int batch_rank, const fftwf_iodim *batch_dims,
|
| 366 |
+
float *in, fftwf_complex *out,
|
| 367 |
+
unsigned flags);
|
| 368 |
+
|
| 369 |
+
fftwf_plan CUFFTAPI fftwf_plan_guru_dft_c2r(int rank, const fftwf_iodim *dims,
|
| 370 |
+
int batch_rank, const fftwf_iodim *batch_dims,
|
| 371 |
+
fftwf_complex *in, float *out,
|
| 372 |
+
unsigned flags);
|
| 373 |
+
|
| 374 |
+
void CUFFTAPI fftwf_execute(const fftw_plan plan);
|
| 375 |
+
|
| 376 |
+
void CUFFTAPI fftwf_execute_dft(const fftwf_plan plan,
|
| 377 |
+
fftwf_complex *idata,
|
| 378 |
+
fftwf_complex *odata);
|
| 379 |
+
|
| 380 |
+
void CUFFTAPI fftwf_execute_dft_r2c(const fftwf_plan plan,
|
| 381 |
+
float *idata,
|
| 382 |
+
fftwf_complex *odata);
|
| 383 |
+
|
| 384 |
+
void CUFFTAPI fftwf_execute_dft_c2r(const fftwf_plan plan,
|
| 385 |
+
fftwf_complex *idata,
|
| 386 |
+
float *odata);
|
| 387 |
+
|
| 388 |
+
/// CUFFTW 64-bit Guru Interface
|
| 389 |
+
/// Double precision (dp)
|
| 390 |
+
fftw_plan CUFFTAPI fftw_plan_guru64_dft(int rank, const fftw_iodim64* dims, int batch_rank, const fftw_iodim64* batch_dims, fftw_complex* in, fftw_complex* out, int sign, unsigned flags);
|
| 391 |
+
|
| 392 |
+
fftw_plan CUFFTAPI fftw_plan_guru64_dft_r2c(int rank, const fftw_iodim64* dims, int batch_rank, const fftw_iodim64* batch_dims, double* in, fftw_complex* out, unsigned flags);
|
| 393 |
+
|
| 394 |
+
fftw_plan CUFFTAPI fftw_plan_guru64_dft_c2r(int rank, const fftw_iodim64* dims, int batch_rank, const fftw_iodim64* batch_dims, fftw_complex* in, double* out, unsigned flags);
|
| 395 |
+
|
| 396 |
+
/// Single precision (sp)
|
| 397 |
+
fftwf_plan CUFFTAPI fftwf_plan_guru64_dft(int rank, const fftwf_iodim64* dims, int batch_rank, const fftwf_iodim64* batch_dims, fftwf_complex* in, fftwf_complex* out, int sign, unsigned flags);
|
| 398 |
+
|
| 399 |
+
fftwf_plan CUFFTAPI fftwf_plan_guru64_dft_r2c(int rank, const fftwf_iodim64* dims, int batch_rank, const fftwf_iodim64* batch_dims, float* in, fftwf_complex* out, unsigned flags);
|
| 400 |
+
|
| 401 |
+
fftwf_plan CUFFTAPI fftwf_plan_guru64_dft_c2r(int rank, const fftwf_iodim64* dims, int batch_rank, const fftwf_iodim64* batch_dims, fftwf_complex* in, float* out, unsigned flags);
|
| 402 |
+
|
| 403 |
+
// _CUFFTAPI(T) expands to the return type T combined with CUFFTAPI (CUFFTAPI
// itself is defined outside this excerpt). The two branches differ only in
// ordering: CUFFTAPI follows the return type when _WIN32 is defined and
// precedes it otherwise.
#ifdef _WIN32
|
| 404 |
+
#define _CUFFTAPI(T) T CUFFTAPI
|
| 405 |
+
#else
|
| 406 |
+
#define _CUFFTAPI(T) CUFFTAPI T
|
| 407 |
+
#endif
|
| 408 |
+
|
| 409 |
+
// CUFFTW defines and supports the following support APIs
|
| 410 |
+
_CUFFTAPI(void *) fftw_malloc(size_t n);
|
| 411 |
+
|
| 412 |
+
_CUFFTAPI(void *) fftwf_malloc(size_t n);
|
| 413 |
+
|
| 414 |
+
void CUFFTAPI fftw_free(void *pointer);
|
| 415 |
+
|
| 416 |
+
void CUFFTAPI fftwf_free(void *pointer);
|
| 417 |
+
|
| 418 |
+
void CUFFTAPI fftw_export_wisdom_to_file(FILE * output_file);
|
| 419 |
+
|
| 420 |
+
void CUFFTAPI fftwf_export_wisdom_to_file(FILE * output_file);
|
| 421 |
+
|
| 422 |
+
void CUFFTAPI fftw_import_wisdom_from_file(FILE * input_file);
|
| 423 |
+
|
| 424 |
+
void CUFFTAPI fftwf_import_wisdom_from_file(FILE * input_file);
|
| 425 |
+
|
| 426 |
+
void CUFFTAPI fftw_print_plan(const fftw_plan plan);
|
| 427 |
+
|
| 428 |
+
void CUFFTAPI fftwf_print_plan(const fftwf_plan plan);
|
| 429 |
+
|
| 430 |
+
void CUFFTAPI fftw_set_timelimit(double seconds);
|
| 431 |
+
|
| 432 |
+
void CUFFTAPI fftwf_set_timelimit(double seconds);
|
| 433 |
+
|
| 434 |
+
double CUFFTAPI fftw_cost(const fftw_plan plan);
|
| 435 |
+
|
| 436 |
+
double CUFFTAPI fftwf_cost(const fftw_plan plan);
|
| 437 |
+
|
| 438 |
+
void CUFFTAPI fftw_flops(const fftw_plan plan, double *add, double *mul, double *fma);
|
| 439 |
+
|
| 440 |
+
void CUFFTAPI fftwf_flops(const fftw_plan plan, double *add, double *mul, double *fma);
|
| 441 |
+
|
| 442 |
+
void CUFFTAPI fftw_destroy_plan(fftw_plan plan);
|
| 443 |
+
|
| 444 |
+
void CUFFTAPI fftwf_destroy_plan(fftwf_plan plan);
|
| 445 |
+
|
| 446 |
+
void CUFFTAPI fftw_cleanup(void);
|
| 447 |
+
|
| 448 |
+
void CUFFTAPI fftwf_cleanup(void);
|
| 449 |
+
|
| 450 |
+
#ifdef __cplusplus
|
| 451 |
+
}
|
| 452 |
+
#endif
|
| 453 |
+
|
| 454 |
+
#endif /* _CUFFTW_H_ */
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/lib/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (217 Bytes). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/idna/__pycache__/idnadata.cpython-311.pyc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e0c01fe1a0e5738b15b6952c63eebb273a28c12beefd13f01594da265a1b156
|
| 3 |
+
size 101565
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/LICENSE
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
|
| 2 |
+
|
| 3 |
+
Redistribution and use in source and binary forms, with or without
|
| 4 |
+
modification, are permitted provided that the following conditions are met:
|
| 5 |
+
|
| 6 |
+
1. Redistributions of source code must retain the above copyright notice, this
|
| 7 |
+
list of conditions and the following disclaimer.
|
| 8 |
+
|
| 9 |
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
| 10 |
+
this list of conditions and the following disclaimer in the documentation
|
| 11 |
+
and/or other materials provided with the distribution.
|
| 12 |
+
|
| 13 |
+
3. Neither the name of the copyright holder nor the names of its contributors
|
| 14 |
+
may be used to endorse or promote products derived from this software
|
| 15 |
+
without specific prior written permission.
|
| 16 |
+
|
| 17 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
| 18 |
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
| 19 |
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
| 20 |
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
| 21 |
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
| 22 |
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
| 23 |
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
| 24 |
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
| 25 |
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 26 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 27 |
+
|
| 28 |
+
Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of
|
| 29 |
+
external contributions to this project including patches, pull requests, etc.
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/RECORD
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
../../../bin/pybind11-config,sha256=KwKhJwrv86OeAvCUq7sBopc-kDZzCJdnh_4RZIF8T-c,265
|
| 2 |
+
pybind11-2.13.6.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 3 |
+
pybind11-2.13.6.dist-info/LICENSE,sha256=g5ZbhDuY9nDTqFvQQe1LNyyOxQ17SlmVqDrGl7pnXcs,1684
|
| 4 |
+
pybind11-2.13.6.dist-info/METADATA,sha256=Gg_aZ0f3aFFDF3bQvgzR9kwVT_jogjVEc74kDVldlq0,9513
|
| 5 |
+
pybind11-2.13.6.dist-info/RECORD,,
|
| 6 |
+
pybind11-2.13.6.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 7 |
+
pybind11-2.13.6.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
|
| 8 |
+
pybind11-2.13.6.dist-info/entry_points.txt,sha256=Q_kAwEJBDz8wHD0V50hY3AvchDk3Pfyeox2YHrAcWZ0,105
|
| 9 |
+
pybind11-2.13.6.dist-info/top_level.txt,sha256=d1mqwSpUlmlZhXDQ9Y57eNlXc3dVDM1toKmfC1kJbvU,9
|
| 10 |
+
pybind11/__init__.py,sha256=9vt06pvuwvdKW0YwYQKOTxBEgmQ0kb5ZUOJrgtGhdKs,459
|
| 11 |
+
pybind11/__main__.py,sha256=p8vZ4btnkb_TaF03R1ac7qHmp-Eut86gCSUcVP8F3i4,2526
|
| 12 |
+
pybind11/__pycache__/__init__.cpython-311.pyc,,
|
| 13 |
+
pybind11/__pycache__/__main__.cpython-311.pyc,,
|
| 14 |
+
pybind11/__pycache__/_version.cpython-311.pyc,,
|
| 15 |
+
pybind11/__pycache__/commands.cpython-311.pyc,,
|
| 16 |
+
pybind11/__pycache__/setup_helpers.cpython-311.pyc,,
|
| 17 |
+
pybind11/_version.py,sha256=XUUceDIbc3kdRixyEVMy5v0LcGF36QUxMG9rJHlT6P4,232
|
| 18 |
+
pybind11/commands.py,sha256=V43hKb7VE_abYZvaO-TpJLOU65n6W3ZrdYHGF3G3qUs,1243
|
| 19 |
+
pybind11/include/pybind11/attr.h,sha256=QPjH7BfhL8QFwHHkrDak8gNOLMlb1itAO5fobjdoLp8,24334
|
| 20 |
+
pybind11/include/pybind11/buffer_info.h,sha256=_FcQisqdpphfWXKeCGNv3Gq5ivy1z-qF3d1Noeteaok,7778
|
| 21 |
+
pybind11/include/pybind11/cast.h,sha256=8gJ4Y4nc83dyq12CuU7ircAvAV1HoEZEVr0UyfeLQNA,71696
|
| 22 |
+
pybind11/include/pybind11/chrono.h,sha256=A23naeloqn-1NKVAABOsJtHU9Vz8lfvrAICuLk-7qBM,8458
|
| 23 |
+
pybind11/include/pybind11/common.h,sha256=ATg9Bt1pwF8qnNuI086fprM4CUTdrZdk_g2HXE1Sf6A,120
|
| 24 |
+
pybind11/include/pybind11/complex.h,sha256=AaDZ-rEmK4tFaue-K9P5y3TxxnaQF6JwZ_6LAzkdLQI,2096
|
| 25 |
+
pybind11/include/pybind11/detail/class.h,sha256=Bjk3K6xAMgwxPNTKfik7SC5Y24wgKs8Oz5VjvFdy0kA,29026
|
| 26 |
+
pybind11/include/pybind11/detail/common.h,sha256=uxFMVYKW87YPbUz8Mo70xoVrpK2D1NzhKSwlDpwrJxo,54708
|
| 27 |
+
pybind11/include/pybind11/detail/cpp_conduit.h,sha256=Bbx5728XzvyCL2gfW7kG6vgDltS5-V5gtkNQFPFevXg,2589
|
| 28 |
+
pybind11/include/pybind11/detail/descr.h,sha256=D63pIHsF3luO_g51CjbJU8Wl9VOihciEXQhXvfRg-Rk,6035
|
| 29 |
+
pybind11/include/pybind11/detail/exception_translation.h,sha256=fM1J19z00AuDlozHt0srpCJr-1uWW4kj_fLdSJDbdY8,2600
|
| 30 |
+
pybind11/include/pybind11/detail/init.h,sha256=Sb1UkPecC5l9xj5naYLdUM7qIRLVpe614H9Frvyg8xg,17983
|
| 31 |
+
pybind11/include/pybind11/detail/internals.h,sha256=xs-I7JdJACxx7gJf12HBLjL007jRXcAffPDsd0oTrq4,31985
|
| 32 |
+
pybind11/include/pybind11/detail/type_caster_base.h,sha256=mdgZ-FIkxdSShMPPe69EXxjvd1eQDDBVX835B7XqCNo,48938
|
| 33 |
+
pybind11/include/pybind11/detail/typeid.h,sha256=jw5pr9m72vkDsloT8vxl9wj17VJGcEdXDyziBlt89Js,1625
|
| 34 |
+
pybind11/include/pybind11/detail/value_and_holder.h,sha256=hwNYlqxjUhlUqihwMjr6s3LhhKlZiTLaWREtQrgOAkQ,2814
|
| 35 |
+
pybind11/include/pybind11/eigen.h,sha256=-HmSA1kgwCQ-GHUt7PHtTEc-vxqw9xARpF8PHWJip28,316
|
| 36 |
+
pybind11/include/pybind11/eigen/common.h,sha256=dIeqmK7IzW5K4k2larPnA1A863rDp38U9YbNIwiIyYk,378
|
| 37 |
+
pybind11/include/pybind11/eigen/matrix.h,sha256=VjCfx8M2AcD3m8THUbIEYidJyIClaNw9jMbd_Fzfo1s,32142
|
| 38 |
+
pybind11/include/pybind11/eigen/tensor.h,sha256=csE3_N9yy-9k0SWQPJuAxmv8Jp_-lFrrPdVOyMV8-gc,18384
|
| 39 |
+
pybind11/include/pybind11/embed.h,sha256=F3JQiOWnLGSuZ0NuEyBWFhHyVdczD8D_67kriU4QfsY,13362
|
| 40 |
+
pybind11/include/pybind11/eval.h,sha256=7re-O2Eor1yD0Q_KgFkHIjKD17ejzII687Yszl9_KfE,4731
|
| 41 |
+
pybind11/include/pybind11/functional.h,sha256=iOyYuNmbI-K3zgc1IMDwe4iHEOO3F8vwZbVSvbgxFQ4,5267
|
| 42 |
+
pybind11/include/pybind11/gil.h,sha256=hsJj6z1iXqlo5c7fPCgEvK_-eeDoKZm7PKPwPNCdVVo,7702
|
| 43 |
+
pybind11/include/pybind11/gil_safe_call_once.h,sha256=KKcy9Wgc_MJY-U5WpCZeNyzW7oVmC-d6yXkgephZ7zs,3993
|
| 44 |
+
pybind11/include/pybind11/iostream.h,sha256=K5rPXoCYN325r1PptcJCIhPhgtRtTJQjMr7bvUIOwxk,8862
|
| 45 |
+
pybind11/include/pybind11/numpy.h,sha256=xREhfycUTCOPF8CF-UWRdoLX0B23V6YWRiBqeRRElZg,84442
|
| 46 |
+
pybind11/include/pybind11/operators.h,sha256=224RoAXcv1la4NNY9rQ3aD_AeC8S9ZKx3HVK1O8B4MU,9103
|
| 47 |
+
pybind11/include/pybind11/options.h,sha256=qXvmnj--9fZSp56NYefnB3W5V17ppHlY1Srgo3DNBpw,2734
|
| 48 |
+
pybind11/include/pybind11/pybind11.h,sha256=hbzXHRCBIW7dwtwaKjXKPC0Nl1MGHZ5-BjGsMlE3LuU,129898
|
| 49 |
+
pybind11/include/pybind11/pytypes.h,sha256=BF8x4S5fsAzWf-d9pu83UsqjwRRo0ragHPy9sDOpUvk,99894
|
| 50 |
+
pybind11/include/pybind11/stl.h,sha256=aMi1OCCw2Zb-IRLSlAtQEJJHtWsRJiLT9dKDMHST1Ic,15532
|
| 51 |
+
pybind11/include/pybind11/stl/filesystem.h,sha256=lcYRCwNA8Xf4e4FRbeYh36SAwQjxKgyTXXdrguR4gM4,4559
|
| 52 |
+
pybind11/include/pybind11/stl_bind.h,sha256=B5t8E0A4Zdgm2sF0J8Q_UI2U5uqEBQ9TsJCelsJ4q0E,28495
|
| 53 |
+
pybind11/include/pybind11/type_caster_pyobject_ptr.h,sha256=H7pKBYTvUlibiJQEcKmeAkygSQwoCkuIyukNSDmVq-U,1929
|
| 54 |
+
pybind11/include/pybind11/typing.h,sha256=PIjZFNNzY_KsrkHQPlg0Vt24jlTi6kThdOldEJjchtY,7000
|
| 55 |
+
pybind11/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 56 |
+
pybind11/setup_helpers.py,sha256=AwD_CjfVzX653nW4_i0U4bkFMCG4ZILoMZixyL8CZ4o,17490
|
| 57 |
+
pybind11/share/cmake/pybind11/FindPythonLibsNew.cmake,sha256=_ZVzgVp6GQSEEv-b2iuauqTgoi1k2jHiNJlpl25MN-4,12187
|
| 58 |
+
pybind11/share/cmake/pybind11/pybind11Common.cmake,sha256=lvJJ518cN7SjKDgjpXw0XU0eKW358wEloIcKCyCNPB0,16164
|
| 59 |
+
pybind11/share/cmake/pybind11/pybind11Config.cmake,sha256=I96KX_zIZvLHbedHknVBj2YKhMt_QjM5LhCbzVNTvD8,7959
|
| 60 |
+
pybind11/share/cmake/pybind11/pybind11ConfigVersion.cmake,sha256=vDsLSBg7-Nop8Ar9wRe0xKgGUV4LRzWE4XE0kE5B6fE,1403
|
| 61 |
+
pybind11/share/cmake/pybind11/pybind11GuessPythonExtSuffix.cmake,sha256=WvhK2E-vWi9ArY0WJZXEK4kEFHpDQjl-au963hqH0r0,3321
|
| 62 |
+
pybind11/share/cmake/pybind11/pybind11NewTools.cmake,sha256=zGLNjL28gzi8tvwiabudLsye7id_sZI5ooYfiBBllvM,12169
|
| 63 |
+
pybind11/share/cmake/pybind11/pybind11Targets.cmake,sha256=tIjPtIpfb5m9POtu484cjGgNyWc5E4bbKzESLrcOLA0,4271
|
| 64 |
+
pybind11/share/cmake/pybind11/pybind11Tools.cmake,sha256=5K6EahoS7wIaQIhjrDS4p4jTpYr0b_MronXKee8zCAc,8565
|
| 65 |
+
pybind11/share/pkgconfig/pybind11.pc,sha256=M17R2NbpW6o7ujxioMP5M6WgVGrmJ_1vu_-E-H_rbes,171
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/REQUESTED
ADDED
|
File without changes
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/entry_points.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
pybind11-config = pybind11.__main__:main
|
| 3 |
+
|
| 4 |
+
[pipx.run]
|
| 5 |
+
pybind11 = pybind11.__main__:main
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/detail/cpp_conduit.h
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Copyright (c) 2024 The pybind Community.
|
| 2 |
+
|
| 3 |
+
#pragma once
|
| 4 |
+
|
| 5 |
+
#include <pybind11/pytypes.h>
|
| 6 |
+
|
| 7 |
+
#include "common.h"
|
| 8 |
+
#include "internals.h"
|
| 9 |
+
|
| 10 |
+
#include <typeinfo>
|
| 11 |
+
|
| 12 |
+
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
|
| 13 |
+
PYBIND11_NAMESPACE_BEGIN(detail)
|
| 14 |
+
|
| 15 |
+
// Forward declaration needed here: Refactoring opportunity.
|
| 16 |
+
extern "C" inline PyObject *pybind11_object_new(PyTypeObject *type, PyObject *, PyObject *);
|
| 17 |
+
|
| 18 |
+
// Reports whether `type_obj` passes the "managed by our internals" check.
// Under PyPy (PYPY_VERSION defined) the type is looked up in
// internals.registered_types_py; on other interpreters the check is whether
// the type's tp_new slot is pybind11_object_new.
inline bool type_is_managed_by_our_internals(PyTypeObject *type_obj) {
|
| 19 |
+
#if defined(PYPY_VERSION)
|
| 20 |
+
auto &internals = get_internals();
|
| 21 |
+
return bool(internals.registered_types_py.find(type_obj)
|
| 22 |
+
!= internals.registered_types_py.end());
|
| 23 |
+
#else
|
| 24 |
+
return bool(type_obj->tp_new == pybind11_object_new);
|
| 25 |
+
#endif
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
// Returns true when looking up `attr_name` on `type_obj` via _PyType_Lookup
// yields a non-null descriptor that satisfies PyInstanceMethod_Check.
// NOTE(review): `descr` is never DECREF'd here, which is consistent with
// _PyType_Lookup returning a borrowed reference -- confirm against the CPython
// version in use.
inline bool is_instance_method_of_type(PyTypeObject *type_obj, PyObject *attr_name) {
|
| 29 |
+
PyObject *descr = _PyType_Lookup(type_obj, attr_name);
|
| 30 |
+
return bool((descr != nullptr) && PyInstanceMethod_Check(descr));
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
// Looks up the "_pybind11_conduit_v1_" attribute on `obj` and returns it as an
// owning `object`, or an empty `object()` when no usable method is found.
// An empty object is returned when:
//   - `obj` is itself a type object;
//   - the type passes type_is_managed_by_our_internals() but the attribute is
//     not an instance method on that type;
//   - the attribute lookup fails (the pending Python error is cleared);
//   - the attribute exists but is not callable.
inline object try_get_cpp_conduit_method(PyObject *obj) {
|
| 34 |
+
if (PyType_Check(obj)) {
|
| 35 |
+
return object();
|
| 36 |
+
}
|
| 37 |
+
PyTypeObject *type_obj = Py_TYPE(obj);
|
| 38 |
+
str attr_name("_pybind11_conduit_v1_");
|
| 39 |
+
bool assumed_to_be_callable = false;
|
| 40 |
+
if (type_is_managed_by_our_internals(type_obj)) {
|
| 41 |
+
if (!is_instance_method_of_type(type_obj, attr_name.ptr())) {
|
| 42 |
+
return object();
|
| 43 |
+
}
|
| 44 |
+
// The instance-method check passed, so the PyCallable_Check below is skipped.
assumed_to_be_callable = true;
|
| 45 |
+
}
|
| 46 |
+
PyObject *method = PyObject_GetAttr(obj, attr_name.ptr());
|
| 47 |
+
if (method == nullptr) {
|
| 48 |
+
// A failed lookup is not an error for the caller: drop the Python exception.
PyErr_Clear();
|
| 49 |
+
return object();
|
| 50 |
+
}
|
| 51 |
+
if (!assumed_to_be_callable && PyCallable_Check(method) == 0) {
|
| 52 |
+
// Not callable: release the reference obtained from PyObject_GetAttr.
Py_DECREF(method);
|
| 53 |
+
return object();
|
| 54 |
+
}
|
| 55 |
+
// Hand the reference obtained from PyObject_GetAttr over to the `object` wrapper.
return reinterpret_steal<object>(method);
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
// Tries to obtain a raw C++ pointer from `src` through its conduit method.
// If try_get_cpp_conduit_method() finds a method, it is called with three
// arguments: bytes(PYBIND11_PLATFORM_ABI_ID), a capsule wrapping
// `cpp_type_info` (named typeid(std::type_info).name()), and
// bytes("raw_pointer_ephemeral"). When the call result is a capsule, the
// pointer stored in it is returned; in every other case the result is nullptr.
// NOTE(review): the pointer is read from a capsule held only by the local
// `cpp_conduit`, so nothing here keeps that capsule alive for the caller --
// presumably the reason for "ephemeral" in the name.
inline void *try_raw_pointer_ephemeral_from_cpp_conduit(handle src,
|
| 59 |
+
const std::type_info *cpp_type_info) {
|
| 60 |
+
object method = try_get_cpp_conduit_method(src.ptr());
|
| 61 |
+
if (method) {
|
| 62 |
+
capsule cpp_type_info_capsule(const_cast<void *>(static_cast<const void *>(cpp_type_info)),
|
| 63 |
+
typeid(std::type_info).name());
|
| 64 |
+
object cpp_conduit = method(bytes(PYBIND11_PLATFORM_ABI_ID),
|
| 65 |
+
cpp_type_info_capsule,
|
| 66 |
+
bytes("raw_pointer_ephemeral"));
|
| 67 |
+
if (isinstance<capsule>(cpp_conduit)) {
|
| 68 |
+
return reinterpret_borrow<capsule>(cpp_conduit).get_pointer();
|
| 69 |
+
}
|
| 70 |
+
}
|
| 71 |
+
return nullptr;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
#define PYBIND11_HAS_CPP_CONDUIT 1
|
| 75 |
+
|
| 76 |
+
PYBIND11_NAMESPACE_END(detail)
|
| 77 |
+
PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/gil.h
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
pybind11/gil.h: RAII helpers for managing the GIL
|
| 3 |
+
|
| 4 |
+
Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>
|
| 5 |
+
|
| 6 |
+
All rights reserved. Use of this source code is governed by a
|
| 7 |
+
BSD-style license that can be found in the LICENSE file.
|
| 8 |
+
*/
|
| 9 |
+
|
| 10 |
+
#pragma once
|
| 11 |
+
|
| 12 |
+
#include "detail/common.h"
|
| 13 |
+
|
| 14 |
+
#include <cassert>
|
| 15 |
+
|
| 16 |
+
#if !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT)
|
| 17 |
+
# include "detail/internals.h"
|
| 18 |
+
#endif
|
| 19 |
+
|
| 20 |
+
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
|
| 21 |
+
|
| 22 |
+
PYBIND11_NAMESPACE_BEGIN(detail)
|
| 23 |
+
|
| 24 |
+
// forward declarations
|
| 25 |
+
PyThreadState *get_thread_state_unchecked();
|
| 26 |
+
|
| 27 |
+
PYBIND11_NAMESPACE_END(detail)
|
| 28 |
+
|
| 29 |
+
#if !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT)
|
| 30 |
+
|
| 31 |
+
/* The functions below essentially reproduce the PyGILState_* API using a RAII
|
| 32 |
+
* pattern, but there are a few important differences:
|
| 33 |
+
*
|
| 34 |
+
1. When acquiring the GIL from a non-main thread during the finalization
|
| 35 |
+
* phase, the GILState API blindly terminates the calling thread, which
|
| 36 |
+
* is often not what is wanted. This API does not do this.
|
| 37 |
+
*
|
| 38 |
+
* 2. The gil_scoped_release function can optionally cut the relationship
|
| 39 |
+
* of a PyThreadState and its associated thread, which allows moving it to
|
| 40 |
+
* another thread (this is a fairly rare/advanced use case).
|
| 41 |
+
*
|
| 42 |
+
* 3. The reference count of an acquired thread state can be controlled. This
|
| 43 |
+
* can be handy to prevent cases where callbacks issued from an external
|
| 44 |
+
* thread would otherwise constantly construct and destroy thread state data
|
| 45 |
+
* structures.
|
| 46 |
+
*
|
| 47 |
+
* See the Python bindings of NanoGUI (http://github.com/wjakob/nanogui) for an
|
| 48 |
+
* example which uses features 2 and 3 to migrate the Python thread of
|
| 49 |
+
* execution to another thread (to run the event loop on the original thread,
|
| 50 |
+
* in this case).
|
| 51 |
+
*/
|
| 52 |
+
|
| 53 |
+
/// RAII guard that acquires the GIL in its constructor and undoes that in its
/// destructor. It works on a thread state kept under pybind11's own TLS key
/// (`internals.tstate`) and counts users through `tstate->gilstate_counter`.
/// Copying is disabled.
class gil_scoped_acquire {
|
| 54 |
+
public:
|
| 55 |
+
PYBIND11_NOINLINE gil_scoped_acquire() {
|
| 56 |
+
auto &internals = detail::get_internals();
|
| 57 |
+
tstate = (PyThreadState *) PYBIND11_TLS_GET_VALUE(internals.tstate);
|
| 58 |
+
|
| 59 |
+
if (!tstate) {
|
| 60 |
+
/* Check if the GIL was acquired using the PyGILState_* API instead (e.g. if
|
| 61 |
+
calling from a Python thread). Since we use a different key, this ensures
|
| 62 |
+
we don't create a new thread state and deadlock in PyEval_AcquireThread
|
| 63 |
+
below. Note we don't save this state with internals.tstate, since we don't
|
| 64 |
+
create it we would fail to clear it (its reference count should be > 0). */
|
| 65 |
+
tstate = PyGILState_GetThisThreadState();
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
// No thread state found under either key: create one, start its counter at
// zero and remember it in pybind11's TLS slot.
if (!tstate) {
|
| 69 |
+
tstate = PyThreadState_New(internals.istate);
|
| 70 |
+
# if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
|
| 71 |
+
if (!tstate) {
|
| 72 |
+
pybind11_fail("scoped_acquire: could not create thread state!");
|
| 73 |
+
}
|
| 74 |
+
# endif
|
| 75 |
+
tstate->gilstate_counter = 0;
|
| 76 |
+
PYBIND11_TLS_REPLACE_VALUE(internals.tstate, tstate);
|
| 77 |
+
} else {
|
| 78 |
+
// An existing thread state was found: the GIL only has to be acquired
// when that state is not the current one.
release = detail::get_thread_state_unchecked() != tstate;
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
if (release) {
|
| 82 |
+
PyEval_AcquireThread(tstate);
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
inc_ref();
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
gil_scoped_acquire(const gil_scoped_acquire &) = delete;
|
| 89 |
+
gil_scoped_acquire &operator=(const gil_scoped_acquire &) = delete;
|
| 90 |
+
|
| 91 |
+
/// Adds one reference to the thread state's `gilstate_counter`.
void inc_ref() { ++tstate->gilstate_counter; }
|
| 92 |
+
|
| 93 |
+
/// Drops one reference from `gilstate_counter`. When the counter reaches zero
/// the thread state is cleared, deleted via PyThreadState_DeleteCurrent()
/// (skipped after disarm()), removed from pybind11's TLS slot, and `release`
/// is reset so the destructor does not call PyEval_SaveThread().
PYBIND11_NOINLINE void dec_ref() {
|
| 94 |
+
--tstate->gilstate_counter;
|
| 95 |
+
# if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
|
| 96 |
+
if (detail::get_thread_state_unchecked() != tstate) {
|
| 97 |
+
pybind11_fail("scoped_acquire::dec_ref(): thread state must be current!");
|
| 98 |
+
}
|
| 99 |
+
if (tstate->gilstate_counter < 0) {
|
| 100 |
+
pybind11_fail("scoped_acquire::dec_ref(): reference count underflow!");
|
| 101 |
+
}
|
| 102 |
+
# endif
|
| 103 |
+
if (tstate->gilstate_counter == 0) {
|
| 104 |
+
# if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
|
| 105 |
+
if (!release) {
|
| 106 |
+
pybind11_fail("scoped_acquire::dec_ref(): internal error!");
|
| 107 |
+
}
|
| 108 |
+
# endif
|
| 109 |
+
PyThreadState_Clear(tstate);
|
| 110 |
+
if (active) {
|
| 111 |
+
PyThreadState_DeleteCurrent();
|
| 112 |
+
}
|
| 113 |
+
PYBIND11_TLS_DELETE_VALUE(detail::get_internals().tstate);
|
| 114 |
+
release = false;
|
| 115 |
+
}
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
/// This method will disable the PyThreadState_DeleteCurrent call and the
|
| 119 |
+
/// GIL won't be acquired. This method should be used if the interpreter
|
| 120 |
+
/// could be shutting down when this is called, as thread deletion is not
|
| 121 |
+
/// allowed during shutdown. Check _Py_IsFinalizing() on Python 3.7+, and
|
| 122 |
+
/// protect subsequent code.
|
| 123 |
+
PYBIND11_NOINLINE void disarm() { active = false; }
|
| 124 |
+
|
| 125 |
+
/// Drops this guard's reference and, if `release` is still set afterwards,
/// gives the GIL back with PyEval_SaveThread().
PYBIND11_NOINLINE ~gil_scoped_acquire() {
|
| 126 |
+
dec_ref();
|
| 127 |
+
if (release) {
|
| 128 |
+
PyEval_SaveThread();
|
| 129 |
+
}
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
private:
|
| 133 |
+
// Thread state this guard operates on (found via TLS / PyGILState, or newly created).
PyThreadState *tstate = nullptr;
|
| 134 |
+
// True while the destructor still has to release the GIL with PyEval_SaveThread().
bool release = true;
|
| 135 |
+
// Cleared by disarm(); when false, dec_ref() skips PyThreadState_DeleteCurrent().
bool active = true;
|
| 136 |
+
};
|
| 137 |
+
|
| 138 |
+
/// RAII guard that releases the GIL for the lifetime of the object: the
/// constructor saves the thread state with PyEval_SaveThread() and the
/// destructor restores it with PyEval_RestoreThread(). When `disassoc` is set,
/// the thread state is additionally removed from pybind11's TLS slot while the
/// guard is alive and stored back on destruction. Copying is disabled.
class gil_scoped_release {
|
| 139 |
+
public:
|
| 140 |
+
// PRECONDITION: The GIL must be held when this constructor is called.
|
| 141 |
+
explicit gil_scoped_release(bool disassoc = false) : disassoc(disassoc) {
|
| 142 |
+
assert(PyGILState_Check());
|
| 143 |
+
// `get_internals()` must be called here unconditionally in order to initialize
|
| 144 |
+
// `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an
|
| 145 |
+
// initialization race could occur as multiple threads try `gil_scoped_acquire`.
|
| 146 |
+
auto &internals = detail::get_internals();
|
| 147 |
+
// NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
|
| 148 |
+
tstate = PyEval_SaveThread();
|
| 149 |
+
if (disassoc) {
|
| 150 |
+
// Python >= 3.7 can remove this, it's an int before 3.7
|
| 151 |
+
// NOLINTNEXTLINE(readability-qualified-auto)
|
| 152 |
+
auto key = internals.tstate;
|
| 153 |
+
PYBIND11_TLS_DELETE_VALUE(key);
|
| 154 |
+
}
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
gil_scoped_release(const gil_scoped_release &) = delete;
|
| 158 |
+
gil_scoped_release &operator=(const gil_scoped_release &) = delete;
|
| 159 |
+
|
| 160 |
+
/// This method disables the PyEval_RestoreThread call in the destructor, so
|
| 161 |
+
/// the GIL won't be re-acquired. This method should be used if the interpreter
|
| 162 |
+
/// could be shutting down when the destructor runs, as PyEval_RestoreThread
|
| 163 |
+
/// should not be called while the runtime is finalizing. Check
|
| 164 |
+
/// _Py_IsFinalizing() on Python 3.7+, and protect subsequent code.
|
| 165 |
+
PYBIND11_NOINLINE void disarm() { active = false; }
|
| 166 |
+
|
| 167 |
+
/// Restores the saved thread state (unless disarm() was called) and, for a
/// disassociated guard, stores it back into pybind11's TLS slot. Does
/// nothing when no thread state was saved.
~gil_scoped_release() {
|
| 168 |
+
if (!tstate) {
|
| 169 |
+
return;
|
| 170 |
+
}
|
| 171 |
+
// `PyEval_RestoreThread()` should not be called if runtime is finalizing
|
| 172 |
+
if (active) {
|
| 173 |
+
PyEval_RestoreThread(tstate);
|
| 174 |
+
}
|
| 175 |
+
if (disassoc) {
|
| 176 |
+
// Python >= 3.7 can remove this, it's an int before 3.7
|
| 177 |
+
// NOLINTNEXTLINE(readability-qualified-auto)
|
| 178 |
+
auto key = detail::get_internals().tstate;
|
| 179 |
+
PYBIND11_TLS_REPLACE_VALUE(key, tstate);
|
| 180 |
+
}
|
| 181 |
+
}
|
| 182 |
+
|
| 183 |
+
private:
|
| 184 |
+
// Thread state returned by PyEval_SaveThread() in the constructor.
PyThreadState *tstate;
|
| 185 |
+
// When true, the thread state is also detached from / re-attached to the TLS slot.
bool disassoc;
|
| 186 |
+
// Cleared by disarm(); when false, the destructor skips PyEval_RestoreThread().
bool active = true;
|
| 187 |
+
};
|
| 188 |
+
|
| 189 |
+
#else // PYBIND11_SIMPLE_GIL_MANAGEMENT
|
| 190 |
+
|
| 191 |
+
/// PYBIND11_SIMPLE_GIL_MANAGEMENT variant: a thin RAII wrapper that calls
/// PyGILState_Ensure() on construction and PyGILState_Release() with the
/// returned state on destruction. Copying is disabled.
class gil_scoped_acquire {
|
| 192 |
+
// State token returned by PyGILState_Ensure(), handed back to PyGILState_Release().
PyGILState_STATE state;
|
| 193 |
+
|
| 194 |
+
public:
|
| 195 |
+
gil_scoped_acquire() : state{PyGILState_Ensure()} {}
|
| 196 |
+
gil_scoped_acquire(const gil_scoped_acquire &) = delete;
|
| 197 |
+
gil_scoped_acquire &operator=(const gil_scoped_acquire &) = delete;
|
| 198 |
+
~gil_scoped_acquire() { PyGILState_Release(state); }
|
| 199 |
+
/// No-op in this variant; present so both variants expose the same member.
void disarm() {}
|
| 200 |
+
};
|
| 201 |
+
|
| 202 |
+
/// PYBIND11_SIMPLE_GIL_MANAGEMENT variant: a thin RAII wrapper that calls
/// PyEval_SaveThread() on construction and PyEval_RestoreThread() with the
/// saved thread state on destruction. Copying is disabled.
class gil_scoped_release {
|
| 203 |
+
// Thread state returned by PyEval_SaveThread() in the constructor.
PyThreadState *state;
|
| 204 |
+
|
| 205 |
+
public:
|
| 206 |
+
// PRECONDITION: The GIL must be held when this constructor is called.
|
| 207 |
+
gil_scoped_release() {
|
| 208 |
+
assert(PyGILState_Check());
|
| 209 |
+
state = PyEval_SaveThread();
|
| 210 |
+
}
|
| 211 |
+
gil_scoped_release(const gil_scoped_release &) = delete;
|
| 212 |
+
gil_scoped_release &operator=(const gil_scoped_release &) = delete;
|
| 213 |
+
~gil_scoped_release() { PyEval_RestoreThread(state); }
|
| 214 |
+
/// No-op in this variant; present so both variants expose the same member.
void disarm() {}
|
| 215 |
+
};
|
| 216 |
+
|
| 217 |
+
#endif // PYBIND11_SIMPLE_GIL_MANAGEMENT
|
| 218 |
+
|
| 219 |
+
PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/iostream.h
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
pybind11/iostream.h -- Tools to assist with redirecting cout and cerr to Python
|
| 3 |
+
|
| 4 |
+
Copyright (c) 2017 Henry F. Schreiner
|
| 5 |
+
|
| 6 |
+
All rights reserved. Use of this source code is governed by a
|
| 7 |
+
BSD-style license that can be found in the LICENSE file.
|
| 8 |
+
|
| 9 |
+
WARNING: The implementation in this file is NOT thread safe. Multiple
|
| 10 |
+
threads writing to a redirected ostream concurrently cause data races
|
| 11 |
+
and potentially buffer overflows. Therefore it is currently a requirement
|
| 12 |
+
that all (possibly) concurrent redirected ostream writes are protected by
|
| 13 |
+
a mutex.
|
| 14 |
+
#HelpAppreciated: Work on iostream.h thread safety.
|
| 15 |
+
For more background see the discussions under
|
| 16 |
+
https://github.com/pybind/pybind11/pull/2982 and
|
| 17 |
+
https://github.com/pybind/pybind11/pull/2995.
|
| 18 |
+
*/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
|
| 22 |
+
#include "pybind11.h"
|
| 23 |
+
|
| 24 |
+
#include <algorithm>
|
| 25 |
+
#include <cstring>
|
| 26 |
+
#include <iostream>
|
| 27 |
+
#include <iterator>
|
| 28 |
+
#include <memory>
|
| 29 |
+
#include <ostream>
|
| 30 |
+
#include <streambuf>
|
| 31 |
+
#include <string>
|
| 32 |
+
#include <utility>
|
| 33 |
+
|
| 34 |
+
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
|
| 35 |
+
PYBIND11_NAMESPACE_BEGIN(detail)
|
| 36 |
+
|
| 37 |
+
// Buffer that writes to Python instead of C++
|
| 38 |
+
class pythonbuf : public std::streambuf {
|
| 39 |
+
private:
|
| 40 |
+
using traits_type = std::streambuf::traits_type;
|
| 41 |
+
|
| 42 |
+
const size_t buf_size;
|
| 43 |
+
std::unique_ptr<char[]> d_buffer;
|
| 44 |
+
object pywrite;
|
| 45 |
+
object pyflush;
|
| 46 |
+
|
| 47 |
+
int overflow(int c) override {
|
| 48 |
+
if (!traits_type::eq_int_type(c, traits_type::eof())) {
|
| 49 |
+
*pptr() = traits_type::to_char_type(c);
|
| 50 |
+
pbump(1);
|
| 51 |
+
}
|
| 52 |
+
return sync() == 0 ? traits_type::not_eof(c) : traits_type::eof();
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
// Computes how many bytes at the end of the buffer are part of an
|
| 56 |
+
// incomplete sequence of UTF-8 bytes.
|
| 57 |
+
// Precondition: pbase() < pptr()
|
| 58 |
+
size_t utf8_remainder() const {
|
| 59 |
+
const auto rbase = std::reverse_iterator<char *>(pbase());
|
| 60 |
+
const auto rpptr = std::reverse_iterator<char *>(pptr());
|
| 61 |
+
auto is_ascii = [](char c) { return (static_cast<unsigned char>(c) & 0x80) == 0x00; };
|
| 62 |
+
auto is_leading = [](char c) { return (static_cast<unsigned char>(c) & 0xC0) == 0xC0; };
|
| 63 |
+
auto is_leading_2b = [](char c) { return static_cast<unsigned char>(c) <= 0xDF; };
|
| 64 |
+
auto is_leading_3b = [](char c) { return static_cast<unsigned char>(c) <= 0xEF; };
|
| 65 |
+
// If the last character is ASCII, there are no incomplete code points
|
| 66 |
+
if (is_ascii(*rpptr)) {
|
| 67 |
+
return 0;
|
| 68 |
+
}
|
| 69 |
+
// Otherwise, work back from the end of the buffer and find the first
|
| 70 |
+
// UTF-8 leading byte
|
| 71 |
+
const auto rpend = rbase - rpptr >= 3 ? rpptr + 3 : rbase;
|
| 72 |
+
const auto leading = std::find_if(rpptr, rpend, is_leading);
|
| 73 |
+
if (leading == rbase) {
|
| 74 |
+
return 0;
|
| 75 |
+
}
|
| 76 |
+
const auto dist = static_cast<size_t>(leading - rpptr);
|
| 77 |
+
size_t remainder = 0;
|
| 78 |
+
|
| 79 |
+
if (dist == 0) {
|
| 80 |
+
remainder = 1; // 1-byte code point is impossible
|
| 81 |
+
} else if (dist == 1) {
|
| 82 |
+
remainder = is_leading_2b(*leading) ? 0 : dist + 1;
|
| 83 |
+
} else if (dist == 2) {
|
| 84 |
+
remainder = is_leading_3b(*leading) ? 0 : dist + 1;
|
| 85 |
+
}
|
| 86 |
+
// else if (dist >= 3), at least 4 bytes before encountering an UTF-8
|
| 87 |
+
// leading byte, either no remainder or invalid UTF-8.
|
| 88 |
+
// Invalid UTF-8 will cause an exception later when converting
|
| 89 |
+
// to a Python string, so that's not handled here.
|
| 90 |
+
return remainder;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
// This function must be non-virtual to be called in a destructor.
|
| 94 |
+
int _sync() {
|
| 95 |
+
if (pbase() != pptr()) { // If buffer is not empty
|
| 96 |
+
gil_scoped_acquire tmp;
|
| 97 |
+
// This subtraction cannot be negative, so dropping the sign.
|
| 98 |
+
auto size = static_cast<size_t>(pptr() - pbase());
|
| 99 |
+
size_t remainder = utf8_remainder();
|
| 100 |
+
|
| 101 |
+
if (size > remainder) {
|
| 102 |
+
str line(pbase(), size - remainder);
|
| 103 |
+
pywrite(std::move(line));
|
| 104 |
+
pyflush();
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
// Copy the remainder at the end of the buffer to the beginning:
|
| 108 |
+
if (remainder > 0) {
|
| 109 |
+
std::memmove(pbase(), pptr() - remainder, remainder);
|
| 110 |
+
}
|
| 111 |
+
setp(pbase(), epptr());
|
| 112 |
+
pbump(static_cast<int>(remainder));
|
| 113 |
+
}
|
| 114 |
+
return 0;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
int sync() override { return _sync(); }
|
| 118 |
+
|
| 119 |
+
public:
|
| 120 |
+
explicit pythonbuf(const object &pyostream, size_t buffer_size = 1024)
|
| 121 |
+
: buf_size(buffer_size), d_buffer(new char[buf_size]), pywrite(pyostream.attr("write")),
|
| 122 |
+
pyflush(pyostream.attr("flush")) {
|
| 123 |
+
setp(d_buffer.get(), d_buffer.get() + buf_size - 1);
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
pythonbuf(pythonbuf &&) = default;
|
| 127 |
+
|
| 128 |
+
/// Sync before destroy
|
| 129 |
+
~pythonbuf() override { _sync(); }
|
| 130 |
+
};
|
| 131 |
+
|
| 132 |
+
PYBIND11_NAMESPACE_END(detail)
|
| 133 |
+
|
| 134 |
+
/** \rst
|
| 135 |
+
This is a move-only guard that redirects output.
|
| 136 |
+
|
| 137 |
+
.. code-block:: cpp
|
| 138 |
+
|
| 139 |
+
#include <pybind11/iostream.h>
|
| 140 |
+
|
| 141 |
+
...
|
| 142 |
+
|
| 143 |
+
{
|
| 144 |
+
py::scoped_ostream_redirect output;
|
| 145 |
+
std::cout << "Hello, World!"; // Python stdout
|
| 146 |
+
} // <-- return std::cout to normal
|
| 147 |
+
|
| 148 |
+
You can explicitly pass the c++ stream and the python object,
|
| 149 |
+
for example to guard stderr instead.
|
| 150 |
+
|
| 151 |
+
.. code-block:: cpp
|
| 152 |
+
|
| 153 |
+
{
|
| 154 |
+
py::scoped_ostream_redirect output{
|
| 155 |
+
std::cerr, py::module_::import("sys").attr("stderr")};
|
| 156 |
+
std::cerr << "Hello, World!";
|
| 157 |
+
}
|
| 158 |
+
\endrst */
|
| 159 |
+
class scoped_ostream_redirect {
|
| 160 |
+
protected:
|
| 161 |
+
std::streambuf *old;
|
| 162 |
+
std::ostream &costream;
|
| 163 |
+
detail::pythonbuf buffer;
|
| 164 |
+
|
| 165 |
+
public:
|
| 166 |
+
explicit scoped_ostream_redirect(std::ostream &costream = std::cout,
|
| 167 |
+
const object &pyostream
|
| 168 |
+
= module_::import("sys").attr("stdout"))
|
| 169 |
+
: costream(costream), buffer(pyostream) {
|
| 170 |
+
old = costream.rdbuf(&buffer);
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
~scoped_ostream_redirect() { costream.rdbuf(old); }
|
| 174 |
+
|
| 175 |
+
scoped_ostream_redirect(const scoped_ostream_redirect &) = delete;
|
| 176 |
+
scoped_ostream_redirect(scoped_ostream_redirect &&other) = default;
|
| 177 |
+
scoped_ostream_redirect &operator=(const scoped_ostream_redirect &) = delete;
|
| 178 |
+
scoped_ostream_redirect &operator=(scoped_ostream_redirect &&) = delete;
|
| 179 |
+
};
|
| 180 |
+
|
| 181 |
+
/** \rst
|
| 182 |
+
Like `scoped_ostream_redirect`, but redirects cerr by default. This class
|
| 183 |
+
is provided primarily to make ``py::call_guard`` easier to use.
|
| 184 |
+
|
| 185 |
+
.. code-block:: cpp
|
| 186 |
+
|
| 187 |
+
m.def("noisy_func", &noisy_func,
|
| 188 |
+
py::call_guard<scoped_ostream_redirect,
|
| 189 |
+
scoped_estream_redirect>());
|
| 190 |
+
|
| 191 |
+
\endrst */
|
| 192 |
+
class scoped_estream_redirect : public scoped_ostream_redirect {
|
| 193 |
+
public:
|
| 194 |
+
explicit scoped_estream_redirect(std::ostream &costream = std::cerr,
|
| 195 |
+
const object &pyostream
|
| 196 |
+
= module_::import("sys").attr("stderr"))
|
| 197 |
+
: scoped_ostream_redirect(costream, pyostream) {}
|
| 198 |
+
};
|
| 199 |
+
|
| 200 |
+
PYBIND11_NAMESPACE_BEGIN(detail)
|
| 201 |
+
|
| 202 |
+
// Class to redirect output as a context manager. C++ backend.
|
| 203 |
+
class OstreamRedirect {
|
| 204 |
+
bool do_stdout_;
|
| 205 |
+
bool do_stderr_;
|
| 206 |
+
std::unique_ptr<scoped_ostream_redirect> redirect_stdout;
|
| 207 |
+
std::unique_ptr<scoped_estream_redirect> redirect_stderr;
|
| 208 |
+
|
| 209 |
+
public:
|
| 210 |
+
explicit OstreamRedirect(bool do_stdout = true, bool do_stderr = true)
|
| 211 |
+
: do_stdout_(do_stdout), do_stderr_(do_stderr) {}
|
| 212 |
+
|
| 213 |
+
void enter() {
|
| 214 |
+
if (do_stdout_) {
|
| 215 |
+
redirect_stdout.reset(new scoped_ostream_redirect());
|
| 216 |
+
}
|
| 217 |
+
if (do_stderr_) {
|
| 218 |
+
redirect_stderr.reset(new scoped_estream_redirect());
|
| 219 |
+
}
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
void exit() {
|
| 223 |
+
redirect_stdout.reset();
|
| 224 |
+
redirect_stderr.reset();
|
| 225 |
+
}
|
| 226 |
+
};
|
| 227 |
+
|
| 228 |
+
PYBIND11_NAMESPACE_END(detail)
|
| 229 |
+
|
| 230 |
+
/** \rst
|
| 231 |
+
This is a helper function to add a C++ redirect context manager to Python
|
| 232 |
+
instead of using a C++ guard. To use it, add the following to your binding code:
|
| 233 |
+
|
| 234 |
+
.. code-block:: cpp
|
| 235 |
+
|
| 236 |
+
#include <pybind11/iostream.h>
|
| 237 |
+
|
| 238 |
+
...
|
| 239 |
+
|
| 240 |
+
py::add_ostream_redirect(m, "ostream_redirect");
|
| 241 |
+
|
| 242 |
+
You now have a Python context manager that redirects your output:
|
| 243 |
+
|
| 244 |
+
.. code-block:: python
|
| 245 |
+
|
| 246 |
+
with m.ostream_redirect():
|
| 247 |
+
m.print_to_cout_function()
|
| 248 |
+
|
| 249 |
+
This manager can optionally be told which streams to operate on:
|
| 250 |
+
|
| 251 |
+
.. code-block:: python
|
| 252 |
+
|
| 253 |
+
with m.ostream_redirect(stdout=True, stderr=True):
|
| 254 |
+
m.noisy_function_with_error_printing()
|
| 255 |
+
|
| 256 |
+
\endrst */
|
| 257 |
+
// Registers detail::OstreamRedirect on module ``m`` under ``name`` as a
// module-local class with ``__enter__``/``__exit__`` methods, and returns the
// resulting class binding.
inline class_<detail::OstreamRedirect>
add_ostream_redirect(module_ m, const std::string &name = "ostream_redirect") {
    using redirect_t = detail::OstreamRedirect;

    class_<redirect_t> binding(std::move(m), name.c_str(), module_local());
    // Keyword arguments select the streams; both default to enabled.
    binding.def(init<bool, bool>(), arg("stdout") = true, arg("stderr") = true);
    binding.def("__enter__", &redirect_t::enter);
    // The arguments Python passes to __exit__ are accepted and ignored.
    binding.def("__exit__", [](redirect_t &self_, const args &) { self_.exit(); });
    return binding;
}
|
| 264 |
+
|
| 265 |
+
PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_C.cpython-311-x86_64-linux-gnu.so
ADDED
|
Binary file (37.9 kB). View file
|
|
|