koichi12 commited on
Commit
b891f5b
·
verified ·
1 Parent(s): e74124d

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +11 -0
  2. tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Scanning.cpython-311-x86_64-linux-gnu.so +3 -0
  3. tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Code.cpython-311.pyc +3 -0
  4. tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Nodes.cpython-311.pyc +3 -0
  5. tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/ParseTreeTransforms.cpython-311.pyc +3 -0
  6. tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/PyrexTypes.cpython-311.pyc +3 -0
  7. tuning-competition-baseline/.venv/lib/python3.11/site-packages/__pycache__/typing_extensions.cpython-311.pyc +3 -0
  8. tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/__init__.cpython-311.pyc +0 -0
  9. tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/_unix.cpython-311.pyc +0 -0
  10. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/INSTALLER +1 -0
  11. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/RECORD +104 -0
  12. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/WHEEL +5 -0
  13. tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/top_level.txt +1 -0
  14. tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_C.cpython-311-x86_64-linux-gnu.so +3 -0
  15. tuning-competition-baseline/.venv/lib/python3.11/site-packages/jinja2/__pycache__/compiler.cpython-311.pyc +3 -0
  16. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/__pycache__/ctx_mp.cpython-311.pyc +0 -0
  17. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__init__.py +77 -0
  18. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libelefun.cpython-311.pyc +0 -0
  19. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libhyper.cpython-311.pyc +0 -0
  20. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libmpc.cpython-311.pyc +0 -0
  21. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libelefun.py +1428 -0
  22. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libhyper.py +1150 -0
  23. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libintmath.py +584 -0
  24. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libmpc.py +835 -0
  25. tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_functions2.cpython-311.pyc +3 -0
  26. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/__init__.py +0 -0
  27. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/Openacc/cupti_openacc.h +98 -0
  28. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_activity.h +0 -0
  29. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_events.h +1371 -0
  30. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_pcsampling_util.h +419 -0
  31. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_result.h +328 -0
  32. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_runtime_cbid.h +447 -0
  33. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_target.h +43 -0
  34. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_version.h +130 -0
  35. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_meta.h +2941 -0
  36. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_runtime_api_meta.h +2139 -0
  37. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_vdpau_interop_meta.h +38 -0
  38. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/include/cudnn_cnn_infer.h +571 -0
  39. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/__pycache__/__init__.cpython-311.pyc +0 -0
  40. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/cufftw.h +454 -0
  41. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/lib/__pycache__/__init__.cpython-311.pyc +0 -0
  42. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/idna/__pycache__/idnadata.cpython-311.pyc +3 -0
  43. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/LICENSE +29 -0
  44. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/RECORD +65 -0
  45. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/REQUESTED +0 -0
  46. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/entry_points.txt +5 -0
  47. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/detail/cpp_conduit.h +77 -0
  48. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/gil.h +219 -0
  49. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/iostream.h +265 -0
  50. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_C.cpython-311-x86_64-linux-gnu.so +0 -0
.gitattributes CHANGED
@@ -51,3 +51,14 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Plex/Trans
51
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/ExprNodes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
52
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/idna/__pycache__/uts46data.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
53
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/rich/__pycache__/_emoji_codes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
51
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/ExprNodes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
52
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/idna/__pycache__/uts46data.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
53
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/rich/__pycache__/_emoji_codes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
54
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Scanning.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
55
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/idna/__pycache__/idnadata.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
56
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/PyrexTypes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
57
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/__pycache__/gen.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
58
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Code.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
59
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/__pycache__/typing_extensions.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
60
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/ParseTreeTransforms.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
61
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_functions2.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
62
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/jinja2/__pycache__/compiler.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
63
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_C.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
64
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Nodes.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Scanning.cpython-311-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35bbd7708e61d6b2d4704c7139018d3eae67bca303d9fa03228b50845f6fffe6
3
+ size 340320
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Code.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e03d22fd7cc8b4e378f65e07858c4720dcc03e0fa3553c776863e4969826cfd4
3
+ size 145746
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Nodes.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c423f97f1ac36f06a8a2c6ff723696608c3e094001049a85ad421706ae558dea
3
+ size 522167
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/ParseTreeTransforms.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab3f1d1811e8f1f97f96bc002bc8705a4adb7a26f43def577bf24b25263f4b32
3
+ size 213081
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/PyrexTypes.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:465b72a0af764658a62bbb1d50e50b9a762ba16ddb1a6be0dd5b3b1f15c8a205
3
+ size 254554
tuning-competition-baseline/.venv/lib/python3.11/site-packages/__pycache__/typing_extensions.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f505b823a26bd0da98ceb5e93ba4f79513f56cebf4f8cb1c8ed579dcdabaac32
3
+ size 129942
tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.44 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/filelock/__pycache__/_unix.cpython-311.pyc ADDED
Binary file (3.58 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fsspec-2024.2.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
2
+ fsspec-2024.2.0.dist-info/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
3
+ fsspec-2024.2.0.dist-info/METADATA,sha256=uwzW1Braxnd_QGVI8W6J0KHi5KTiTJEm8YzSUdG-_Dc,6786
4
+ fsspec-2024.2.0.dist-info/RECORD,,
5
+ fsspec-2024.2.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
6
+ fsspec-2024.2.0.dist-info/top_level.txt,sha256=blt2pDrQDwN3Gklcw13CSPLQRd6aaOgJ8AxqrW395MI,7
7
+ fsspec/__init__.py,sha256=2kT62GfFK-AjgS-LgwSsCo_VA2IePvsyv8Ash5oiaFA,1982
8
+ fsspec/__pycache__/__init__.cpython-311.pyc,,
9
+ fsspec/__pycache__/_version.cpython-311.pyc,,
10
+ fsspec/__pycache__/archive.cpython-311.pyc,,
11
+ fsspec/__pycache__/asyn.cpython-311.pyc,,
12
+ fsspec/__pycache__/caching.cpython-311.pyc,,
13
+ fsspec/__pycache__/callbacks.cpython-311.pyc,,
14
+ fsspec/__pycache__/compression.cpython-311.pyc,,
15
+ fsspec/__pycache__/config.cpython-311.pyc,,
16
+ fsspec/__pycache__/conftest.cpython-311.pyc,,
17
+ fsspec/__pycache__/core.cpython-311.pyc,,
18
+ fsspec/__pycache__/dircache.cpython-311.pyc,,
19
+ fsspec/__pycache__/exceptions.cpython-311.pyc,,
20
+ fsspec/__pycache__/fuse.cpython-311.pyc,,
21
+ fsspec/__pycache__/generic.cpython-311.pyc,,
22
+ fsspec/__pycache__/gui.cpython-311.pyc,,
23
+ fsspec/__pycache__/mapping.cpython-311.pyc,,
24
+ fsspec/__pycache__/parquet.cpython-311.pyc,,
25
+ fsspec/__pycache__/registry.cpython-311.pyc,,
26
+ fsspec/__pycache__/spec.cpython-311.pyc,,
27
+ fsspec/__pycache__/transaction.cpython-311.pyc,,
28
+ fsspec/__pycache__/utils.cpython-311.pyc,,
29
+ fsspec/_version.py,sha256=onTKKWe4fXkBjQxbTwM82SUT0H3x4U17IYrciFAryaU,500
30
+ fsspec/archive.py,sha256=S__DzfZj-urAN3tp2W6jJ6YDiXG1fAl7FjvWUN73qIE,2386
31
+ fsspec/asyn.py,sha256=kJ45sFFya2lZsmu2v8CVc8ZPRs8AccEzAy6Jot2ylkU,36157
32
+ fsspec/caching.py,sha256=N45pzJdD4w5FOX_sxGvHWirggPNB66JTGP1HH6fpSck,28781
33
+ fsspec/callbacks.py,sha256=BDIwLzK6rr_0V5ch557fSzsivCElpdqhXr5dZ9Te-EE,9210
34
+ fsspec/compression.py,sha256=Yyd8FXw2rwWRtVoRVah_yguv-J7BUcBo4yDu6Qt52a0,4859
35
+ fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
36
+ fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
37
+ fsspec/core.py,sha256=0yCj1Z5MhbSDIQiqFs49VORl9QaGwV6hp9bXdkIoPIo,22363
38
+ fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
39
+ fsspec/exceptions.py,sha256=xcS7LiRrQ748kvOB9mrUR14kpjNztrHgEkZWi9M-VaI,330
40
+ fsspec/fuse.py,sha256=66amOa6wdIbS0DMhhfAPUoOB37HPorfXD1izV0prmTY,10145
41
+ fsspec/generic.py,sha256=NuNaP66OaphwMbuLHRFBLda78TD81isa9O4ozJqbUv0,13455
42
+ fsspec/gui.py,sha256=XKoXZpUhRE7jOhRCJH4-jRbKhVu56aS8h9tecvPD3nc,13932
43
+ fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
+ fsspec/implementations/__pycache__/__init__.cpython-311.pyc,,
45
+ fsspec/implementations/__pycache__/arrow.cpython-311.pyc,,
46
+ fsspec/implementations/__pycache__/cache_mapper.cpython-311.pyc,,
47
+ fsspec/implementations/__pycache__/cache_metadata.cpython-311.pyc,,
48
+ fsspec/implementations/__pycache__/cached.cpython-311.pyc,,
49
+ fsspec/implementations/__pycache__/dask.cpython-311.pyc,,
50
+ fsspec/implementations/__pycache__/data.cpython-311.pyc,,
51
+ fsspec/implementations/__pycache__/dbfs.cpython-311.pyc,,
52
+ fsspec/implementations/__pycache__/dirfs.cpython-311.pyc,,
53
+ fsspec/implementations/__pycache__/ftp.cpython-311.pyc,,
54
+ fsspec/implementations/__pycache__/git.cpython-311.pyc,,
55
+ fsspec/implementations/__pycache__/github.cpython-311.pyc,,
56
+ fsspec/implementations/__pycache__/http.cpython-311.pyc,,
57
+ fsspec/implementations/__pycache__/jupyter.cpython-311.pyc,,
58
+ fsspec/implementations/__pycache__/libarchive.cpython-311.pyc,,
59
+ fsspec/implementations/__pycache__/local.cpython-311.pyc,,
60
+ fsspec/implementations/__pycache__/memory.cpython-311.pyc,,
61
+ fsspec/implementations/__pycache__/reference.cpython-311.pyc,,
62
+ fsspec/implementations/__pycache__/sftp.cpython-311.pyc,,
63
+ fsspec/implementations/__pycache__/smb.cpython-311.pyc,,
64
+ fsspec/implementations/__pycache__/tar.cpython-311.pyc,,
65
+ fsspec/implementations/__pycache__/webhdfs.cpython-311.pyc,,
66
+ fsspec/implementations/__pycache__/zip.cpython-311.pyc,,
67
+ fsspec/implementations/arrow.py,sha256=_7TLuV6ZzNlpmUU_v6ud56u2wadzsKmY5qugPBxgMEs,8649
68
+ fsspec/implementations/cache_mapper.py,sha256=iHgBA6gjzDJ7_mBboHFzpLTf55HP3UEwUOZ43xyUK4M,2429
69
+ fsspec/implementations/cache_metadata.py,sha256=ZvyA7Y3KK-5Ct4E5pELzD6mH_5T03XqaKVT96qYDADU,8576
70
+ fsspec/implementations/cached.py,sha256=LbbPbeUup07O0y7gXD_atFgajWM9p1vlDKu_BOyLfbo,30943
71
+ fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
72
+ fsspec/implementations/data.py,sha256=Oti0dKzyeadnVIedo3s8CADoh9bNM-96_6viTEYr4lo,1245
73
+ fsspec/implementations/dbfs.py,sha256=cix9OYUveuSOx5UO5uRUwNUkYqjzyY0fkKnca1kTgZ0,15014
74
+ fsspec/implementations/dirfs.py,sha256=inDIRSDPhI1_ud1MMBFrpZQ11VIAMJ_dZQtbE4V08Ng,11384
75
+ fsspec/implementations/ftp.py,sha256=rp6cTog8xqjDPlKdSLKcsyP7K593_ByMabxGbNSEpTo,11655
76
+ fsspec/implementations/git.py,sha256=vKGI-Vd5q4H2RrvhebkPc9NwlfkZ980OUGhebeCw-M0,4034
77
+ fsspec/implementations/github.py,sha256=0kIiKkeAaROuHgdWBHVQFrzJ2ZfoDgymCehL_kJXHYA,7565
78
+ fsspec/implementations/http.py,sha256=PkhfgUV3-T7fG2Jf-NLX9doH52snV5Wmw91uVA9k74M,29454
79
+ fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
80
+ fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
81
+ fsspec/implementations/local.py,sha256=nxiRKg9FAQHTQss9-ET8ZzDXPGhSOktgkxrg0ffMs2I,13454
82
+ fsspec/implementations/memory.py,sha256=2iU--pOV2KCTrS-d5K8VKSygh9MPk2D7NZ_C8lMMEIw,9701
83
+ fsspec/implementations/reference.py,sha256=0iGu8mscaQ3a5iTlRNByytQ3_-1Bj8__ARqVwyy4q2M,43871
84
+ fsspec/implementations/sftp.py,sha256=fMY9XZcmpjszQ2tCqO_TPaJesaeD_Dv7ptYzgUPGoO0,5631
85
+ fsspec/implementations/smb.py,sha256=k3RtzW97lJtYuw_QpP1rJRFnUBmSsw9twFjUCex0a5U,10591
86
+ fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
87
+ fsspec/implementations/webhdfs.py,sha256=wqVfno7z0TY1HepaIvKTUUcl_bi5NkV6qWsST8t_s7Y,16745
88
+ fsspec/implementations/zip.py,sha256=JDX-3HOI15qUl6VTBsNPuDp5RVN6s2n3Bywd4mMu0T0,4347
89
+ fsspec/mapping.py,sha256=WFEXRWxujQwfzzkRP5tpdIE0265okAtlP97qFZGvV1k,8165
90
+ fsspec/parquet.py,sha256=qVxDhwc960SGOt5etcYAJxCr-7HQKP01687KpDR02Gw,19463
91
+ fsspec/registry.py,sha256=-dl7sh2tsfhMA2uxz5KQDsPFehQTgMJIbVjNq6QLoKU,11145
92
+ fsspec/spec.py,sha256=3t96RgizRN_slIuHXnuR0bXjVUfBS1TfuDrEua4oQvE,66277
93
+ fsspec/tests/abstract/__init__.py,sha256=i1wcFixV6QhOwdoB24c8oXjzobISNqiKVz9kl2DvAY8,10028
94
+ fsspec/tests/abstract/__pycache__/__init__.cpython-311.pyc,,
95
+ fsspec/tests/abstract/__pycache__/common.cpython-311.pyc,,
96
+ fsspec/tests/abstract/__pycache__/copy.cpython-311.pyc,,
97
+ fsspec/tests/abstract/__pycache__/get.cpython-311.pyc,,
98
+ fsspec/tests/abstract/__pycache__/put.cpython-311.pyc,,
99
+ fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
100
+ fsspec/tests/abstract/copy.py,sha256=gU5-d97U3RSde35Vp4RxPY4rWwL744HiSrJ8IBOp9-8,19967
101
+ fsspec/tests/abstract/get.py,sha256=vNR4HztvTR7Cj56AMo7_tx7TeYz1Jgr_2Wb8Lv-UiBY,20755
102
+ fsspec/tests/abstract/put.py,sha256=7aih17OKB_IZZh1Mkq1eBDIjobhtMQmI8x-Pw-S_aZk,21201
103
+ fsspec/transaction.py,sha256=jeexB-H6Aw_gN6Z7hoKKe6v8zizITq39-gyTgpipIKE,2251
104
+ fsspec/utils.py,sha256=_VX_0VwDtoAFSjMYrxvJvnPNX9FMoHO5BlFHXJ0bHFI,23053
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.42.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/fsspec-2024.2.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ fsspec
tuning-competition-baseline/.venv/lib/python3.11/site-packages/functorch/_C.cpython-311-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d0c8228a395e1b7975c5d22cd5fe655e5a7b7024723a69164e0c9045aee847d
3
+ size 324168
tuning-competition-baseline/.venv/lib/python3.11/site-packages/jinja2/__pycache__/compiler.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc6ec603b289fea3017e8bb0c8eb537328f368d775f0aee16f2837595da3258b
3
+ size 110499
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/__pycache__/ctx_mp.cpython-311.pyc ADDED
Binary file (71.2 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__init__.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .libmpf import (prec_to_dps, dps_to_prec, repr_dps,
2
+ round_down, round_up, round_floor, round_ceiling, round_nearest,
3
+ to_pickable, from_pickable, ComplexResult,
4
+ fzero, fnzero, fone, fnone, ftwo, ften, fhalf, fnan, finf, fninf,
5
+ math_float_inf, round_int, normalize, normalize1,
6
+ from_man_exp, from_int, to_man_exp, to_int, mpf_ceil, mpf_floor,
7
+ mpf_nint, mpf_frac,
8
+ from_float, from_npfloat, from_Decimal, to_float, from_rational, to_rational, to_fixed,
9
+ mpf_rand, mpf_eq, mpf_hash, mpf_cmp, mpf_lt, mpf_le, mpf_gt, mpf_ge,
10
+ mpf_pos, mpf_neg, mpf_abs, mpf_sign, mpf_add, mpf_sub, mpf_sum,
11
+ mpf_mul, mpf_mul_int, mpf_shift, mpf_frexp,
12
+ mpf_div, mpf_rdiv_int, mpf_mod, mpf_pow_int,
13
+ mpf_perturb,
14
+ to_digits_exp, to_str, str_to_man_exp, from_str, from_bstr, to_bstr,
15
+ mpf_sqrt, mpf_hypot)
16
+
17
+ from .libmpc import (mpc_one, mpc_zero, mpc_two, mpc_half,
18
+ mpc_is_inf, mpc_is_infnan, mpc_to_str, mpc_to_complex, mpc_hash,
19
+ mpc_conjugate, mpc_is_nonzero, mpc_add, mpc_add_mpf,
20
+ mpc_sub, mpc_sub_mpf, mpc_pos, mpc_neg, mpc_shift, mpc_abs,
21
+ mpc_arg, mpc_floor, mpc_ceil, mpc_nint, mpc_frac, mpc_mul, mpc_square,
22
+ mpc_mul_mpf, mpc_mul_imag_mpf, mpc_mul_int,
23
+ mpc_div, mpc_div_mpf, mpc_reciprocal, mpc_mpf_div,
24
+ complex_int_pow, mpc_pow, mpc_pow_mpf, mpc_pow_int,
25
+ mpc_sqrt, mpc_nthroot, mpc_cbrt, mpc_exp, mpc_log, mpc_cos, mpc_sin,
26
+ mpc_tan, mpc_cos_pi, mpc_sin_pi, mpc_cosh, mpc_sinh, mpc_tanh,
27
+ mpc_atan, mpc_acos, mpc_asin, mpc_asinh, mpc_acosh, mpc_atanh,
28
+ mpc_fibonacci, mpf_expj, mpf_expjpi, mpc_expj, mpc_expjpi,
29
+ mpc_cos_sin, mpc_cos_sin_pi)
30
+
31
+ from .libelefun import (ln2_fixed, mpf_ln2, ln10_fixed, mpf_ln10,
32
+ pi_fixed, mpf_pi, e_fixed, mpf_e, phi_fixed, mpf_phi,
33
+ degree_fixed, mpf_degree,
34
+ mpf_pow, mpf_nthroot, mpf_cbrt, log_int_fixed, agm_fixed,
35
+ mpf_log, mpf_log_hypot, mpf_exp, mpf_cos_sin, mpf_cos, mpf_sin, mpf_tan,
36
+ mpf_cos_sin_pi, mpf_cos_pi, mpf_sin_pi, mpf_cosh_sinh,
37
+ mpf_cosh, mpf_sinh, mpf_tanh, mpf_atan, mpf_atan2, mpf_asin,
38
+ mpf_acos, mpf_asinh, mpf_acosh, mpf_atanh, mpf_fibonacci)
39
+
40
+ from .libhyper import (NoConvergence, make_hyp_summator,
41
+ mpf_erf, mpf_erfc, mpf_ei, mpc_ei, mpf_e1, mpc_e1, mpf_expint,
42
+ mpf_ci_si, mpf_ci, mpf_si, mpc_ci, mpc_si, mpf_besseljn,
43
+ mpc_besseljn, mpf_agm, mpf_agm1, mpc_agm, mpc_agm1,
44
+ mpf_ellipk, mpc_ellipk, mpf_ellipe, mpc_ellipe)
45
+
46
+ from .gammazeta import (catalan_fixed, mpf_catalan,
47
+ khinchin_fixed, mpf_khinchin, glaisher_fixed, mpf_glaisher,
48
+ apery_fixed, mpf_apery, euler_fixed, mpf_euler, mertens_fixed,
49
+ mpf_mertens, twinprime_fixed, mpf_twinprime,
50
+ mpf_bernoulli, bernfrac, mpf_gamma_int,
51
+ mpf_factorial, mpc_factorial, mpf_gamma, mpc_gamma,
52
+ mpf_loggamma, mpc_loggamma, mpf_rgamma, mpc_rgamma,
53
+ mpf_harmonic, mpc_harmonic, mpf_psi0, mpc_psi0,
54
+ mpf_psi, mpc_psi, mpf_zeta_int, mpf_zeta, mpc_zeta,
55
+ mpf_altzeta, mpc_altzeta, mpf_zetasum, mpc_zetasum)
56
+
57
+ from .libmpi import (mpi_str,
58
+ mpi_from_str, mpi_to_str,
59
+ mpi_eq, mpi_ne,
60
+ mpi_lt, mpi_le, mpi_gt, mpi_ge,
61
+ mpi_add, mpi_sub, mpi_delta, mpi_mid,
62
+ mpi_pos, mpi_neg, mpi_abs, mpi_mul, mpi_div, mpi_exp,
63
+ mpi_log, mpi_sqrt, mpi_pow_int, mpi_pow, mpi_cos_sin,
64
+ mpi_cos, mpi_sin, mpi_tan, mpi_cot,
65
+ mpi_atan, mpi_atan2,
66
+ mpci_pos, mpci_neg, mpci_add, mpci_sub, mpci_mul, mpci_div, mpci_pow,
67
+ mpci_abs, mpci_pow, mpci_exp, mpci_log, mpci_cos, mpci_sin,
68
+ mpi_gamma, mpci_gamma, mpi_loggamma, mpci_loggamma,
69
+ mpi_rgamma, mpci_rgamma, mpi_factorial, mpci_factorial)
70
+
71
+ from .libintmath import (trailing, bitcount, numeral, bin_to_radix,
72
+ isqrt, isqrt_small, isqrt_fast, sqrt_fixed, sqrtrem, ifib, ifac,
73
+ list_primes, isprime, moebius, gcd, eulernum, stirling1, stirling2)
74
+
75
+ from .backend import (gmpy, sage, BACKEND, STRICT, MPZ, MPZ_TYPE,
76
+ MPZ_ZERO, MPZ_ONE, MPZ_TWO, MPZ_THREE, MPZ_FIVE, int_types,
77
+ HASH_MODULUS, HASH_BITS)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libelefun.cpython-311.pyc ADDED
Binary file (54.8 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libhyper.cpython-311.pyc ADDED
Binary file (52.9 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/__pycache__/libmpc.cpython-311.pyc ADDED
Binary file (43.8 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libelefun.py ADDED
@@ -0,0 +1,1428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module implements computation of elementary transcendental
3
+ functions (powers, logarithms, trigonometric and hyperbolic
4
+ functions, inverse trigonometric and hyperbolic) for real
5
+ floating-point numbers.
6
+
7
+ For complex and interval implementations of the same functions,
8
+ see libmpc and libmpi.
9
+
10
+ """
11
+
12
+ import math
13
+ from bisect import bisect
14
+
15
+ from .backend import xrange
16
+ from .backend import MPZ, MPZ_ZERO, MPZ_ONE, MPZ_TWO, MPZ_FIVE, BACKEND
17
+
18
+ from .libmpf import (
19
+ round_floor, round_ceiling, round_down, round_up,
20
+ round_nearest, round_fast,
21
+ ComplexResult,
22
+ bitcount, bctable, lshift, rshift, giant_steps, sqrt_fixed,
23
+ from_int, to_int, from_man_exp, to_fixed, to_float, from_float,
24
+ from_rational, normalize,
25
+ fzero, fone, fnone, fhalf, finf, fninf, fnan,
26
+ mpf_cmp, mpf_sign, mpf_abs,
27
+ mpf_pos, mpf_neg, mpf_add, mpf_sub, mpf_mul, mpf_div, mpf_shift,
28
+ mpf_rdiv_int, mpf_pow_int, mpf_sqrt,
29
+ reciprocal_rnd, negative_rnd, mpf_perturb,
30
+ isqrt_fast
31
+ )
32
+
33
+ from .libintmath import ifib
34
+
35
+
36
+ #-------------------------------------------------------------------------------
37
+ # Tuning parameters
38
+ #-------------------------------------------------------------------------------
39
+
40
+ # Cutoff for computing exp from cosh+sinh. This reduces the
41
+ # number of terms by half, but also requires a square root which
42
+ # is expensive with the pure-Python square root code.
43
+ if BACKEND == 'python':
44
+ EXP_COSH_CUTOFF = 600
45
+ else:
46
+ EXP_COSH_CUTOFF = 400
47
+ # Cutoff for using more than 2 series
48
+ EXP_SERIES_U_CUTOFF = 1500
49
+
50
+ # Also basically determined by sqrt
51
+ if BACKEND == 'python':
52
+ COS_SIN_CACHE_PREC = 400
53
+ else:
54
+ COS_SIN_CACHE_PREC = 200
55
+ COS_SIN_CACHE_STEP = 8
56
+ cos_sin_cache = {}
57
+
58
+ # Number of integer logarithms to cache (for zeta sums)
59
+ MAX_LOG_INT_CACHE = 2000
60
+ log_int_cache = {}
61
+
62
+ LOG_TAYLOR_PREC = 2500 # Use Taylor series with caching up to this prec
63
+ LOG_TAYLOR_SHIFT = 9 # Cache log values in steps of size 2^-N
64
+ log_taylor_cache = {}
65
+ # prec/size ratio of x for fastest convergence in AGM formula
66
+ LOG_AGM_MAG_PREC_RATIO = 20
67
+
68
+ ATAN_TAYLOR_PREC = 3000 # Same as for log
69
+ ATAN_TAYLOR_SHIFT = 7 # steps of size 2^-N
70
+ atan_taylor_cache = {}
71
+
72
+
73
+ # ~= next power of two + 20
74
+ cache_prec_steps = [22,22]
75
+ for k in xrange(1, bitcount(LOG_TAYLOR_PREC)+1):
76
+ cache_prec_steps += [min(2**k,LOG_TAYLOR_PREC)+20] * 2**(k-1)
77
+
78
+
79
+ #----------------------------------------------------------------------------#
80
+ # #
81
+ # Elementary mathematical constants #
82
+ # #
83
+ #----------------------------------------------------------------------------#
84
+
85
+ def constant_memo(f):
86
+ """
87
+ Decorator for caching computed values of mathematical
88
+ constants. This decorator should be applied to a
89
+ function taking a single argument prec as input and
90
+ returning a fixed-point value with the given precision.
91
+ """
92
+ f.memo_prec = -1
93
+ f.memo_val = None
94
+ def g(prec, **kwargs):
95
+ memo_prec = f.memo_prec
96
+ if prec <= memo_prec:
97
+ return f.memo_val >> (memo_prec-prec)
98
+ newprec = int(prec*1.05+10)
99
+ f.memo_val = f(newprec, **kwargs)
100
+ f.memo_prec = newprec
101
+ return f.memo_val >> (newprec-prec)
102
+ g.__name__ = f.__name__
103
+ g.__doc__ = f.__doc__
104
+ return g
105
+
106
+ def def_mpf_constant(fixed):
107
+ """
108
+ Create a function that computes the mpf value for a mathematical
109
+ constant, given a function that computes the fixed-point value.
110
+
111
+ Assumptions: the constant is positive and has magnitude ~= 1;
112
+ the fixed-point function rounds to floor.
113
+ """
114
+ def f(prec, rnd=round_fast):
115
+ wp = prec + 20
116
+ v = fixed(wp)
117
+ if rnd in (round_up, round_ceiling):
118
+ v += 1
119
+ return normalize(0, v, -wp, bitcount(v), prec, rnd)
120
+ f.__doc__ = fixed.__doc__
121
+ return f
122
+
123
def bsp_acot(q, a, b, hyperbolic):
    """
    Binary-splitting helper for acot_fixed.

    Returns a triple (p, q, r) of integers for the index range [a, b).
    A single-term range yields (+/-1, (2a+3)*q**2, 2a+3); the sign is
    always +1 when ``hyperbolic`` is true and otherwise alternates with
    the parity of ``a``. Larger ranges are split at the midpoint and the
    halves merged.
    """
    if b - a != 1:
        mid = (a+b)//2
        lp, lq, lr = bsp_acot(q, a, mid, hyperbolic)
        rp, rq, rr = bsp_acot(q, mid, b, hyperbolic)
        return rq*lp + lr*rp, lq*rq, lr*rr
    odd = MPZ(2*a + 3)
    if hyperbolic or a&1:
        numer = MPZ_ONE
    else:
        numer = -MPZ_ONE
    return numer, odd * q**2, odd
134
+
135
# the acoth(x) series converges like the geometric series for x^2
# N = ceil(p*log(2)/(2*log(x)))
def acot_fixed(a, prec, hyperbolic):
    """
    Fixed-point acot(a), or acoth(a) when ``hyperbolic`` is true, for an
    integer a. The series is summed with binary splitting; see
    http://numbers.computation.free.fr/Constants/Algorithms/splitting.html
    """
    # Number of terms: 0.35 is slightly more than log(2)/2, plus a margin
    nterms = int(0.35 * prec/math.log(a) + 20)
    num, den, _ = bsp_acot(a, 0, nterms, hyperbolic)
    scaled = (num + den) << prec
    return scaled // (den * a)
145
+
146
def machin(coefs, prec, hyperbolic=False):
    """
    Evaluate a Machin-like formula in fixed-point arithmetic.

    ``coefs`` is a list [(c, n), ...] describing the linear combination
    c*acot(n) + ... (or c*acoth(n) + ... when ``hyperbolic`` is true).
    The result is an integer scaled by 2**prec.
    """
    guard = 10
    work_prec = prec + guard
    total = MPZ_ZERO
    for weight, arg in coefs:
        term = acot_fixed(MPZ(arg), work_prec, hyperbolic)
        total += MPZ(weight) * term
    # Discard the guard bits
    return (total >> guard)
158
+
159
+ # Logarithms of integers are needed for various computations involving
160
+ # logarithms, powers, radix conversion, etc
161
+
162
@constant_memo
def ln2_fixed(prec):
    """
    Computes ln(2). This is done with a hyperbolic Machin-type formula,
    with binary splitting at high precision.
    """
    # ln(2) = 18*acoth(26) - 2*acoth(4801) + 8*acoth(8749)
    terms = [(18, 26), (-2, 4801), (8, 8749)]
    return machin(terms, prec, True)
169
+
170
@constant_memo
def ln10_fixed(prec):
    """
    Computes ln(10). This is done with a hyperbolic Machin-type formula.
    """
    # ln(10) = 46*acoth(31) + 34*acoth(49) + 20*acoth(161)
    terms = [(46, 31), (34, 49), (20, 161)]
    return machin(terms, prec, True)
176
+
177
+
178
r"""
For computation of pi, we use the Chudnovsky series:

                             oo
                             ___        k
              1             \       (-1)  (6 k)! (A + B k)
            ----- =          )     -----------------------
            12 pi           /___               3  3k+3/2
                                    (3 k)! (k!)  C
                            k = 0

where A, B, and C are certain integer constants. This series adds roughly
14 digits per term. Note that C^(3/2) can be extracted so that the
series contains only rational terms. This makes binary splitting very
efficient.

The recurrence formulas for the binary splitting were taken from
ftp://ftp.gmplib.org/pub/src/gmp-chudnovsky.c

Previously, Machin's formula was used at low precision and the AGM iteration
was used at high precision. However, the Chudnovsky series is essentially as
fast as the Machin formula at low precision and in practice about 3x faster
than the AGM at high precision (despite theoretically having a worse
asymptotic complexity), so there is no reason not to use it in all cases.

"""
204
+
205
# Constants in Chudnovsky's series
# A, B and C are used by bs_chudnovsky and pi_fixed; D is the final
# divisor in pi_fixed.
CHUD_A = MPZ(13591409)
CHUD_B = MPZ(545140134)
CHUD_C = MPZ(640320)
CHUD_D = MPZ(12)
210
+
211
def bs_chudnovsky(a, b, level, verbose):
    """
    Binary splitting over the terms a..b of the Chudnovsky series.

    Returns (g, p, q): p/q is the partial sum as an exact fraction and
    g is an auxiliary product reused when merging sub-ranges. ``level``
    is the recursion depth; with ``verbose`` set, progress is printed
    for the first four levels only.
    """
    if b-a == 1:
        # Single term
        g = MPZ((6*b-5)*(2*b-1)*(6*b-1))
        p = b**3 * CHUD_C**3 // 24
        q = (-1)**b * g * (CHUD_A+CHUD_B*b)
        return g, p, q
    if verbose and level < 4:
        print(" binary splitting", a, b)
    mid = (a+b)//2
    g_lo, p_lo, q_lo = bs_chudnovsky(a, mid, level+1, verbose)
    g_hi, p_hi, q_hi = bs_chudnovsky(mid, b, level+1, verbose)
    # Merge the two halves
    return g_lo*g_hi, p_lo*p_hi, q_lo*p_hi + q_hi*g_lo
232
+
233
@constant_memo
def pi_fixed(prec, verbose=False, verbose_base=None):
    """
    Compute floor(pi * 2**prec) as a big integer.

    This is done using Chudnovsky's series (see comments in
    libelefun.py for details).
    """
    # The Chudnovsky series gives 14.18 digits per term;
    # 3.3219280948 ~= log2(10) converts bits to decimal digits
    nterms = int(prec/3.3219280948/14.181647462 + 2)
    if verbose:
        print("binary splitting with N =", nterms)
    _, p, q = bs_chudnovsky(0, nterms, 0, verbose)
    # sqrt(C) as a fixed-point number with prec bits
    root_c = isqrt_fast(CHUD_C<<(2*prec))
    numerator = p*CHUD_C*root_c
    denominator = (q+CHUD_A*p)*CHUD_D
    return numerator//denominator
249
+
250
def degree_fixed(prec):
    """Fixed-point value of one degree in radians, pi/180, with prec bits."""
    pi_bits = pi_fixed(prec)
    return pi_bits//180
252
+
253
def bspe(a, b):
    """
    Binary splitting for the series of exp(1)-1 over the index range
    a..b. The partial sum is returned as an unreduced exact fraction
    (p, q).
    """
    if b-a != 1:
        mid = (a+b)//2
        num_lo, den_lo = bspe(a, mid)
        num_hi, den_hi = bspe(mid, b)
        return num_lo*den_hi+num_hi, den_lo*den_hi
    # Single term: 1/b
    return MPZ_ONE, MPZ(b)
264
+
265
@constant_memo
def e_fixed(prec):
    """
    Computes exp(1). This is done using the ordinary Taylor series for
    exp, with binary splitting. For a description of the algorithm,
    see:

        http://numbers.computation.free.fr/Constants/
            Algorithms/splitting.html
    """
    # Slight overestimate of N needed for 1/N! < 2**(-prec)
    # This could be tightened for large N.
    nterms = int(1.1*prec/math.log(prec) + 20)
    num, den = bspe(0, nterms)
    # bspe sums exp(1)-1 as num/den, so exp(1) = (num+den)/den
    return ((num+den)<<prec)//den
280
+
281
@constant_memo
def phi_fixed(prec):
    """
    Computes the golden ratio, (1+sqrt(5))/2
    """
    # Work with 10 guard bits
    work_prec = prec + 10
    root5 = isqrt_fast(MPZ_FIVE<<(2*work_prec))
    total = root5 + (MPZ_ONE << work_prec)
    # Shift by 11: 10 guard bits plus the division by 2
    return total >> 11
289
+
290
# mpf-valued (prec, rnd) versions of the fixed-point constants,
# each built by wrapping the corresponding *_fixed function
mpf_phi    = def_mpf_constant(phi_fixed)
mpf_pi     = def_mpf_constant(pi_fixed)
mpf_e      = def_mpf_constant(e_fixed)
mpf_degree = def_mpf_constant(degree_fixed)
mpf_ln2    = def_mpf_constant(ln2_fixed)
mpf_ln10   = def_mpf_constant(ln10_fixed)
296
+
297
+
298
@constant_memo
def ln_sqrt2pi_fixed(prec):
    """Fixed-point value of ln(sqrt(2*pi)) with prec bits."""
    work_prec = prec + 10
    two_pi = mpf_shift(mpf_pi(work_prec), 1)
    log_two_pi = mpf_log(two_pi, work_prec)
    # ln(sqrt(2*pi)) = ln(2*pi)/2
    # Converting with one bit less performs the halving
    return to_fixed(log_two_pi, prec-1)
303
+
304
@constant_memo
def sqrtpi_fixed(prec):
    """Fixed-point value of sqrt(pi) with prec bits."""
    pi_bits = pi_fixed(prec)
    return sqrt_fixed(pi_bits, prec)
307
+
308
# mpf-valued (prec, rnd) versions of sqrt(pi) and ln(sqrt(2*pi))
mpf_sqrtpi = def_mpf_constant(sqrtpi_fixed)
mpf_ln_sqrt2pi = def_mpf_constant(ln_sqrt2pi_fixed)
310
+
311
+
312
+ #----------------------------------------------------------------------------#
313
+ # #
314
+ # Powers #
315
+ # #
316
+ #----------------------------------------------------------------------------#
317
+
318
def mpf_pow(s, t, prec, rnd=round_fast):
    """
    Compute s**t. Raises ComplexResult if s is negative and t is
    fractional.

    s and t are mpf tuples (sign, man, exp, bc). A negative exponent
    field in t is what is treated as "fractional" here.
    """
    ssign, sman, sexp, sbc = s
    tsign, tman, texp, tbc = t
    if ssign and texp < 0:
        raise ComplexResult("negative number raised to a fractional power")
    # t is an integer: hand over to the integer power routine
    if texp >= 0:
        return mpf_pow_int(s, (-1)**tsign * (tman<<texp), prec, rnd)
    # s**(n/2) = sqrt(s)**n
    if texp == -1:
        if tman == 1:
            # t = +/- 1/2
            if tsign:
                # The square root is in the denominator, so it is rounded
                # in the reciprocal direction
                return mpf_div(fone, mpf_sqrt(s, prec+10,
                    reciprocal_rnd[rnd]), prec, rnd)
            return mpf_sqrt(s, prec, rnd)
        else:
            if tsign:
                return mpf_pow_int(mpf_sqrt(s, prec+10,
                    reciprocal_rnd[rnd]), -tman, prec, rnd)
            return mpf_pow_int(mpf_sqrt(s, prec+10, rnd), tman, prec, rnd)
    # General formula: s**t = exp(t*log(s))
    # TODO: handle rnd direction of the logarithm carefully
    c = mpf_log(s, prec+10, rnd)
    return mpf_exp(mpf_mul(t, c), prec, rnd)
345
+
346
def int_pow_fixed(y, n, prec):
    """n-th power of a fixed point number with precision prec

       Returns the power in the form man, exp,
       man * 2**exp ~= y**n
    """
    # Squaring is returned exactly, with no truncation
    if n == 2:
        return (y*y), 0
    bc = bitcount(y)
    exp = 0
    # Intermediate products are truncated to at most workprec bits
    workprec = 2 * (prec + 4*bitcount(n) + 4)
    # Accumulator starts at one: mantissa pm, exponent pe, bit count pbc
    _, pm, pe, pbc = fone
    # Binary exponentiation: y holds the repeated squares (with exponent
    # exp and bit count bc), pm collects the factors for the set bits of n
    while 1:
        if n & 1:
            pm = pm*y
            pe = pe+exp
            # Update the bit count of the product: start from a lower
            # estimate and correct it with a table lookup on the top bits
            pbc += bc - 2
            pbc = pbc + bctable[int(pm >> pbc)]
            if pbc > workprec:
                # Drop excess low bits and compensate in the exponent
                pm = pm >> (pbc-workprec)
                pe += pbc - workprec
                pbc = workprec
            n -= 1
            if not n:
                break
        y = y*y
        exp = exp+exp
        bc = bc + bc - 2
        bc = bc + bctable[int(y >> bc)]
        if bc > workprec:
            y = y >> (bc-workprec)
            exp += bc - workprec
            bc = workprec
        n = n // 2
    return pm, pe
381
+
382
# mpf_nthroot(s, n, prec, rnd) computes the real n-th root of a
# positive mpf tuple s.
# To compute the root we start from a 50-bit estimate for r
# generated with ordinary floating-point arithmetic, and then refine
# the value to full accuracy using the iteration

#            1  /                     y       \
#   r     = --- | (n-1)  * r   +  ----------  |
#    n+1     n  \           n     r_n**(n-1)  /

# which is simply Newton's method applied to the equation r**n = y.
# With giant_steps(start, prec+extra) = [p0,...,pm, prec+extra]
# and y = man * 2**-shift one has
# (man * 2**exp)**(1/n) =
# y**(1/n) * 2**(start-prec/n) * 2**(p0-start) * ... * 2**(prec+extra-pm) *
# 2**((exp+shift-(n-1)*prec)/n - extra)
# The last factor is accounted for by the exponent exp1 that mpf_nthroot
# passes to from_man_exp.
399
+
400
def nthroot_fixed(y, n, prec, exp1):
    """
    Newton iteration for the n-th root of the fixed-point integer y.

    A 50-bit starting value is refined through the precisions given by
    giant_steps(start, prec+extra). exp1 is accepted but not used in
    this function.
    """
    start = 50
    try:
        # Initial estimate via ordinary floating-point arithmetic
        y1 = rshift(y, prec - n*start)
        r = MPZ(int(y1**(1.0/n)))
    except OverflowError:
        # y1 is too large for the float power; use mpf arithmetic instead
        y1 = from_int(y1, start)
        fn = from_int(n)
        fn = mpf_rdiv_int(1, fn, start)
        r = mpf_pow(y1, fn, start)
        r = to_int(r)
    extra = 10
    extra1 = n
    prevp = start
    for p in giant_steps(start, prec+extra):
        # r**(n-1) as mantissa pm and exponent pe
        pm, pe = int_pow_fixed(r, n-1, prevp)
        r2 = rshift(pm, (n-1)*prevp - p - pe - extra1)
        # B ~= y / r**(n-1) at the new precision
        B = lshift(y, 2*p-prec+extra1)//r2
        # Newton step: r <- (B + (n-1)*r) / n
        r = (B + (n-1) * lshift(r, p-prevp))//n
        prevp = p
    return r
421
+
422
def mpf_nthroot(s, n, prec, rnd=round_fast):
    """nth-root of a positive number

    Use the Newton method when faster, otherwise use x**(1/n)

    s is an mpf tuple (sign, man, exp, bc) and n an integer. A negative
    n gives the reciprocal of the (-n)-th root. Raises ComplexResult
    when s is negative.
    """
    sign, man, exp, bc = s
    if sign:
        raise ComplexResult("nth root of a negative number")
    # Special values: nan, zero, +inf
    if not man:
        if s == fnan:
            return fnan
        if s == fzero:
            if n > 0:
                return fzero
            if n == 0:
                return fone
            return finf
        # Infinity
        if not n:
            return fnan
        if n < 0:
            return fzero
        return finf
    flag_inverse = False
    if n < 2:
        if n == 0:
            return fone
        if n == 1:
            return mpf_pos(s, prec, rnd)
        if n == -1:
            return mpf_div(fone, s, prec, rnd)
        # n < 0
        # Compute the positive root with reversed rounding and a few
        # extra bits, then invert at the end
        rnd = reciprocal_rnd[rnd]
        flag_inverse = True
        extra_inverse = 5
        prec += extra_inverse
        n = -n
    # Large n or comparatively low precision: use s**(1/n) via mpf_pow
    if n > 20 and (n >= 20000 or prec < int(233 + 28.3 * n**0.62)):
        prec2 = prec + 10
        fn = from_int(n)
        nth = mpf_rdiv_int(1, fn, prec2)
        r = mpf_pow(s, nth, prec2, rnd)
        s = normalize(r[0], r[1], r[2], r[3], prec, rnd)
        if flag_inverse:
            return mpf_div(fone, s, prec-extra_inverse, rnd)
        else:
            return s
    # Convert to a fixed-point number with prec2 bits.
    prec2 = prec + 2*n - (prec%n)
    # a few tests indicate that
    # for 10 < n < 10**4 a bit more precision is needed
    if n > 10:
        prec2 += prec2//10
        prec2 = prec2 - prec2%n
    # Mantissa may have more bits than we need. Trim it down.
    shift = bc - prec2
    # Adjust exponents to make prec2 and exp+shift multiples of n.
    sign1 = 0
    es = exp+shift
    if es < 0:
        sign1 = 1
        es = -es
    if sign1:
        shift += es%n
    else:
        shift -= es%n
    man = rshift(man, shift)
    extra = 10
    exp1 = ((exp+shift-(n-1)*prec2)//n) - extra
    # For some rounding modes the mantissa is bumped by one unit before
    # the root is taken
    rnd_shift = 0
    if flag_inverse:
        if rnd == 'u' or rnd == 'c':
            rnd_shift = 1
    else:
        if rnd == 'd' or rnd == 'f':
            rnd_shift = 1
    man = nthroot_fixed(man+rnd_shift, n, prec2, exp1)
    s = from_man_exp(man, exp1, prec, rnd)
    if flag_inverse:
        return mpf_div(fone, s, prec-extra_inverse, rnd)
    else:
        return s
504
+
505
def mpf_cbrt(s, prec, rnd=round_fast):
    """Cube root of a positive number, delegated to mpf_nthroot with n = 3."""
    cube_root = mpf_nthroot(s, 3, prec, rnd)
    return cube_root
508
+
509
+ #----------------------------------------------------------------------------#
510
+ # #
511
+ # Logarithms #
512
+ # #
513
+ #----------------------------------------------------------------------------#
514
+
515
+
516
def log_int_fixed(n, prec, ln2=None):
    """
    Fast computation of log(n), caching the value for small n,
    intended for zeta sums.

    Returns log(n) as a fixed-point integer with prec bits. ``ln2`` may
    supply a precomputed fixed-point ln(2) for the Taylor branch.
    """
    # Serve from the cache when the stored value is precise enough
    if n in log_int_cache:
        value, vprec = log_int_cache[n]
        if vprec >= prec:
            return value >> (vprec - prec)
    wp = prec + 10
    # NOTE(review): wp = prec + 10 is compared with LOG_TAYLOR_SHIFT (a
    # cache step exponent) rather than with a precision limit, so this
    # branch can only run for negative prec. LOG_TAYLOR_PREC, the bound
    # documented by log_taylor_cached, may have been intended -- confirm
    # before changing, since enabling it could alter low-order bits.
    if wp <= LOG_TAYLOR_SHIFT:
        if ln2 is None:
            ln2 = ln2_fixed(wp)
        # log(n) = log(n / 2**r) + r*log(2), with r the bit length of n
        r = bitcount(n)
        x = n << (wp-r)
        v = log_taylor_cached(x, wp) + r*ln2
    else:
        v = to_fixed(mpf_log(from_int(n), wp+5), wp)
    # Only small n are cached
    if n < MAX_LOG_INT_CACHE:
        log_int_cache[n] = (v, wp)
    return v >> (wp-prec)
537
+
538
def agm_fixed(a, b, prec):
    """
    Arithmetic-geometric mean of the fixed-point integers a and b, both
    assumed to be close to unit magnitude.

    At least five iterations are always performed; afterwards the loop
    stops once the arithmetic mean moves by fewer than 8 units. ``prec``
    is not used by the iteration itself.
    """
    rounds = 0
    while True:
        mean = (a+b)>>1
        if rounds > 4 and abs(a-mean) < 8:
            return a
        # Both updates use the old a: geometric mean first, then the mean
        a, b = mean, isqrt_fast(a*b)
        rounds += 1
552
+
553
def log_agm(x, prec):
    """
    Fixed-point computation of -log(x) = log(1/x), suitable
    for large precision. It is required that 0 < x < 1. The
    algorithm used is the Sasaki-Kanada formula

        -log(x) = pi/agm(theta2(x)^2,theta3(x)^2). [1]

    For faster convergence in the theta functions, x should
    be chosen closer to 0.

    Guard bits must be added by the caller.

    HYPOTHESIS: if x = 2^(-n), n bits need to be added to
    account for the truncation to a fixed-point number,
    and this is the only significant cancellation error.

    The number of bits lost to roundoff is small and can be
    considered constant.

    [1] Richard P. Brent, "Fast Algorithms for High-Precision
        Computation of Elementary Functions (extended abstract)",
        http://wwwmaths.anu.edu.au/~brent/pd/RNC7-Brent.pdf

    """
    x2 = (x*x) >> prec
    # Compute jtheta2(x)**2
    # The series is summed until the current term truncates to zero
    s = a = b = x2
    while a:
        b = (b*x2) >> prec
        a = (a*b) >> prec
        s += a
    s += (MPZ_ONE<<prec)
    s = (s*s)>>(prec-2)
    s = (s*isqrt_fast(x<<prec))>>prec
    # Compute jtheta3(x)**2
    t = a = b = x
    while a:
        b = (b*x2) >> prec
        a = (a*b) >> prec
        t += a
    t = (MPZ_ONE<<prec) + (t<<1)
    t = (t*t)>>prec
    # Final formula
    p = agm_fixed(s, t, prec)
    return (pi_fixed(prec) << prec) // p
599
+
600
def log_taylor(x, prec, r=0):
    """
    Fixed-point calculation of log(x). It is assumed that x is close
    enough to 1 for the Taylor series to converge quickly. Convergence
    can be improved by specifying r > 0 to compute
    log(x^(1/2^r))*2^r, at the cost of performing r square roots.

    The caller must provide sufficient guard bits.
    """
    # Take r successive square roots of x
    for i in xrange(r):
        x = isqrt_fast(x<<prec)
    one = MPZ_ONE << prec
    # log(x) = 2*(v + v^3/3 + v^5/5 + ...) with v = (x-1)/(x+1)
    v = ((x-one)<<prec)//(x+one)
    sign = v < 0
    if sign:
        v = -v
    v2 = (v*v) >> prec
    v4 = (v2*v2) >> prec
    # The series is split in two interleaved sums that both advance by
    # v^4 per step: s0 holds the terms v^(4j+1)/(4j+1), s1 holds the
    # terms v^(4j+3)/(4j+3) with one factor v^2 left out until the end
    s0 = v
    s1 = v//3
    v = (v*v4) >> prec
    k = 5
    while v:
        s0 += v // k
        k += 2
        s1 += v // k
        v = (v*v4) >> prec
        k += 2
    s1 = (s1*v2) >> prec
    # Factor 2 from the series and 2^r to undo the square roots
    s = (s0+s1) << (1+r)
    if sign:
        return -s
    return s
633
+
634
def log_taylor_cached(x, prec):
    """
    Fixed-point computation of log(x), assuming x in (0.5, 2)
    and prec <= LOG_TAYLOR_PREC.
    """
    # n is x truncated to LOG_TAYLOR_SHIFT fractional bits; it selects
    # the table point a with a cached value of log(a)
    n = x >> (prec-LOG_TAYLOR_SHIFT)
    cached_prec = cache_prec_steps[prec]
    dprec = cached_prec - prec
    if (n, cached_prec) in log_taylor_cache:
        a, log_a = log_taylor_cache[n, cached_prec]
    else:
        a = n << (cached_prec - LOG_TAYLOR_SHIFT)
        log_a = log_taylor(a, cached_prec, 8)
        log_taylor_cache[n, cached_prec] = (a, log_a)
    # Reduce the cached entry to the working precision
    a >>= dprec
    log_a >>= dprec
    # log(x) = log(a) + log(1+u) with u = (x-a)/a, and
    # log(1+u) = 2*(v + v^3/3 + v^5/5 + ...) with v = u/(2+u)
    u = ((x - a) << prec) // a
    v = (u << prec) // ((MPZ_TWO << prec) + u)
    v2 = (v*v) >> prec
    v4 = (v2*v2) >> prec
    # Two interleaved sums, as in log_taylor
    s0 = v
    s1 = v//3
    v = (v*v4) >> prec
    k = 5
    while v:
        s0 += v//k
        k += 2
        s1 += v//k
        v = (v*v4) >> prec
        k += 2
    s1 = (s1*v2) >> prec
    s = (s0+s1) << 1
    return log_a + s
667
+
668
def mpf_log(x, prec, rnd=round_fast):
    """
    Compute the natural logarithm of the mpf value x. If x is negative,
    ComplexResult is raised.
    """
    sign, man, exp, bc = x
    #------------------------------------------------------------------
    # Handle special values
    if not man:
        if x == fzero: return fninf
        if x == finf: return finf
        if x == fnan: return fnan
    if sign:
        raise ComplexResult("logarithm of a negative number")
    wp = prec + 20
    #------------------------------------------------------------------
    # Handle log(2^n) = n*log(2).
    # Here we catch the only possible exact value, log(1) = 0
    if man == 1:
        if not exp:
            return fzero
        return from_man_exp(exp*ln2_fixed(wp), -wp, prec, rnd)
    mag = exp+bc
    abs_mag = abs(mag)
    #------------------------------------------------------------------
    # Handle x = 1+eps, where log(x) ~ x. We need to check for
    # cancellation when moving to fixed-point math and compensate
    # by increasing the precision. Note that abs_mag in (0, 1) <=>
    # 0.5 < x < 2 and x != 1
    if abs_mag <= 1:
        # Calculate t = x-1 to measure distance from 1 in bits
        tsign = 1-abs_mag
        if tsign:
            tman = (MPZ_ONE<<bc) - man
        else:
            tman = man - (MPZ_ONE<<(bc-1))
        tbc = bitcount(tman)
        cancellation = bc - tbc
        if cancellation > wp:
            # x is so close to 1 that x-1, perturbed in the proper
            # direction, is returned as the result
            t = normalize(tsign, tman, abs_mag-bc, tbc, tbc, 'n')
            return mpf_perturb(t, tsign, prec, rnd)
        else:
            wp += cancellation
        # TODO: if close enough to 1, we could use Taylor series
        # even in the AGM precision range, since the Taylor series
        # converges rapidly
    #------------------------------------------------------------------
    # Another special case:
    # n*log(2) is a good enough approximation
    if abs_mag > 10000:
        if bitcount(abs_mag) > wp:
            return from_man_exp(exp*ln2_fixed(wp), -wp, prec, rnd)
    #------------------------------------------------------------------
    # General case.
    # Perform argument reduction using log(x) = log(x*2^n) - n*log(2):
    # If we are in the Taylor precision range, choose magnitude 0 or 1.
    # If we are in the AGM precision range, choose magnitude -m for
    # some large m; benchmarking on one machine showed m = prec/20 to be
    # optimal between 1000 and 100,000 digits.
    if wp <= LOG_TAYLOR_PREC:
        m = log_taylor_cached(lshift(man, wp-bc), wp)
        if mag:
            m += mag*ln2_fixed(wp)
    else:
        optimal_mag = -wp//LOG_AGM_MAG_PREC_RATIO
        n = optimal_mag - mag
        x = mpf_shift(x, n)
        wp += (-optimal_mag)
        # log_agm returns -log of its argument
        m = -log_agm(to_fixed(x, wp), wp)
        m -= n*ln2_fixed(wp)
    return from_man_exp(m, -wp, prec, rnd)
739
+
740
def mpf_log_hypot(a, b, prec, rnd):
    """
    Computes log(sqrt(a^2+b^2)) accurately.

    a and b are mpf tuples; index 1 of each tuple is the mantissa,
    which is zero for the special values inf/nan/0.
    """
    # If either a or b is inf/nan/0, assume it to be a
    if not b[1]:
        a, b = b, a
    # a is inf/nan/0
    if not a[1]:
        # both are inf/nan/0
        if not b[1]:
            if a == b == fzero:
                return fninf
            if fnan in (a, b):
                return fnan
            # at least one term is (+/- inf)^2
            return finf
        # only a is inf/nan/0
        if a == fzero:
            # log(sqrt(0+b^2)) = log(|b|)
            return mpf_log(mpf_abs(b), prec, rnd)
        if a == fnan:
            return fnan
        return finf
    # Exact
    a2 = mpf_mul(a,a)
    b2 = mpf_mul(b,b)
    extra = 20
    # Not exact
    h2 = mpf_add(a2, b2, prec+extra)
    # Measure how close a^2+b^2 is to 1, where the logarithm cancels
    cancelled = mpf_add(h2, fnone, 10)
    mag_cancelled = cancelled[2]+cancelled[3]
    # Just redo the sum exactly if necessary (could be smarter
    # and avoid memory allocation when a or b is precisely 1
    # and the other is tiny...)
    if cancelled == fzero or mag_cancelled < -extra//2:
        h2 = mpf_add(a2, b2, prec+extra-min(a2[2],b2[2]))
    # log(sqrt(h2)) = log(h2)/2
    return mpf_shift(mpf_log(h2, prec, rnd), -1)
778
+
779
+
780
+ #----------------------------------------------------------------------
781
+ # Inverse tangent
782
+ #
783
+
784
def atan_newton(x, prec):
    """
    Fixed-point atan(x) by Newton iteration, starting from a
    floating-point estimate and refining it through the precisions
    given by giant_steps.
    """
    # Initial approximation from math.atan; for large prec only the top
    # 53 bits of x are used
    if prec >= 100:
        r = math.atan(int((x>>(prec-53)))/2.0**53)
    else:
        r = math.atan(int(x)/2.0**prec)
    prevp = 50
    r = MPZ(int(r * 2.0**53) >> (53-prevp))
    extra_p = 50
    for wp in giant_steps(prevp, prec):
        wp += extra_p
        r = r << (wp-prevp)
        cos, sin = cos_sin_fixed(r, wp)
        tan = (sin << wp) // cos
        # Newton correction: (tan(r) - x) / (1 + tan(r)^2)
        a = ((tan-rshift(x, prec-wp)) << wp) // ((MPZ_ONE<<wp) + ((tan**2)>>wp))
        r = r - a
        prevp = wp
    return rshift(r, prevp-prec)
801
+
802
def atan_taylor_get_cached(n, prec):
    """
    Return (a, atan(a)) as fixed-point integers with prec bits for the
    table point a = n * 2**-ATAN_TAYLOR_SHIFT, computing and caching
    the value on first use.
    """
    # Taylor series with caching wins up to huge precisions
    # To avoid unnecessary precomputation at low precision, we
    # do it in steps
    # Round to next power of 2
    prec2 = (1<<(bitcount(prec-1))) + 20
    dprec = prec2 - prec
    if (n, prec2) in atan_taylor_cache:
        a, atan_a = atan_taylor_cache[n, prec2]
    else:
        a = n << (prec2 - ATAN_TAYLOR_SHIFT)
        atan_a = atan_newton(a, prec2)
        atan_taylor_cache[n, prec2] = (a, atan_a)
    # Reduce the cached entry to the requested precision
    return (a >> dprec), (atan_a >> dprec)
816
+
817
def atan_taylor(x, prec):
    """
    Fixed-point atan(x) from a cached table value plus a Taylor series
    for the remainder.
    """
    # n is x truncated to ATAN_TAYLOR_SHIFT fractional bits
    n = (x >> (prec-ATAN_TAYLOR_SHIFT))
    a, atan_a = atan_taylor_get_cached(n, prec)
    d = x - a
    # atan(x) = atan(a) + atan(v) with v = (x-a)/(1+a*x); the
    # denominator is written as a^2 + a*d + 1
    s0 = v = (d << prec) // ((a**2 >> prec) + (a*d >> prec) + (MPZ_ONE << prec))
    v2 = (v**2 >> prec)
    v4 = (v2 * v2) >> prec
    # Two interleaved sums advancing by v^4: s0 holds the positive terms
    # v^(4j+1)/(4j+1), s1 the terms v^(4j+3)/(4j+3) without one factor v^2
    s1 = v//3
    v = (v * v4) >> prec
    k = 5
    while v:
        s0 += v // k
        k += 2
        s1 += v // k
        v = (v * v4) >> prec
        k += 2
    s1 = (s1 * v2) >> prec
    # The s1 terms carry the negative signs of the alternating series
    s = s0 - s1
    return atan_a + s
836
+
837
def atan_inf(sign, prec, rnd):
    """
    Limit of atan at infinity: pi/2 for sign 0, -pi/2 otherwise. For
    the negative case pi is computed with the negated rounding mode
    before the sign is flipped.
    """
    if sign:
        half_pi = mpf_shift(mpf_pi(prec, negative_rnd[rnd]), -1)
        return mpf_neg(half_pi)
    return mpf_shift(mpf_pi(prec, rnd), -1)
841
+
842
def mpf_atan(x, prec, rnd=round_fast):
    """Compute atan(x) for the mpf tuple x, rounded to prec bits."""
    sign, man, exp, bc = x
    # Special values: zero, +/-inf, nan
    if not man:
        if x == fzero: return fzero
        if x == finf: return atan_inf(0, prec, rnd)
        if x == fninf: return atan_inf(1, prec, rnd)
        return fnan
    mag = exp + bc
    # Essentially infinity
    if mag > prec+20:
        return atan_inf(sign, prec, rnd)
    # Essentially ~ x
    if -mag > prec+20:
        return mpf_perturb(x, 1-sign, prec, rnd)
    wp = prec + 30 + abs(mag)
    # For large x, use atan(x) = pi/2 - atan(1/x)
    if mag >= 2:
        x = mpf_rdiv_int(1, x, wp)
        reciprocal = True
    else:
        reciprocal = False
    # Work with |x| in fixed point; the sign is restored at the end
    t = to_fixed(x, wp)
    if sign:
        t = -t
    if wp < ATAN_TAYLOR_PREC:
        a = atan_taylor(t, wp)
    else:
        a = atan_newton(t, wp)
    if reciprocal:
        a = ((pi_fixed(wp)>>1)+1) - a
    if sign:
        a = -a
    return from_man_exp(a, -wp, prec, rnd)
875
+
876
# TODO: cleanup the special cases
def mpf_atan2(y, x, prec, rnd=round_fast):
    """
    Two-argument arctangent of the mpf tuples y and x, rounded to prec
    bits.
    """
    xsign, xman, xexp, xbc = x
    ysign, yman, yexp, ybc = y
    # y is zero, +/-inf or nan
    if not yman:
        if y == fzero and x != fnan:
            if mpf_sign(x) >= 0:
                return fzero
            return mpf_pi(prec, rnd)
        if y in (finf, fninf):
            if x in (finf, fninf):
                return fnan
            # pi/2
            if y == finf:
                return mpf_shift(mpf_pi(prec, rnd), -1)
            # -pi/2
            return mpf_neg(mpf_shift(mpf_pi(prec, negative_rnd[rnd]), -1))
        return fnan
    # Negative y: use atan2(y, x) = -atan2(-y, x) with negated rounding
    if ysign:
        return mpf_neg(mpf_atan2(mpf_neg(y), x, prec, negative_rnd[rnd]))
    # x is zero, +/-inf or nan; y is a positive finite number here
    if not xman:
        if x == fnan:
            return fnan
        if x == finf:
            return fzero
        if x == fninf:
            return mpf_pi(prec, rnd)
        # NOTE(review): y has a nonzero mantissa at this point, so this
        # check looks unreachable -- confirm before removing
        if y == fzero:
            return fzero
        return mpf_shift(mpf_pi(prec, rnd), -1)
    tquo = mpf_atan(mpf_div(y, x, prec+4), prec+4)
    # Negative x: shift the result by pi
    if xsign:
        return mpf_add(mpf_pi(prec+4), tquo, prec, rnd)
    else:
        return mpf_pos(tquo, prec, rnd)
911
+
912
def mpf_asin(x, prec, rnd=round_fast):
    """
    Inverse sine of the mpf tuple x, rounded to prec bits. Raises
    ComplexResult when the magnitude of x exceeds 1.
    """
    sign, man, exp, bc = x
    if bc+exp > 0:
        # Magnitude of at least 1: only +1 and -1 are acceptable
        if x not in (fone, fnone):
            raise ComplexResult("asin(x) is real only for -1 <= x <= 1")
    # asin(x) = 2*atan(x/(1+sqrt(1-x**2)))
    wp = prec + 15
    x_squared = mpf_mul(x, x)
    cosine = mpf_sqrt(mpf_sub(fone, x_squared, wp), wp)
    denom = mpf_add(fone, cosine, wp)
    ratio = mpf_div(x, denom, wp)
    return mpf_shift(mpf_atan(ratio, prec, rnd), 1)
922
+
923
def mpf_acos(x, prec, rnd=round_fast):
    """
    Inverse cosine of the mpf tuple x, rounded to prec bits. Raises
    ComplexResult when the magnitude of x exceeds 1.
    """
    # acos(x) = 2*atan(sqrt(1-x**2)/(1+x))
    sign, man, exp, bc = x
    if bc + exp > 0:
        # Magnitude of at least 1: -1 gives pi exactly, +1 goes through
        # the general formula, anything else is out of range
        if x == fnone:
            return mpf_pi(prec, rnd)
        if x != fone:
            raise ComplexResult("acos(x) is real only for -1 <= x <= 1")
    wp = prec + 15
    x_squared = mpf_mul(x, x)
    sine = mpf_sqrt(mpf_sub(fone, x_squared, wp), wp)
    ratio = mpf_div(sine, mpf_add(fone, x, wp), wp)
    return mpf_shift(mpf_atan(ratio, prec, rnd), 1)
936
+
937
def mpf_asinh(x, prec, rnd=round_fast):
    """Inverse hyperbolic sine of the mpf tuple x, rounded to prec bits."""
    sign, man, exp, bc = x
    wp = prec + 20
    mag = exp+bc
    if mag < -8:
        if mag < -wp:
            # x is tiny: return x perturbed by one unit
            return mpf_perturb(x, 1-sign, prec, rnd)
        # Extra working precision for small x
        wp += (-mag)
    # asinh(x) = log(x+sqrt(x**2+1))
    # use reflection symmetry to avoid cancellation
    root = mpf_sqrt(mpf_add(mpf_mul(x, x), fone, wp), wp)
    arg = mpf_add(mpf_abs(x), root, wp)
    if not sign:
        return mpf_log(arg, prec, rnd)
    return mpf_neg(mpf_log(arg, prec, negative_rnd[rnd]))
953
+
954
def mpf_acosh(x, prec, rnd=round_fast):
    """
    Inverse hyperbolic cosine of the mpf tuple x, rounded to prec bits.
    Raises ComplexResult when x compares below 1.
    """
    # acosh(x) = log(x+sqrt(x**2-1))
    wp = prec + 15
    if mpf_cmp(x, fone) == -1:
        raise ComplexResult("acosh(x) is real only for x >= 1")
    x_squared = mpf_mul(x,x)
    root = mpf_sqrt(mpf_add(x_squared, fnone, wp), wp)
    return mpf_log(mpf_add(x, root, wp), prec, rnd)
961
+
962
def mpf_atanh(x, prec, rnd=round_fast):
    """
    Inverse hyperbolic tangent of the mpf tuple x, rounded to prec
    bits. Returns +/-inf at x = +/-1 and raises ComplexResult for
    arguments outside [-1, 1].
    """
    # atanh(x) = log((1+x)/(1-x))/2
    sign, man, exp, bc = x
    # Zero mantissa with a nonzero exponent field: a special value
    if (not man) and exp:
        if x in (fzero, fnan):
            return x
        raise ComplexResult("atanh(x) is real only for -1 <= x <= 1")
    mag = bc + exp
    if mag > 0:
        # |x| >= 1: only exactly +/-1 is allowed
        if mag == 1 and man == 1:
            return [finf, fninf][sign]
        raise ComplexResult("atanh(x) is real only for -1 <= x <= 1")
    wp = prec + 15
    if mag < -8:
        if mag < -wp:
            # x is tiny: return x perturbed by one unit
            return mpf_perturb(x, sign, prec, rnd)
        wp += (-mag)
    numer = mpf_add(x, fone, wp)
    denom = mpf_sub(fone, x, wp)
    quotient = mpf_div(numer, denom, wp)
    return mpf_shift(mpf_log(quotient, prec, rnd), -1)
982
+
983
def mpf_fibonacci(x, prec, rnd=round_fast):
    """
    Fibonacci number F(x) for the mpf tuple x, rounded to prec bits.
    Small integer arguments are evaluated exactly through ifib; all
    other arguments use a Binet-type formula.
    """
    sign, man, exp, bc = x
    if not man:
        # Special values are returned unchanged, except -inf -> nan
        return fnan if x == fninf else x
    # F(2^n) ~= 2^(2^n)
    size = abs(exp+bc)
    # Exact
    if exp >= 0 and (size < 10 or size <= bitcount(prec)):
        return from_int(ifib(to_int(x)), prec, rnd)
    # Use the modified Binet formula
    wp = prec + size + 20
    phi = mpf_phi(wp)
    # 2*phi - 1
    denom = mpf_add(mpf_shift(phi, 1), fnone, wp)
    phi_pow = mpf_pow(phi, x, wp)
    correction = mpf_div(mpf_cos_pi(x, wp), phi_pow, wp)
    numer = mpf_sub(phi_pow, correction, wp)
    return mpf_div(numer, denom, prec, rnd)
1005
+
1006
+
1007
+ #-------------------------------------------------------------------------------
1008
+ # Exponential-type functions
1009
+ #-------------------------------------------------------------------------------
1010
+
1011
def exponential_series(x, prec, type=0):
    """
    Taylor series for cosh/sinh or cos/sin.

    type = 0 -- returns exp(x)  (slightly faster than cosh+sinh)
    type = 1 -- returns (cosh(x), sinh(x))
    type = 2 -- returns (cos(x), sin(x))

    x and the results are fixed-point integers with prec bits.
    """
    # Work with |x| and restore the sign at the end
    if x < 0:
        x = -x
        sign = 1
    else:
        sign = 0
    # r is the number of halvings applied to x; they are undone after
    # the series by r squarings / double-angle steps
    r = int(0.5*prec**0.5)
    xmag = bitcount(x) - prec
    r = max(0, xmag + r)
    extra = 10 + 2*max(r,-xmag)
    wp = prec + extra
    x <<= (extra - r)
    one = MPZ_ONE << wp
    # Alternating signs for cos/sin
    alt = (type == 2)
    if prec < EXP_SERIES_U_CUTOFF:
        # Even-power series split in two interleaved sums advancing by x^4
        x2 = a = (x*x) >> wp
        x4 = (x2*x2) >> wp
        s0 = s1 = MPZ_ZERO
        k = 2
        while a:
            a //= (k-1)*k; s0 += a; k += 2
            a //= (k-1)*k; s1 += a; k += 2
            a = (a*x4) >> wp
        s1 = (x2*s1) >> wp
        if alt:
            c = s1 - s0 + one
        else:
            c = s1 + s0 + one
    else:
        # Same series split in u interleaved sums, using precomputed
        # powers of x^2
        u = int(0.3*prec**0.35)
        x2 = a = (x*x) >> wp
        xpowers = [one, x2]
        for i in xrange(1, u):
            xpowers.append((xpowers[-1]*x2)>>wp)
        sums = [MPZ_ZERO] * u
        k = 2
        while a:
            for i in xrange(u):
                a //= (k-1)*k
                if alt and k & 2: sums[i] -= a
                else: sums[i] += a
                k += 2
            a = (a*xpowers[-1]) >> wp
        for i in xrange(1, u):
            sums[i] = (sums[i]*xpowers[i]) >> wp
        c = sum(sums) + one
    # c now holds cosh or cos of the reduced argument
    if type == 0:
        # sinh = sqrt(cosh^2 - 1); exp(+/-x) = cosh +/- sinh
        s = isqrt_fast(c*c - (one<<wp))
        if sign:
            v = c - s
        else:
            v = c + s
        # Undo the argument halvings by repeated squaring
        for i in xrange(r):
            v = (v*v) >> wp
        return v >> extra
    else:
        # Repeatedly apply the double-angle formula
        # cosh(2*x) = 2*cosh(x)^2 - 1
        # cos(2*x) = 2*cos(x)^2 - 1
        pshift = wp-1
        for i in xrange(r):
            c = ((c*c) >> pshift) - one
        # With the abs, this is the same for sinh and sin
        s = isqrt_fast(abs((one<<wp) - c*c))
        if sign:
            s = -s
        return (c>>extra), (s>>extra)
1085
+
1086
def exp_basecase(x, prec):
    """
    Compute exp(x) as a fixed-point number. Works for any x,
    but for speed should have |x| < 1. For an arbitrary number,
    use exp(x) = exp(x-m*log(2)) * 2^m where m = floor(x/log(2)).
    """
    if prec > EXP_COSH_CUTOFF:
        return exponential_series(x, prec, 0)
    # Raising the precision by r bits while leaving x unchanged means
    # the series below is evaluated at x/2^r
    r = int(prec**0.5)
    prec += r
    # s0 collects the even-order terms; s1 collects the odd-order terms
    # with one factor x left out until after the loop
    s0 = s1 = (MPZ_ONE << prec)
    k = 2
    a = x2 = (x*x) >> prec
    while a:
        a //= k; s0 += a; k += 1
        a //= k; s1 += a; k += 1
        a = (a*x2) >> prec
    s1 = (s1*x) >> prec
    s = s0 + s1
    # Square r times to undo the argument reduction
    u = r
    while r:
        s = (s*s) >> prec
        r -= 1
    # Drop the r extra bits of precision
    return s >> u
1110
+
1111
def exp_expneg_basecase(x, prec):
    """
    Fixed-point computation of the pair exp(x), exp(-x).
    """
    if prec <= EXP_COSH_CUTOFF:
        # One exponential, then its reciprocal at the same precision
        pos = exp_basecase(x, prec)
        neg = (MPZ_ONE << (prec+prec)) // pos
        return pos, neg
    # exp(+/-x) = cosh(x) +/- sinh(x)
    cosh, sinh = exponential_series(x, prec, 1)
    return cosh+sinh, cosh-sinh
1121
+
1122
def cos_sin_basecase(x, prec):
    """
    Compute cos(x), sin(x) as fixed-point numbers, assuming x
    in [0, pi/2). For an arbitrary number, use x' = x - m*(pi/2)
    where m = floor(x/(pi/2)) along with quarter-period symmetries.
    """
    if prec > COS_SIN_CACHE_PREC:
        return exponential_series(x, prec, 2)
    # t is x truncated to COS_SIN_CACHE_STEP fractional bits; cos and
    # sin at that table point are cached under the key n
    precs = prec - COS_SIN_CACHE_STEP
    t = x >> precs
    n = int(t)
    if n not in cos_sin_cache:
        # Computed with 10 guard bits, which are dropped before storing
        w = t<<(10+COS_SIN_CACHE_PREC-COS_SIN_CACHE_STEP)
        cos_t, sin_t = exponential_series(w, 10+COS_SIN_CACHE_PREC, 2)
        cos_sin_cache[n] = (cos_t>>10), (sin_t>>10)
    cos_t, sin_t = cos_sin_cache[n]
    # Reduce the cached values to the working precision
    offset = COS_SIN_CACHE_PREC - prec
    cos_t >>= offset
    sin_t >>= offset
    # Remainder of x after removing the table point
    x -= t << precs
    # Taylor series for cos and sin of the remainder, built term by term
    cos = MPZ_ONE << prec
    sin = x
    k = 2
    a = -((x*x) >> prec)
    while a:
        a //= k; cos += a; k += 1; a = (a*x) >> prec
        a //= k; sin += a; k += 1; a = -((a*x) >> prec)
    # Angle-addition formulas combine the table point and the remainder
    return ((cos*cos_t-sin*sin_t) >> prec), ((sin*cos_t+cos*sin_t) >> prec)
1150
+
1151
def mpf_exp(x, prec, rnd=round_fast):
    """Compute exp(x) for the mpf tuple x, rounded to prec bits."""
    sign, man, exp, bc = x
    if man:
        mag = bc + exp
        wp = prec + 14
        if sign:
            man = -man
        # TODO: the best cutoff depends on both x and the precision.
        if prec > 600 and exp >= 0:
            # x is an integer: compute it as an integer power of e
            # Need about log2(exp(n)) ~= 1.45*mag extra precision
            e = mpf_e(wp+int(1.45*mag))
            return mpf_pow_int(e, man<<exp, prec, rnd)
        # x is tiny: the result is 1 perturbed by one unit
        if mag < -wp:
            return mpf_perturb(fone, sign, prec, rnd)
        # |x| >= 2
        if mag > 1:
            # For large arguments: exp(2^mag*(1+eps)) =
            # exp(2^mag)*exp(2^mag*eps) = exp(2^mag)*(1 + 2^mag*eps + ...)
            # so about mag extra bits is required.
            wpmod = wp + mag
            offset = exp + wpmod
            if offset >= 0:
                t = man << offset
            else:
                t = man >> (-offset)
            # Argument reduction: x = n*log(2) + t
            lg2 = ln2_fixed(wpmod)
            n, t = divmod(t, lg2)
            n = int(n)
            t >>= mag
        else:
            offset = exp + wp
            if offset >= 0:
                t = man << offset
            else:
                t = man >> (-offset)
            n = 0
        man = exp_basecase(t, wp)
        # The factor 2^n from the reduction goes into the exponent
        return from_man_exp(man, n-wp, prec, rnd)
    # Zero mantissa: zero itself or a special value
    if not exp:
        return fone
    if x == fninf:
        return fzero
    return x
1194
+
1195
+
1196
def mpf_cosh_sinh(x, prec, rnd=round_fast, tanh=0):
    """
    Simultaneously compute (cosh(x), sinh(x)) for real x.

    With a true ``tanh`` flag, the single value tanh(x) is returned
    instead of the pair.
    """
    sign, man, exp, bc = x
    if (not man) and exp:
        # Special values: +inf, -inf, nan
        if tanh:
            if x == finf:
                return fone
            if x == fninf:
                return fnone
            return fnan
        if x == finf:
            return (finf, finf)
        if x == fninf:
            return (finf, fninf)
        return fnan, fnan
    mag = exp + bc
    wp = prec + 14
    if mag < -4:
        # Extremely close to 0, sinh(x) ~= x and cosh(x) ~= 1
        if mag < -wp:
            if tanh:
                return mpf_perturb(x, 1 - sign, prec, rnd)
            return (mpf_perturb(fone, 0, prec, rnd),
                    mpf_perturb(x, sign, prec, rnd))
        # Fix for cancellation when computing sinh
        wp += (-mag)
    # Does exp(-2*x) vanish?
    if mag > 10 and 3*(1 << (mag-1)) > wp:
        # XXX: rounding
        if tanh:
            return mpf_perturb(fnone if sign else fone, 1 - sign, prec, rnd)
        half_exp = mpf_shift(mpf_exp(mpf_abs(x), prec, rnd), -1)
        if sign:
            return half_exp, mpf_neg(half_exp)
        return half_exp, half_exp
    if mag > 1:
        # |x| > 1: write |x| = n*log(2) + t
        wpmod = wp + mag
        offset = exp + wpmod
        t = man << offset if offset >= 0 else man >> (-offset)
        lg2 = ln2_fixed(wpmod)
        n, t = divmod(t, lg2)
        n = int(n)
        t >>= mag
    else:
        offset = exp + wp
        t = man << offset if offset >= 0 else man >> (-offset)
        n = 0
    a, b = exp_expneg_basecase(t, wp)
    # TODO: optimize division precision
    b_scaled = b >> (2*n)
    cosh = a + b_scaled
    sinh = a - b_scaled
    if sign:
        sinh = -sinh
    if tanh:
        man = (sinh << wp) // cosh
        return from_man_exp(man, -wp, prec, rnd)
    cosh = from_man_exp(cosh, n - wp - 1, prec, rnd)
    sinh = from_man_exp(sinh, n - wp - 1, prec, rnd)
    return cosh, sinh
1261
+
1262
+
1263
def mod_pi2(man, exp, mag, wp):
    """
    Reduce man*2^exp modulo pi/2 to the standard interval.

    Returns ``(t, n, wp)``: the fixed-point remainder ``t``, the integer
    quotient ``n`` and the (possibly increased) working precision.
    """
    if mag <= 0:
        # Already in range; only add precision for small arguments
        wp += (-mag)
        offset = exp + wp
        t = man << offset if offset >= 0 else man >> (-offset)
        return t, 0, wp
    i = 0
    while True:
        # Retry with more guard bits until the remainder is not lost
        # to cancellation near a multiple of pi/2
        cancellation_prec = 20 << i
        wpmod = wp + mag + cancellation_prec
        pi2 = pi_fixed(wpmod - 1)
        pi4 = pi2 >> 1
        offset = wpmod + exp
        t = man << offset if offset >= 0 else man >> (-offset)
        n, y = divmod(t, pi2)
        small = pi2 - y if y > pi4 else y
        if small >> (wp + mag - 10):
            n = int(n)
            t = y >> mag
            wp = wpmod - mag
            return t, n, wp
        i += 1
1297
+
1298
+
1299
def mpf_cos_sin(x, prec, rnd=round_fast, which=0, pi=False):
    """
    Trigonometric functions of a raw mpf tuple ``x``.

    which:
    0 -- return cos(x), sin(x)
    1 -- return cos(x)
    2 -- return sin(x)
    3 -- return tan(x)

    if pi=True, compute for pi*x
    """
    sign, man, exp, bc = x
    if not man:
        # Special value (nan result) or exact zero
        if exp:
            c, s = fnan, fnan
        else:
            c, s = fone, fzero
        if which == 0:
            return c, s
        if which == 1:
            return c
        if which in (2, 3):
            # tan agrees with sin here (0 or nan)
            return s

    mag = bc + exp
    wp = prec + 10

    # Extremely small?
    if mag < 0 and mag < -wp:
        if pi:
            x = mpf_mul(x, mpf_pi(wp))
        c = mpf_perturb(fone, 1, prec, rnd)
        s = mpf_perturb(x, 1 - sign, prec, rnd)
        if which == 0:
            return c, s
        if which == 1:
            return c
        if which == 2:
            return s
        if which == 3:
            return mpf_perturb(x, sign, prec, rnd)
    if pi:
        if exp >= -1:
            # x is an integer or half-integer: exact values
            if exp == -1:
                c = fzero
                s = (fone, fnone)[bool(man & 2) ^ sign]
            elif exp == 0:
                c, s = (fnone, fzero)
            else:
                c, s = (fone, fzero)
            if which == 0:
                return c, s
            if which == 1:
                return c
            if which == 2:
                return s
            if which == 3:
                return mpf_div(s, c, prec, rnd)
        # Subtract nearest half-integer (= mod by pi/2)
        n = ((man >> (-exp-2)) + 1) >> 1
        man = man - (n << (-exp-1))
        mag2 = bitcount(man) + exp
        wp = prec + 10 - mag2
        offset = exp + wp
        t = man << offset if offset >= 0 else man >> (-offset)
        t = (t*pi_fixed(wp)) >> wp
    else:
        t, n, wp = mod_pi2(man, exp, mag, wp)
    c, s = cos_sin_basecase(t, wp)
    # Undo the quarter-period reduction
    quadrant = n & 3
    if quadrant == 1:
        c, s = -s, c
    elif quadrant == 2:
        c, s = -c, -s
    elif quadrant == 3:
        c, s = s, -c
    if sign:
        s = -s
    if which == 0:
        return (from_man_exp(c, -wp, prec, rnd),
                from_man_exp(s, -wp, prec, rnd))
    if which == 1:
        return from_man_exp(c, -wp, prec, rnd)
    if which == 2:
        return from_man_exp(s, -wp, prec, rnd)
    if which == 3:
        return from_rational(s, c, prec, rnd)
1377
+
1378
def mpf_cos(x, prec, rnd=round_fast):
    """cos(x), via mpf_cos_sin with which=1."""
    return mpf_cos_sin(x, prec, rnd, 1)

def mpf_sin(x, prec, rnd=round_fast):
    """sin(x), via mpf_cos_sin with which=2."""
    return mpf_cos_sin(x, prec, rnd, 2)

def mpf_tan(x, prec, rnd=round_fast):
    """tan(x), via mpf_cos_sin with which=3."""
    return mpf_cos_sin(x, prec, rnd, 3)

def mpf_cos_sin_pi(x, prec, rnd=round_fast):
    """(cos(pi*x), sin(pi*x)), via mpf_cos_sin with pi enabled."""
    return mpf_cos_sin(x, prec, rnd, 0, 1)

def mpf_cos_pi(x, prec, rnd=round_fast):
    """cos(pi*x), via mpf_cos_sin with pi enabled."""
    return mpf_cos_sin(x, prec, rnd, 1, 1)

def mpf_sin_pi(x, prec, rnd=round_fast):
    """sin(pi*x), via mpf_cos_sin with pi enabled."""
    return mpf_cos_sin(x, prec, rnd, 2, 1)

def mpf_cosh(x, prec, rnd=round_fast):
    """cosh(x): first component of mpf_cosh_sinh."""
    return mpf_cosh_sinh(x, prec, rnd)[0]

def mpf_sinh(x, prec, rnd=round_fast):
    """sinh(x): second component of mpf_cosh_sinh."""
    return mpf_cosh_sinh(x, prec, rnd)[1]

def mpf_tanh(x, prec, rnd=round_fast):
    """tanh(x), via mpf_cosh_sinh with tanh=1."""
    return mpf_cosh_sinh(x, prec, rnd, tanh=1)
1387
+
1388
+
1389
+ # Low-overhead fixed-point versions
1390
+
1391
def cos_sin_fixed(x, prec, pi2=None):
    """
    Low-overhead fixed-point (cos(x), sin(x)) for a fixed-point ``x``
    with ``prec`` fractional bits. ``pi2`` may supply a precomputed
    fixed-point pi/2; it defaults to pi_fixed(prec-1).
    """
    if pi2 is None:
        pi2 = pi_fixed(prec-1)
    quot, rem = divmod(x, pi2)
    quot = int(quot)
    c, s = cos_sin_basecase(rem, prec)
    # Quarter-period symmetries, indexed by the quadrant quot mod 4
    return ((c, s), (-s, c), (-c, -s), (s, -c))[quot & 3]
1402
+
1403
def exp_fixed(x, prec, ln2=None):
    """
    Low-overhead fixed-point exp(x) for a fixed-point ``x`` with ``prec``
    fractional bits. ``ln2`` may supply a precomputed fixed-point log(2);
    it defaults to ln2_fixed(prec).
    """
    if ln2 is None:
        ln2 = ln2_fixed(prec)
    # x = n*log(2) + t, so exp(x) = exp(t) * 2^n
    n, t = divmod(x, ln2)
    n = int(n)
    v = exp_basecase(t, prec)
    if n < 0:
        return v >> (-n)
    return v << n
1413
+
1414
+
1415
# With the Sage backend, rebind selected functions of this module to the
# implementations exported by sage.libs.mpmath.ext_libmp.
if BACKEND == 'sage':
    try:
        import sage.libs.mpmath.ext_libmp as _lbmp
        # The names are rebound one at a time, so a failure part-way through
        # leaves the earlier names already overridden.
        mpf_sqrt = _lbmp.mpf_sqrt
        mpf_exp = _lbmp.mpf_exp
        mpf_log = _lbmp.mpf_log
        mpf_cos = _lbmp.mpf_cos
        mpf_sin = _lbmp.mpf_sin
        mpf_pow = _lbmp.mpf_pow
        exp_fixed = _lbmp.exp_fixed
        cos_sin_fixed = _lbmp.cos_sin_fixed
        log_int_fixed = _lbmp.log_int_fixed
    except (ImportError, AttributeError):
        # Best effort: keep the pure-Python definitions and only warn
        print("Warning: Sage imports in libelefun failed")
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libhyper.py ADDED
@@ -0,0 +1,1150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module implements computation of hypergeometric and related
3
+ functions. In particular, it provides code for generic summation
4
+ of hypergeometric series. Optimized versions for various special
5
+ cases are also provided.
6
+ """
7
+
8
+ import operator
9
+ import math
10
+
11
+ from .backend import MPZ_ZERO, MPZ_ONE, BACKEND, xrange, exec_
12
+
13
+ from .libintmath import gcd
14
+
15
+ from .libmpf import (\
16
+ ComplexResult, round_fast, round_nearest,
17
+ negative_rnd, bitcount, to_fixed, from_man_exp, from_int, to_int,
18
+ from_rational,
19
+ fzero, fone, fnone, ftwo, finf, fninf, fnan,
20
+ mpf_sign, mpf_add, mpf_abs, mpf_pos,
21
+ mpf_cmp, mpf_lt, mpf_le, mpf_gt, mpf_min_max,
22
+ mpf_perturb, mpf_neg, mpf_shift, mpf_sub, mpf_mul, mpf_div,
23
+ sqrt_fixed, mpf_sqrt, mpf_rdiv_int, mpf_pow_int,
24
+ to_rational,
25
+ )
26
+
27
+ from .libelefun import (\
28
+ mpf_pi, mpf_exp, mpf_log, pi_fixed, mpf_cos_sin, mpf_cos, mpf_sin,
29
+ mpf_sqrt, agm_fixed,
30
+ )
31
+
32
+ from .libmpc import (\
33
+ mpc_one, mpc_sub, mpc_mul_mpf, mpc_mul, mpc_neg, complex_int_pow,
34
+ mpc_div, mpc_add_mpf, mpc_sub_mpf,
35
+ mpc_log, mpc_add, mpc_pos, mpc_shift,
36
+ mpc_is_infnan, mpc_zero, mpc_sqrt, mpc_abs,
37
+ mpc_mpf_div, mpc_square, mpc_exp
38
+ )
39
+
40
+ from .libintmath import ifac
41
+ from .gammazeta import mpf_gamma_int, mpf_euler, euler_fixed
42
+
43
class NoConvergence(Exception):
    """Raised when a series fails to converge within the allowed number of terms."""
45
+
46
+
47
+ #-----------------------------------------------------------------------#
48
+ # #
49
+ # Generic hypergeometric series #
50
+ # #
51
+ #-----------------------------------------------------------------------#
52
+
53
+ """
54
+ TODO:
55
+
56
+ 1. proper mpq parsing
57
+ 2. imaginary z special-cased (also: rational, integer?)
58
+ 3. more clever handling of series that don't converge because of stupid
59
+ upwards rounding
60
+ 4. checking for cancellation
61
+
62
+ """
63
+
64
def make_hyp_summator(key):
    """
    Returns a function that sums a generalized hypergeometric series,
    for given parameter types (integer, rational, real, complex).

    ``key`` is ``(p, q, param_types, ztype)``: the number of numerator
    and denominator parameters, one type flag per parameter ('Z', 'Q',
    'R' or 'C'; any other flag raises ValueError) and the argument type
    ('C' selects a complex argument).

    Python source for a specialized summation routine is generated line
    by line and compiled with ``exec_``. The return value is the pair
    ``(source, function)``; the function has the signature
    ``(coeffs, z, prec, wp, epsshift, magnitude_check, **kwargs)``.

    The strings passed to ``add`` are lines of generated code, so their
    leading spaces are significant: they encode the block nesting of
    the generated function.
    """
    p, q, param_types, ztype = key

    pstring = "".join(param_types)
    fname = "hypsum_%i_%i_%s_%s_%s" % (p, q, pstring[:p], pstring[p:], ztype)
    #print "generating hypsum", fname

    have_complex_param = 'C' in param_types
    have_complex_arg = ztype == 'C'
    have_complex = have_complex_param or have_complex_arg

    source = []
    add = source.append

    # Parameter indices grouped by (numerator/denominator, type)
    aint = []
    arat = []
    bint = []
    brat = []
    areal = []
    breal = []
    acomplex = []
    bcomplex = []

    #add("wp = prec + 40")
    add("MAX = kwargs.get('maxterms', wp*100)")
    add("HIGH = MPZ_ONE<<epsshift")
    add("LOW = -HIGH")

    # Setup code
    add("SRE = PRE = one = (MPZ_ONE << wp)")
    if have_complex:
        add("SIM = PIM = MPZ_ZERO")

    if have_complex_arg:
        add("xsign, xm, xe, xbc = z[0]")
        add("if xsign: xm = -xm")
        add("ysign, ym, ye, ybc = z[1]")
        add("if ysign: ym = -ym")
    else:
        add("xsign, xm, xe, xbc = z")
        add("if xsign: xm = -xm")

    # Convert the argument to fixed point
    add("offset = xe + wp")
    add("if offset >= 0:")
    add("    ZRE = xm << offset")
    add("else:")
    add("    ZRE = xm >> (-offset)")
    if have_complex_arg:
        add("offset = ye + wp")
        add("if offset >= 0:")
        add("    ZIM = ym << offset")
        add("else:")
        add("    ZIM = ym >> (-offset)")

    # Unpack each parameter according to its type flag
    for i, flag in enumerate(param_types):
        W = ["A", "B"][i >= p]
        if flag == 'Z':
            ([aint,bint][i >= p]).append(i)
            add("%sINT_%i = coeffs[%i]" % (W, i, i))
        elif flag == 'Q':
            ([arat,brat][i >= p]).append(i)
            add("%sP_%i, %sQ_%i = coeffs[%i]._mpq_" % (W, i, W, i, i))
        elif flag == 'R':
            ([areal,breal][i >= p]).append(i)
            add("xsign, xm, xe, xbc = coeffs[%i]._mpf_" % i)
            add("if xsign: xm = -xm")
            add("offset = xe + wp")
            add("if offset >= 0:")
            add("    %sREAL_%i = xm << offset" % (W, i))
            add("else:")
            add("    %sREAL_%i = xm >> (-offset)" % (W, i))
        elif flag == 'C':
            ([acomplex,bcomplex][i >= p]).append(i)
            add("__re, __im = coeffs[%i]._mpc_" % i)
            add("xsign, xm, xe, xbc = __re")
            add("if xsign: xm = -xm")
            add("ysign, ym, ye, ybc = __im")
            add("if ysign: ym = -ym")

            add("offset = xe + wp")
            add("if offset >= 0:")
            add("    %sCRE_%i = xm << offset" % (W, i))
            add("else:")
            add("    %sCRE_%i = xm >> (-offset)" % (W, i))
            add("offset = ye + wp")
            add("if offset >= 0:")
            add("    %sCIM_%i = ym << offset" % (W, i))
            add("else:")
            add("    %sCIM_%i = ym >> (-offset)" % (W, i))
        else:
            raise ValueError

    # Real numerator/denominator parameters are paired up so that each
    # pair costs one multiplication and one division without a shift
    l_areal = len(areal)
    l_breal = len(breal)
    cancellable_real = min(l_areal, l_breal)
    noncancellable_real_num = areal[cancellable_real:]
    noncancellable_real_den = breal[cancellable_real:]

    # LOOP
    add("for n in xrange(1,10**8):")

    add("    if n in magnitude_check:")
    add("        p_mag = bitcount(abs(PRE))")
    if have_complex:
        add("        p_mag = max(p_mag, bitcount(abs(PIM)))")
    add("        magnitude_check[n] = wp-p_mag")

    # Real factors
    multiplier = " * ".join(["AINT_#".replace("#", str(i)) for i in aint] + \
                            ["AP_#".replace("#", str(i)) for i in arat] + \
                            ["BQ_#".replace("#", str(i)) for i in brat])

    divisor    = " * ".join(["BINT_#".replace("#", str(i)) for i in bint] + \
                            ["BP_#".replace("#", str(i)) for i in brat] + \
                            ["AQ_#".replace("#", str(i)) for i in arat] + ["n"])

    if multiplier:
        add("    mul = " + multiplier)
    add("    div = " + divisor)

    # Check for singular terms
    add("    if not div:")
    if multiplier:
        add("        if not mul:")
        add("            break")
    add("        raise ZeroDivisionError")

    # Update product
    if have_complex:

        # TODO: when there are several real parameters and just a few complex
        # (maybe just the complex argument), we only need to do about
        # half as many ops if we accumulate the real factor in a single real variable
        for k in range(cancellable_real): add("    PRE = PRE * AREAL_%i // BREAL_%i" % (areal[k], breal[k]))
        for i in noncancellable_real_num: add("    PRE = (PRE * AREAL_#) >> wp".replace("#", str(i)))
        for i in noncancellable_real_den: add("    PRE = (PRE << wp) // BREAL_#".replace("#", str(i)))
        for k in range(cancellable_real): add("    PIM = PIM * AREAL_%i // BREAL_%i" % (areal[k], breal[k]))
        for i in noncancellable_real_num: add("    PIM = (PIM * AREAL_#) >> wp".replace("#", str(i)))
        for i in noncancellable_real_den: add("    PIM = (PIM << wp) // BREAL_#".replace("#", str(i)))

        if multiplier:
            if have_complex_arg:
                add("    PRE, PIM = (mul*(PRE*ZRE-PIM*ZIM))//div, (mul*(PIM*ZRE+PRE*ZIM))//div")
                add("    PRE >>= wp")
                add("    PIM >>= wp")
            else:
                add("    PRE = ((mul * PRE * ZRE) >> wp) // div")
                add("    PIM = ((mul * PIM * ZRE) >> wp) // div")
        else:
            if have_complex_arg:
                add("    PRE, PIM = (PRE*ZRE-PIM*ZIM)//div, (PIM*ZRE+PRE*ZIM)//div")
                add("    PRE >>= wp")
                add("    PIM >>= wp")
            else:
                add("    PRE = ((PRE * ZRE) >> wp) // div")
                add("    PIM = ((PIM * ZRE) >> wp) // div")

        for i in acomplex:
            add("    PRE, PIM = PRE*ACRE_#-PIM*ACIM_#, PIM*ACRE_#+PRE*ACIM_#".replace("#", str(i)))
            add("    PRE >>= wp")
            add("    PIM >>= wp")

        for i in bcomplex:
            add("    mag = BCRE_#*BCRE_#+BCIM_#*BCIM_#".replace("#", str(i)))
            add("    re = PRE*BCRE_# + PIM*BCIM_#".replace("#", str(i)))
            add("    im = PIM*BCRE_# - PRE*BCIM_#".replace("#", str(i)))
            add("    PRE = (re << wp) // mag")
            add("    PIM = (im << wp) // mag")

    else:
        for k in range(cancellable_real): add("    PRE = PRE * AREAL_%i // BREAL_%i" % (areal[k], breal[k]))
        for i in noncancellable_real_num: add("    PRE = (PRE * AREAL_#) >> wp".replace("#", str(i)))
        for i in noncancellable_real_den: add("    PRE = (PRE << wp) // BREAL_#".replace("#", str(i)))
        if multiplier:
            add("    PRE = ((PRE * mul * ZRE) >> wp) // div")
        else:
            add("    PRE = ((PRE * ZRE) >> wp) // div")

    # Add product to sum
    if have_complex:
        add("    SRE += PRE")
        add("    SIM += PIM")
        add("    if (HIGH > PRE > LOW) and (HIGH > PIM > LOW):")
        add("        break")
    else:
        add("    SRE += PRE")
        add("    if HIGH > PRE > LOW:")
        add("        break")

    #add("    from mpmath import nprint, log, ldexp")
    #add("    nprint([n, log(abs(PRE),2), ldexp(PRE,-wp)])")

    add("    if n > MAX:")
    add("        raise NoConvergence('Hypergeometric series converges too slowly. Try increasing maxterms.')")

    # +1 all parameters for next loop
    for i in aint:     add("    AINT_# += 1".replace("#", str(i)))
    for i in bint:     add("    BINT_# += 1".replace("#", str(i)))
    for i in arat:     add("    AP_# += AQ_#".replace("#", str(i)))
    for i in brat:     add("    BP_# += BQ_#".replace("#", str(i)))
    for i in areal:    add("    AREAL_# += one".replace("#", str(i)))
    for i in breal:    add("    BREAL_# += one".replace("#", str(i)))
    for i in acomplex: add("    ACRE_# += one".replace("#", str(i)))
    for i in bcomplex: add("    BCRE_# += one".replace("#", str(i)))

    # Epilogue (outside the generated loop): convert the sum and report
    # its magnitude
    if have_complex:
        add("a = from_man_exp(SRE, -wp, prec, 'n')")
        add("b = from_man_exp(SIM, -wp, prec, 'n')")

        add("if SRE:")
        add("    if SIM:")
        add("        magn = max(a[2]+a[3], b[2]+b[3])")
        add("    else:")
        add("        magn = a[2]+a[3]")
        add("elif SIM:")
        add("    magn = b[2]+b[3]")
        add("else:")
        add("    magn = -wp+1")

        add("return (a, b), True, magn")
    else:
        add("a = from_man_exp(SRE, -wp, prec, 'n')")

        add("if SRE:")
        add("    magn = a[2]+a[3]")
        add("else:")
        add("    magn = -wp+1")

        add("return a, False, magn")

    # Every generated line is indented once more to form the function body
    source = "\n".join(("    " + line) for line in source)
    source = ("def %s(coeffs, z, prec, wp, epsshift, magnitude_check, **kwargs):\n" % fname) + source

    namespace = {}

    exec_(source, globals(), namespace)

    #print source
    return source, namespace[fname]
308
+
309
+
310
if BACKEND == 'sage':

    def make_hyp_summator(key):
        """
        Sage-backend replacement: returns a placeholder source string and
        a summation function that delegates to Sage's hypsum_internal for
        the parameter types (integer, rational, real, complex) in ``key``.
        """
        from sage.libs.mpmath.ext_main import hypsum_internal
        p, q, param_types, ztype = key

        def _hypsum(coeffs, z, prec, wp, epsshift, magnitude_check, **kwargs):
            # kwargs is forwarded as a single dict argument
            return hypsum_internal(p, q, param_types, ztype, coeffs, z,
                                   prec, wp, epsshift, magnitude_check, kwargs)

        return "(none)", _hypsum
324
+
325
+
326
+ #-----------------------------------------------------------------------#
327
+ # #
328
+ # Error functions #
329
+ # #
330
+ #-----------------------------------------------------------------------#
331
+
332
+ # TODO: mpf_erf should call mpf_erfc when appropriate (currently
333
+ # only the converse delegation is implemented)
334
+
335
def mpf_erf(x, prec, rnd=round_fast):
    """
    Error function erf(x) of a raw mpf tuple ``x``, rounded to ``prec``
    bits in direction ``rnd``.
    """
    sign, man, exp, bc = x
    if not man:
        if x == fzero:
            return fzero
        if x == finf:
            return fone
        if x == fninf:
            return fnone
        return fnan
    size = exp + bc
    # The approximation erf(x) = 1 is accurate to > x^2 * log(e,2) bits
    if size > 3 and 2*(size-1) + 0.528766 > math.log(prec, 2):
        if sign:
            return mpf_perturb(fnone, 0, prec, rnd)
        return mpf_perturb(fone, 1, prec, rnd)
    # erf(x) ~ 2*x/sqrt(pi) close to 0
    if size < -prec:
        twice_x = mpf_shift(x, 1)
        root_pi = mpf_sqrt(mpf_pi(prec+20), prec+20)
        # TODO: interval rounding
        return mpf_div(twice_x, root_pi, prec, rnd)
    wp = prec + abs(size) + 25
    # Taylor series for erf, fixed-point summation
    power = abs(to_fixed(x, wp))
    square = (power*power) >> wp
    total = power
    term = 12345          # any nonzero value, to enter the loop
    k = 1
    while term:
        power = ((power * square) >> wp) // k
        term = power // (2*k+1)
        if k & 1:
            total -= term
        else:
            total += term
        k += 1
    # Multiply by 2/sqrt(pi)
    total = (total << (wp+1)) // sqrt_fixed(pi_fixed(wp), wp)
    if sign:
        total = -total
    return from_man_exp(total, -wp, prec, rnd)
374
+
375
+ # If possible, we use the asymptotic series for erfc.
376
+ # This is an alternating divergent asymptotic series, so
377
+ # the error is at most equal to the first omitted term.
378
+ # Here we check if the smallest term is small enough
379
+ # for a given x and precision
380
def erfc_check_series(x, prec):
    """
    Return True when the integer part n of x satisfies n**2 * 1.44 > prec,
    the criterion used to decide whether the asymptotic series for erfc
    is accurate enough at this precision.
    """
    n = to_int(x)
    return n**2 * 1.44 > prec
385
+
386
def mpf_erfc(x, prec, rnd=round_fast):
    """
    Complementary error function erfc(x) = 1 - erf(x) of a raw mpf
    tuple ``x``, rounded to ``prec`` bits in direction ``rnd``.
    """
    sign, man, exp, bc = x
    if not man:
        if x == fzero:
            return fone
        if x == finf:
            return fzero
        if x == fninf:
            return ftwo
        return fnan
    wp = prec + 20
    mag = bc + exp
    # Preserve full accuracy when exponent grows huge
    wp += max(0, 2*mag)
    regular_erf = sign or mag < 2
    if regular_erf:
        return mpf_sub(fone, mpf_erf(x, prec+10, negative_rnd[rnd]), prec, rnd)
    if not erfc_check_series(x, wp):
        # 1-erf(x) ~ exp(-x^2), increase prec to deal with cancellation
        n = to_int(x) + 1
        return mpf_sub(fone, mpf_erf(x, prec + int(n**2*1.44) + 10), prec, rnd)
    # Asymptotic series, summed in fixed point
    total = term = MPZ_ONE << wp
    previous = 0
    t = (2 * to_fixed(x, wp) ** 2) >> wp
    k = 1
    while True:
        term = ((term * (2*k - 1)) << wp) // t
        # Stop once the terms start growing again, or vanish
        if (k > 4 and term > previous) or not term:
            break
        if k & 1:
            total -= term
        else:
            total += term
        previous = term
        #print k, to_str(from_man_exp(term, -wp, 50), 10)
        k += 1
    total = (total << wp) // sqrt_fixed(pi_fixed(wp), wp)
    series = from_man_exp(total, -wp, wp)
    # erfc(x) ~ exp(-x^2) / (x*sqrt(pi)) * series
    gauss = mpf_exp(mpf_neg(mpf_mul(x, x, wp), wp), wp)
    return mpf_div(mpf_mul(gauss, series, wp), x, prec, rnd)
424
+
425
+
426
+ #-----------------------------------------------------------------------#
427
+ # #
428
+ # Exponential integrals #
429
+ # #
430
+ #-----------------------------------------------------------------------#
431
+
432
def ei_taylor(x, prec):
    """
    Fixed-point sum of x^k/(k*k!) for k = 1, 2, ..., where ``x`` is a
    fixed-point integer with ``prec`` fractional bits. Terms are added
    until the running power x^k/k! truncates to zero.
    """
    total = x
    power = x            # holds x^k/k! for the current k
    k = 2
    while power != 0:
        power = ((power * x) >> prec) // k
        total += power // k
        k += 1
    return total
440
+
441
def complex_ei_taylor(zre, zim, prec):
    """
    Complex counterpart of ei_taylor: fixed-point sum of z^k/(k*k!) for
    z = zre + i*zim, given as fixed-point integers with ``prec``
    fractional bits. Returns the (real, imaginary) parts of the sum.
    The loop stops once |Re| + |Im| of the current power is at most 5.
    """
    sum_re, sum_im = zre, zim
    pow_re, pow_im = zre, zim
    k = 2
    while abs(pow_re) + abs(pow_im) > 5:
        # Complex multiplication by z, then division by k
        next_re = ((pow_re*zre - pow_im*zim) // k) >> prec
        next_im = ((pow_re*zim + pow_im*zre) // k) >> prec
        pow_re, pow_im = next_re, next_im
        sum_re += pow_re // k
        sum_im += pow_im // k
        k += 1
    return sum_re, sum_im
452
+
453
def ei_asymptotic(x, prec):
    """
    Fixed-point sum 1 + 1/x + 2/x^2 + 6/x^3 + ... (terms k!/x^k), for a
    fixed-point ``x`` with ``prec`` fractional bits. Terms are added
    until one truncates to zero.
    """
    one = MPZ_ONE << prec
    inv = (one << prec) // x      # fixed-point 1/x
    term = inv
    total = one + inv
    k = 2
    while term:
        term = (k * term * inv) >> prec
        total += term
        k += 1
    return total
463
+
464
def complex_ei_asymptotic(zre, zim, prec):
    """
    Complex counterpart of ei_asymptotic: fixed-point sum of k!/z^k for
    z = zre + i*zim with ``prec`` fractional bits. Returns the
    (real, imaginary) parts. Raises NoConvergence if the terms have not
    become small after about ``prec`` terms.
    """
    one = MPZ_ONE << prec
    norm = (zim*zim + zre*zre) >> prec
    # 1 / z
    inv_re = (zre << prec) // norm
    inv_im = ((-zim) << prec) // norm
    term_re, term_im = inv_re, inv_im
    sum_re = one + inv_re
    sum_im = inv_im
    k = 2
    while abs(term_re) + abs(term_im) > 1000:
        #print tre, tim
        next_re = ((term_re*inv_re - term_im*inv_im) * k) >> prec
        next_im = ((term_re*inv_im + term_im*inv_re) * k) >> prec
        term_re, term_im = next_re, next_im
        sum_re += term_re
        sum_im += term_im
        k += 1
        if k > prec:
            raise NoConvergence
    return sum_re, sum_im
483
+
484
def mpf_ei(x, prec, rnd=round_fast, e1=False):
    """
    Exponential integral Ei(x) of a raw mpf tuple ``x``.

    With ``e1=True`` the argument and the result are both negated,
    giving E1(x) = -Ei(-x); ComplexResult is raised when the negated
    argument is non-negative and not zero.
    """
    if e1:
        x = mpf_neg(x)
    sign, man, exp, bc = x
    if e1 and not sign:
        if x == fzero:
            return finf
        raise ComplexResult("E1(x) for x < 0")
    if not man:
        # Zero and special values
        if x == fzero:
            v = fninf
        elif x == finf:
            v = finf
        elif x == fninf:
            v = fzero
        else:
            v = fnan
    else:
        xabs = 0, man, exp, bc
        xmag = exp + bc
        wp = prec + 20
        can_use_asymp = xmag > wp
        if not can_use_asymp:
            # Compare the integer part of |x| with about wp*log(2)
            if exp >= 0:
                xabsint = man << exp
            else:
                xabsint = man >> (-exp)
            can_use_asymp = xabsint > int(wp*0.693) + 10
        if can_use_asymp:
            # Ei(x) ~ exp(x)/x * (1 + 1/x + 2/x^2 + ...)
            if xmag > wp:
                v = fone
            else:
                v = from_man_exp(ei_asymptotic(to_fixed(x, wp), wp), -wp)
            v = mpf_mul(v, mpf_exp(x, wp), wp)
            v = mpf_div(v, x, prec, rnd)
        else:
            # Ei(x) = euler + log|x| + Taylor series
            wp += 2*int(to_int(xabs))
            u = to_fixed(x, wp)
            fixed_sum = ei_taylor(u, wp) + euler_fixed(wp)
            t1 = from_man_exp(fixed_sum, -wp)
            t2 = mpf_log(xabs, wp)
            v = mpf_add(t1, t2, prec, rnd)
    if e1:
        v = mpf_neg(v)
    return v
525
+
526
def mpc_ei(z, prec, rnd=round_fast, e1=False):
    """
    Exponential integral Ei(z) of a raw mpc pair ``z = (re, im)``.
    With ``e1=True`` the argument is negated on entry and the result
    negated on exit, giving E1(z).
    """
    if e1:
        z = mpc_neg(z)
    a, b = z
    asign, aman, aexp, abc = a
    bsign, bman, bexp, bbc = b
    if b == fzero:
        # Purely real argument: delegate to the real routine
        if not e1:
            return mpf_ei(a, prec, rnd), fzero
        x = mpf_neg(mpf_ei(a, prec, rnd))
        if asign:
            y = fzero
        else:
            y = mpf_neg(mpf_pi(prec, rnd))
        return x, y
    if a != fzero and (not aman or not bman):
        return (fnan, fnan)
    wp = prec + 40
    amag = aexp + abc
    bmag = bexp + bbc
    zmag = max(amag, bmag)
    can_use_asymp = zmag > wp
    if not can_use_asymp:
        zabsint = abs(to_int(a)) + abs(to_int(b))
        can_use_asymp = zabsint > int(wp*0.693) + 20
    try:
        if can_use_asymp:
            if zmag > wp:
                v = fone, fzero
            else:
                zre = to_fixed(a, wp)
                zim = to_fixed(b, wp)
                vre, vim = complex_ei_asymptotic(zre, zim, wp)
                v = from_man_exp(vre, -wp), from_man_exp(vim, -wp)
            v = mpc_mul(v, mpc_exp(z, wp), wp)
            v = mpc_div(v, z, wp)
            if e1:
                v = mpc_neg(v, prec, rnd)
            else:
                x, y = v
                # Add or subtract pi according to the sign of Im(z)
                if bsign:
                    v = mpf_pos(x, prec, rnd), mpf_sub(y, mpf_pi(wp), prec, rnd)
                else:
                    v = mpf_pos(x, prec, rnd), mpf_add(y, mpf_pi(wp), prec, rnd)
            return v
    except NoConvergence:
        # Asymptotic series failed; fall through to the Taylor series
        pass
    #wp += 2*max(0,zmag)
    wp += 2*int(to_int(mpc_abs(z, 5)))
    zre = to_fixed(a, wp)
    zim = to_fixed(b, wp)
    vre, vim = complex_ei_taylor(zre, zim, wp)
    vre += euler_fixed(wp)
    v = from_man_exp(vre, -wp), from_man_exp(vim, -wp)
    if e1:
        u = mpc_log(mpc_neg(z), wp)
    else:
        u = mpc_log(z, wp)
    v = mpc_add(v, u, prec, rnd)
    if e1:
        v = mpc_neg(v)
    return v
590
+
591
def mpf_e1(x, prec, rnd=round_fast):
    """E1(x) for real x, computed by mpf_ei with the e1 flag set."""
    e1_flag = True
    return mpf_ei(x, prec, rnd, e1_flag)
593
+
594
def mpc_e1(x, prec, rnd=round_fast):
    """E1(x) for complex x, computed by mpc_ei with the e1 flag set."""
    e1_flag = True
    return mpc_ei(x, prec, rnd, e1_flag)
596
+
597
def mpf_expint(n, x, prec, rnd=round_fast, gamma=False):
    """
    E_n(x), n an integer, x real

    With gamma=True, computes Gamma(n,x)   (upper incomplete gamma function)

    Returns (real, None) if real, otherwise (real, imag)
    The imaginary part is an optional branch cut term

    Raises NotImplementedError for arguments this routine does not
    cover (x of very small magnitude, or n too large for the recurrence).
    """
    sign, man, exp, bc = x
    if not man:
        # Zero and special values of x
        if gamma:
            if x == fzero:
                # Actually gamma function pole
                if n <= 0:
                    return finf, None
                return mpf_gamma_int(n, prec, rnd), None
            if x == finf:
                return fzero, None
            # TODO: could return finite imaginary value at -inf
            return fnan, fnan
        if x == fzero:
            if n > 1:
                return from_rational(1, n-1, prec, rnd), None
            return finf, None
        if x == finf:
            return fzero, None
        return fnan, fnan
    n_orig = n
    if gamma:
        n = 1 - n
    wp = prec + 20
    xmag = exp + bc
    # Beware of near-poles
    if xmag < -10:
        raise NotImplementedError
    nmag = bitcount(abs(n))
    have_imag = n > 0 and sign
    negx = mpf_neg(x)
    # Skip series if direct convergence
    if n == 0 or 2*nmag - xmag < -wp:
        v = mpf_exp(negx, wp)
        if gamma:
            re = mpf_mul(v, mpf_pow_int(x, n_orig-1, wp), prec, rnd)
        else:
            re = mpf_div(v, x, prec, rnd)
    else:
        # Finite number of terms, or...
        can_use_asymptotic_series = -3*wp < n <= 0
        # ...large enough?
        if not can_use_asymptotic_series:
            xi = abs(to_int(x))
            m = min(max(1, xi-n), 2*wp)
            siz = -n*nmag + (m+n)*bitcount(abs(m+n)) - m*xmag - (144*m//100)
            tol = -wp-10
            can_use_asymptotic_series = siz < tol
        if can_use_asymptotic_series:
            r = ((-MPZ_ONE) << (wp+wp)) // to_fixed(x, wp)
            m = n
            t = r*m
            s = MPZ_ONE << wp
            while m and t:
                s += t
                m += 1
                t = (m*r*t) >> wp
            v = mpf_exp(negx, wp)
            if gamma:
                # ~ exp(-x) * x^(n-1) * (1 + ...)
                v = mpf_mul(v, mpf_pow_int(x, n_orig-1, wp), wp)
            else:
                # ~ exp(-x)/x * (1 + ...)
                v = mpf_div(v, x, wp)
            re = mpf_mul(v, from_man_exp(s, -wp), prec, rnd)
        elif n == 1:
            re = mpf_neg(mpf_ei(negx, prec, rnd))
        elif 0 < n < 3*wp:
            # Express the result through Ei plus a finite polynomial sum
            T1 = mpf_neg(mpf_ei(negx, wp))
            if gamma:
                if n_orig & 1:
                    T1 = mpf_neg(T1)
            else:
                T1 = mpf_mul(T1, mpf_pow_int(negx, n-1, wp), wp)
            r = t = to_fixed(x, wp)
            # facs[k] = k! for k = 0..n-2, then reversed
            facs = [1] * (n-1)
            for k in range(1, n-1):
                facs[k] = facs[k-1] * k
            facs = facs[::-1]
            s = facs[0] << wp
            for k in range(1, n-1):
                if k & 1:
                    s -= facs[k] * t
                else:
                    s += facs[k] * t
                t = (t*r) >> wp
            T2 = from_man_exp(s, -wp, wp)
            T2 = mpf_mul(T2, mpf_exp(negx, wp))
            if gamma:
                T2 = mpf_mul(T2, mpf_pow_int(x, n_orig, wp), wp)
            R = mpf_add(T1, T2)
            re = mpf_div(R, from_int(ifac(n-1)), prec, rnd)
        else:
            raise NotImplementedError
    if not have_imag:
        return re, None
    # Branch cut term for negative x
    M = from_int(-ifac(n-1))
    if gamma:
        im = mpf_div(mpf_pi(wp), M, prec, rnd)
        if n_orig & 1:
            im = mpf_neg(im)
    else:
        im = mpf_div(mpf_mul(mpf_pi(wp), mpf_pow_int(negx, n_orig-1, wp), wp), M, prec, rnd)
    return re, im
714
+
715
def mpf_ci_si_taylor(x, wp, which=0):
    """
    Sum the Taylor series of a cosine/sine integral in fixed-point
    arithmetic with wp fractional bits and return the sum as an mpf.

    which = 0 - Ci(x) - (euler+log(x))
    which = 1 - Si(x)

    The result is not rounded; it carries the full -wp exponent.
    """
    x = to_fixed(x, wp)
    # Fixed-point -x**2 (unary minus binds tighter than the shift)
    x2 = -(x*x) >> wp
    if which == 0:
        # s: running sum, t: current power term x^k/k!, k: next exponent
        s, t, k = 0, (MPZ_ONE<<wp), 2
    else:
        s, t, k = x, x, 3
    # Iterate until the term underflows to zero at this precision
    while t:
        # Advance x^(k-2)/(k-2)! -> (-1) * x^k/k!
        t = (t*x2//(k*(k-1)))>>wp
        # Each series term is the power term divided once more by k
        s += t//k
        k += 2
    return from_man_exp(s, -wp)
731
+
732
def mpc_ci_si_taylor(re, im, wp, which=0):
    """
    Complex counterpart of the real Ci/Si Taylor summation.

    (re, im) are the mpf real and imaginary parts of the argument.
    With which == 0 the series starts from 0 with unit first term;
    otherwise it starts from z itself. Returns a pair of unrounded
    mpf values (real part, imaginary part).

    Raises NotImplementedError when the argument magnitude is above 2
    or below -wp bits.
    """
    # The following code is only designed for small arguments,
    # and not too small arguments (for relative accuracy)
    if re[1]:
        mag = re[2]+re[3]
        if im[1]:
            mag = max(mag, im[2]+im[3])
    elif im[1]:
        mag = im[2]+im[3]
    if mag > 2 or mag < -wp:
        raise NotImplementedError
    # Extra working bits for small arguments
    wp += (2-mag)
    zre = to_fixed(re, wp)
    zim = to_fixed(im, wp)
    # -z**2 in fixed point
    z2re = (zim*zim-zre*zre)>>wp
    z2im = (-2*zre*zim)>>wp
    if which == 0:
        sre, sim = 0, 0
        tre, tim = (MPZ_ONE<<wp), 0
        k = 2
    else:
        sre, sim = zre, zim
        tre, tim = zre, zim
        k = 3
    while max(abs(tre), abs(tim)) > 2:
        f = k*(k-1)
        new_re = ((tre*z2re-tim*z2im)//f)>>wp
        new_im = ((tre*z2im+tim*z2re)//f)>>wp
        tre, tim = new_re, new_im
        sre += tre//k
        sim += tim//k
        k += 2
    return from_man_exp(sre, -wp), from_man_exp(sim, -wp)
762
+
763
def mpf_ci_si(x, prec, rnd=round_fast, which=2):
    """
    Calculation of Ci(x), Si(x) for real x.

    which = 0 -- returns (Ci(x), -)
    which = 1 -- returns (Si(x), -)
    which = 2 -- returns (Ci(x), Si(x))

    Note: if x < 0, Ci(x) needs an additional imaginary term, pi*i.

    The value that was not requested is returned as None in the
    corresponding tuple slot, except in the special-value branch where
    ci is always filled in.
    """
    wp = prec + 20
    sign, man, exp, bc = x
    ci, si = None, None
    # Zero mantissa: x is 0, nan or +/-inf
    if not man:
        if x == fzero:
            return (fninf, fzero)
        if x == fnan:
            return (x, x)
        ci = fzero
        if which != 0:
            if x == finf:
                si = mpf_shift(mpf_pi(prec, rnd), -1)
            if x == fninf:
                # Round pi in the mirrored direction since it is negated
                si = mpf_neg(mpf_shift(mpf_pi(prec, negative_rnd[rnd]), -1))
        return (ci, si)
    # For small x: Ci(x) ~ euler + log(x), Si(x) ~ x
    mag = exp+bc
    if mag < -wp:
        if which != 0:
            si = mpf_perturb(x, 1-sign, prec, rnd)
        if which != 1:
            y = mpf_euler(wp)
            xabs = mpf_abs(x)
            ci = mpf_add(y, mpf_log(xabs, wp), prec, rnd)
        return ci, si
    # For huge x: Ci(x) ~ sin(x)/x, Si(x) ~ pi/2
    elif mag > wp:
        if which != 0:
            if sign:
                si = mpf_neg(mpf_pi(prec, negative_rnd[rnd]))
            else:
                si = mpf_pi(prec, rnd)
            si = mpf_shift(si, -1)
        if which != 1:
            ci = mpf_div(mpf_sin(x, wp), x, prec, rnd)
        return ci, si
    else:
        # Moderate x: widen the working precision by |mag| bits
        wp += abs(mag)
    # Use an asymptotic series? The smallest value of n!/x^n
    # occurs for n ~ x, where the magnitude is ~ exp(-x).
    asymptotic = mag-1 > math.log(wp, 2)
    # Case 1: convergent series near 0
    if not asymptotic:
        if which != 0:
            si = mpf_pos(mpf_ci_si_taylor(x, wp, 1), prec, rnd)
        if which != 1:
            ci = mpf_ci_si_taylor(x, wp, 0)
            ci = mpf_add(ci, mpf_euler(wp), wp)
            ci = mpf_add(ci, mpf_log(mpf_abs(x), wp), prec, rnd)
        return ci, si
    # From here on work with |x|; the sign is reapplied to Si at the end
    x = mpf_abs(x)
    # Case 2: asymptotic series for x >> 1
    xf = to_fixed(x, wp)
    xr = (MPZ_ONE<<(2*wp)) // xf # 1/x
    # s1 collects the even-order terms, s2 the odd-order terms
    s1 = (MPZ_ONE << wp)
    s2 = xr
    t = xr
    k = 2
    while t:
        # The sign flips once per pair of terms
        t = -t
        t = (t*xr*k)>>wp
        k += 1
        s1 += t
        t = (t*xr*k)>>wp
        k += 1
        s2 += t
    s1 = from_man_exp(s1, -wp)
    s2 = from_man_exp(s2, -wp)
    s1 = mpf_div(s1, x, wp)
    s2 = mpf_div(s2, x, wp)
    cos, sin = mpf_cos_sin(x, wp)
    # Ci(x) = sin(x)*s1-cos(x)*s2
    # Si(x) = pi/2-cos(x)*s1-sin(x)*s2
    if which != 0:
        si = mpf_add(mpf_mul(cos, s1), mpf_mul(sin, s2), wp)
        si = mpf_sub(mpf_shift(mpf_pi(wp), -1), si, wp)
        # Si is odd: restore the sign of the original argument
        if sign:
            si = mpf_neg(si)
        si = mpf_pos(si, prec, rnd)
    if which != 1:
        ci = mpf_sub(mpf_mul(sin, s1), mpf_mul(cos, s2), prec, rnd)
    return ci, si
855
+
856
def mpf_ci(x, prec, rnd=round_fast):
    """
    Real cosine integral Ci(x).

    Raises ComplexResult for negative x, where the value is not real.
    """
    if mpf_sign(x) >= 0:
        ci, _ = mpf_ci_si(x, prec, rnd, 0)
        return ci
    raise ComplexResult
860
+
861
def mpf_si(x, prec, rnd=round_fast):
    """Real sine integral Si(x), taken from the second slot of mpf_ci_si."""
    _, si = mpf_ci_si(x, prec, rnd, 1)
    return si
863
+
864
def mpc_ci(z, prec, rnd=round_fast):
    """
    Cosine integral Ci(z) for a complex argument z = (re, im).

    A purely real z is delegated to mpf_ci_si; a negative real argument
    gets an imaginary part of pi. Otherwise the Taylor series plus
    euler + log(z) is used.
    """
    re, im = z
    if im == fzero:
        real_part = mpf_ci_si(re, prec, rnd, 0)[0]
        imag_part = mpf_pi(prec, rnd) if mpf_sign(re) < 0 else fzero
        return (real_part, imag_part)
    wp = prec + 20
    series_re, series_im = mpc_ci_si_taylor(re, im, wp, 0)
    series_re = mpf_add(series_re, mpf_euler(wp), wp)
    return mpc_add((series_re, series_im), mpc_log(z, wp), prec, rnd)
876
+
877
def mpc_si(z, prec, rnd=round_fast):
    """
    Sine integral Si(z) for a complex argument z = (re, im).

    A purely real z is delegated to mpf_ci_si; otherwise the complex
    Taylor series is summed with 20 guard bits and rounded to prec.
    """
    re, im = z
    if im == fzero:
        real_si = mpf_ci_si(re, prec, rnd, 1)[1]
        return (real_si, fzero)
    series = mpc_ci_si_taylor(re, im, prec + 20, 1)
    return mpc_pos(series, prec, rnd)
884
+
885
+
886
+ #-----------------------------------------------------------------------#
887
+ # #
888
+ # Bessel functions #
889
+ # #
890
+ #-----------------------------------------------------------------------#
891
+
892
+ # A Bessel function of the first kind of integer order, J_n(x), is
893
+ # given by the power series
894
+
895
+ # J_n(x) = sum_{k=0}^{oo} (-1)^k / (k! (k+n)!) * (x/2)^(2k+n)
901
+
902
+ # Simplifying the quotient between two successive terms gives the
903
+ # ratio x^2 / (-4*k*(k+n)). Hence, we only need one full-precision
904
+ # multiplication and one division by a small integer per term.
905
+ # The complex version is very similar, the only difference being
906
+ # that the multiplication is actually 4 multiplies.
907
+
908
+ # In the general case, we have
909
+ # J_v(x) = (x/2)**v / v! * 0F1(v+1, (-1/4)*x**2)
910
+
911
+ # TODO: for extremely large x, we could use an asymptotic
912
+ # trigonometric approximation.
913
+
914
+ # TODO: recompute at higher precision if the fixed-point mantissa
915
+ # is very small
916
+
917
def mpf_besseljn(n, x, prec, rounding=round_fast):
    """
    Bessel function J_n(x) of integer order n for an mpf x, evaluated by
    summing the power series in fixed-point arithmetic.

    NOTE(review): prec is increased by 50 on entry and the result is
    rounded to that increased value, not to the caller's prec -- confirm
    that callers round the result themselves.
    """
    prec += 50
    # J_(-n) = (-1)^n J_n: remember whether to flip the sign
    negate = n < 0 and n & 1
    mag = x[2]+x[3]
    n = abs(n)
    # Guard bits grow with n
    wp = prec + 20 + n*bitcount(n)
    if mag < 0:
        # Small |x|: add n*|mag| more bits
        wp -= n * mag
    x = to_fixed(x, wp)
    x2 = (x**2) >> wp
    if not n:
        s = t = MPZ_ONE << wp
    else:
        # First term (x/2)^n / n!, rescaled to wp fractional bits
        s = t = (x**n // ifac(n)) >> ((n-1)*wp + n)
    k = 1
    # Term ratio is x^2 / (-4*k*(k+n)); stop when the term underflows
    while t:
        t = ((t * x2) // (-4*k*(k+n))) >> wp
        s += t
        k += 1
    if negate:
        s = -s
    return from_man_exp(s, -wp, prec, rounding)
939
+
940
def mpc_besseljn(n, z, prec, rounding=round_fast):
    """
    Bessel function J_n(z) of integer order n for complex z = (zre, zim),
    evaluated by summing the power series in fixed-point arithmetic.

    Returns (re, im), each rounded to the caller's original prec.
    """
    # J_(-n) = (-1)^n J_n: remember whether to flip the sign
    negate = n < 0 and n & 1
    n = abs(n)
    origprec = prec
    zre, zim = z
    mag = max(zre[2]+zre[3], zim[2]+zim[3])
    # From here on prec is the fixed-point working precision
    prec += 20 + n*bitcount(n) + abs(mag)
    if mag < 0:
        prec -= n * mag
    zre = to_fixed(zre, prec)
    zim = to_fixed(zim, prec)
    # z**2 = (zre^2 - zim^2) + 2*zre*zim*i; the factor 2 is folded
    # into the shift (prec-1)
    z2re = (zre**2 - zim**2) >> prec
    z2im = (zre*zim) >> (prec-1)
    if not n:
        sre = tre = MPZ_ONE << prec
        sim = tim = MPZ_ZERO
    else:
        # First term (z/2)^n / n!, rescaled to prec fractional bits
        re, im = complex_int_pow(zre, zim, n)
        sre = tre = (re // ifac(n)) >> ((n-1)*prec + n)
        sim = tim = (im // ifac(n)) >> ((n-1)*prec + n)
    k = 1
    # Stop once the term is down to a few units in the last place
    while abs(tre) + abs(tim) > 3:
        p = -4*k*(k+n)
        # Complex multiply of the current term by z**2
        tre, tim = tre*z2re - tim*z2im, tim*z2re + tre*z2im
        tre = (tre // p) >> prec
        tim = (tim // p) >> prec
        sre += tre
        sim += tim
        k += 1
    if negate:
        sre = -sre
        sim = -sim
    re = from_man_exp(sre, -prec, origprec, rounding)
    im = from_man_exp(sim, -prec, origprec, rounding)
    return (re, im)
975
+
976
def mpf_agm(a, b, prec, rnd=round_fast):
    """
    Computes the arithmetic-geometric mean agm(a,b) for
    nonnegative mpf values a, b.

    Raises ComplexResult if either operand has its sign bit set.
    Returns nan for nan input and for agm(inf, 0); inf if one operand
    is inf and the other is nonzero; zero if either operand is zero.
    """
    asign, aman, aexp, abc = a
    bsign, bman, bexp, bbc = b
    if asign or bsign:
        raise ComplexResult("agm of a negative number")
    # Handle inf, nan or zero in either operand
    if not (aman and bman):
        if a == fnan or b == fnan:
            return fnan
        if a == finf:
            if b == fzero:
                return fnan
            return finf
        if b == finf:
            if a == fzero:
                return fnan
            return finf
        # agm(0,x) = agm(x,0) = 0
        return fzero
    wp = prec + 20
    amag = aexp+abc
    bmag = bexp+bbc
    mag_delta = amag - bmag
    # Reduce to roughly the same magnitude using floating-point AGM
    abs_mag_delta = abs(mag_delta)
    if abs_mag_delta > 10:
        # The gap estimate is halved per step rather than re-measured
        while abs_mag_delta > 10:
            a, b = mpf_shift(mpf_add(a,b,wp),-1), \
                mpf_sqrt(mpf_mul(a,b,wp),wp)
            abs_mag_delta //= 2
        # Refresh the unpacked fields after the floating-point steps
        asign, aman, aexp, abc = a
        bsign, bman, bexp, bbc = b
        amag = aexp+abc
        bmag = bexp+bbc
        mag_delta = amag - bmag
    #print to_float(a), to_float(b)
    # Use agm(a,b) = agm(x*a,x*b)/x to obtain a, b ~= 1
    min_mag = min(amag,bmag)
    max_mag = max(amag,bmag)
    # n is the binary scaling exponent (x = 2**n); 0 means no scaling
    n = 0
    # If too small, we lose precision when going to fixed-point
    if min_mag < -8:
        n = -min_mag
    # If too large, we waste time using fixed-point with large numbers
    elif max_mag > 20:
        n = -max_mag
    if n:
        a = mpf_shift(a, n)
        b = mpf_shift(b, n)
    #print to_float(a), to_float(b)
    af = to_fixed(a, wp)
    bf = to_fixed(b, wp)
    g = agm_fixed(af, bf, wp)
    # Undo the 2**n scaling through the exponent
    return from_man_exp(g, -wp-n, prec, rnd)
1034
+
1035
def mpf_agm1(a, prec, rnd=round_fast):
    """
    Arithmetic-geometric mean of 1 and a nonnegative mpf value a,
    i.e. mpf_agm with the first operand fixed to one.
    """
    mean = mpf_agm(fone, a, prec, rnd)
    return mean
1041
+
1042
def mpc_agm(a, b, prec, rnd=round_fast):
    """
    Complex AGM.

    Returns (nan, nan) if either operand is inf/nan, and (0, 0) if
    either operand is zero or if a == -b.

    NOTE(review): the iterate is returned at the working precision
    (prec+20) and rnd is never applied -- confirm callers round it.

    TODO:
    * check that convergence works as intended
    * optimize
    * select a nonarbitrary branch
    """
    if mpc_is_infnan(a) or mpc_is_infnan(b):
        return fnan, fnan
    if mpc_zero in (a, b):
        return fzero, fzero
    if mpc_neg(a) == b:
        return fzero, fzero
    wp = prec+20
    # Relative tolerance 2**(-wp+10)
    eps = mpf_shift(fone, -wp+10)
    while 1:
        # One AGM step: arithmetic mean and geometric mean
        a1 = mpc_shift(mpc_add(a, b, wp), -1)
        b1 = mpc_sqrt(mpc_mul(a, b, wp), wp)
        a, b = a1, b1
        # Size and error only need a rough (10-bit) estimate
        size = mpf_min_max([mpc_abs(a,10), mpc_abs(b,10)])[1]
        err = mpc_abs(mpc_sub(a, b, 10), 10)
        if size == fzero or mpf_lt(err, mpf_mul(eps, size)):
            return a
1067
+
1068
def mpc_agm1(a, prec, rnd=round_fast):
    """Complex AGM of 1 and a, i.e. mpc_agm with first operand mpc_one."""
    mean = mpc_agm(mpc_one, a, prec, rnd)
    return mean
1070
+
1071
def mpf_ellipk(x, prec, rnd=round_fast):
    """
    Complete elliptic integral of the first kind K(x) for an mpf x,
    computed as pi/2 / agm(1, sqrt(1-x)).

    Special values: K(0) = pi/2, K(-inf) = 0, K(nan) = nan, K(1) = inf.
    """
    # Zero mantissa: x is 0, nan or +/-inf
    if not x[1]:
        if x == fzero:
            return mpf_shift(mpf_pi(prec, rnd), -1)
        if x == fninf:
            return fzero
        if x == fnan:
            return x
    if x == fone:
        return finf
    # TODO: for |x| << 1/2, one could use fall back to
    # pi/2 * hyp2f1_rat((1,2),(1,2),(1,1), x)
    wp = prec + 15
    # Use K(x) = pi/2/agm(1,a) where a = sqrt(1-x)
    # The sqrt is taken of 1-x, which is negative when x > 1
    # (presumably raising ComplexResult there -- confirm in mpf_sqrt)
    a = mpf_sqrt(mpf_sub(fone, x, wp), wp)
    v = mpf_agm1(a, wp)
    r = mpf_div(mpf_pi(wp), v, prec, rnd)
    # Halving is an exact exponent shift, so rounding happens above
    return mpf_shift(r, -1)
1090
+
1091
def mpc_ellipk(z, prec, rnd=round_fast):
    """
    Complete elliptic integral of the first kind K(z) for complex
    z = (re, im).

    A real argument of +inf gives zero and a real argument <= 1 is
    delegated to mpf_ellipk. Everything else uses
    pi/2 / agm(1, sqrt(1-z)) with 15 guard bits.
    """
    re, im = z
    if im == fzero:
        if re == finf:
            return mpc_zero
        if mpf_le(re, fone):
            return mpf_ellipk(re, prec, rnd), fzero
    wp = prec + 15
    one_minus_z = mpc_sub(mpc_one, z, wp)
    mean = mpc_agm1(mpc_sqrt(one_minus_z, wp), wp)
    quotient = mpc_mpf_div(mpf_pi(wp), mean, prec, rnd)
    return mpc_shift(quotient, -1)
1103
+
1104
def mpf_ellipe(x, prec, rnd=round_fast):
    """
    Complete elliptic integral of the second kind E(x) for an mpf x,
    obtained from K and a finite-difference estimate of K'.

    Special values: E(0) = pi/2, E(-inf) = inf, E(nan) = nan, E(1) = 1.
    Raises ComplexResult for x = +inf.
    """
    # http://functions.wolfram.com/EllipticIntegrals/
    # EllipticK/20/01/0001/
    # E = (1-m)*(K'(m)*2*m + K(m))
    sign, man, exp, bc = x
    # Zero mantissa: x is 0, nan or +/-inf
    if not man:
        if x == fzero:
            return mpf_shift(mpf_pi(prec, rnd), -1)
        if x == fninf:
            return finf
        if x == fnan:
            return x
        if x == finf:
            raise ComplexResult
    if x == fone:
        return fone
    wp = prec+20
    mag = exp+bc
    # Tiny x: the result is pi/2 to within working precision
    if mag < -wp:
        return mpf_shift(mpf_pi(prec, rnd), -1)
    # Compute a finite difference for K'
    p = max(mag, 0) - wp
    # Step size h = 2**p
    h = mpf_shift(fone, p)
    # K is evaluated at doubled precision for the subtraction below
    K = mpf_ellipk(x, 2*wp)
    Kh = mpf_ellipk(mpf_sub(x, h), 2*wp)
    # Backward difference (K(x) - K(x-h)) / h; the division is a shift
    Kdiff = mpf_shift(mpf_sub(K, Kh), -p)
    t = mpf_sub(fone, x)
    b = mpf_mul(Kdiff, mpf_shift(x,1), wp)
    return mpf_mul(t, mpf_add(K, b), prec, rnd)
1133
+
1134
def mpc_ellipe(z, prec, rnd=round_fast):
    """
    Complete elliptic integral of the second kind E(z) for complex
    z = (re, im), using E = (1-z)*(K(z) + 2*z*K'(z)) with K' estimated
    by a finite difference.

    A real argument of +inf returns (0, inf) and a real argument <= 1
    is delegated to mpf_ellipe.
    """
    re, im = z
    if im == fzero:
        if re == finf:
            return (fzero, finf)
        if mpf_le(re, fone):
            return mpf_ellipe(re, prec, rnd), fzero
    wp = prec + 15
    # One-bit |z| is enough to read off the magnitude
    mag = mpc_abs(z, 1)
    p = max(mag[2]+mag[3], 0) - wp
    # Step size h = 2**p
    h = mpf_shift(fone, p)
    K = mpc_ellipk(z, 2*wp)
    Kh = mpc_ellipk(mpc_add_mpf(z, h, 2*wp), 2*wp)
    # Forward difference (K(z+h) - K(z)) / h; the division is a shift
    Kdiff = mpc_shift(mpc_sub(Kh, K, wp), -p)
    t = mpc_sub(mpc_one, z, wp)
    b = mpc_mul(Kdiff, mpc_shift(z,1), wp)
    return mpc_mul(t, mpc_add(K, b, wp), prec, rnd)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libintmath.py ADDED
@@ -0,0 +1,584 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utility functions for integer math.
3
+
4
+ TODO: rename, cleanup, perhaps move the gmpy wrapper code
5
+ here from settings.py
6
+
7
+ """
8
+
9
+ import math
10
+ from bisect import bisect
11
+
12
+ from .backend import xrange
13
+ from .backend import BACKEND, gmpy, sage, sage_utils, MPZ, MPZ_ONE, MPZ_ZERO
14
+
15
# small_trailing[b] is the number of trailing zero bits of the byte b
# (with small_trailing[0] left at 0).
small_trailing = [0] * 256
for j in range(1,8):
    # Indices 2**j, 3*2**j, 5*2**j, ... have exactly j trailing zeros
    small_trailing[1<<j::1<<(j+1)] = [j] * (1<<(7-j))
18
+
19
def giant_steps(start, target, n=2):
    """
    Return a list of integers ~=

    [start, n*start, ..., target/n^2, target/n, target]

    but conservatively rounded so that the quotient between two
    successive elements is actually slightly less than n.

    With n = 2, this describes suitable precision steps for a
    quadratically convergent algorithm such as Newton's method;
    with n = 3 steps for cubic convergence (Halley's method), etc.

    >>> giant_steps(50,1000)
    [66, 128, 253, 502, 1000]
    >>> giant_steps(50,1000,4)
    [65, 252, 1000]

    """
    # Built downwards from the target, then reversed
    steps = [target]
    threshold = start*n
    while steps[-1] > threshold:
        steps.append(steps[-1]//n + 2)
    steps.reverse()
    return steps
42
+
43
def rshift(x, n):
    """Return x >> n with floor rounding for an integer x. A negative
    shift count is accepted and performs a left shift by -n instead."""
    return x >> n if n >= 0 else x << (-n)
49
+
50
def lshift(x, n):
    """Return x << n for an integer x. A negative shift count is
    accepted and performs a right shift by -n with floor rounding."""
    return x << n if n >= 0 else x >> (-n)
56
+
57
# With the sage backend, use the operator-module shifts in place of
# the Python wrappers defined in this file.
# NOTE(review): presumably Sage integers accept negative shift counts
# natively -- confirm.
if BACKEND == 'sage':
    import operator
    rshift = operator.rshift
    lshift = operator.lshift
61
+
62
def python_trailing(n):
    """Count the number of trailing zero bits in abs(n)."""
    if not n:
        return 0
    # Skip whole zero bytes, then look up the first nonzero byte
    zeros = 0
    while not n & 0xff:
        n >>= 8
        zeros += 8
    return zeros + small_trailing[n & 0xff]
75
+
76
if BACKEND == 'gmpy':
    # Compare the major version numerically; a plain string comparison
    # against '2' would classify a '10.x' release as older than 2.
    if int(gmpy.version().split('.')[0]) >= 2:
        def gmpy_trailing(n):
            """Count the number of trailing zero bits in abs(n) using gmpy."""
            if n: return MPZ(n).bit_scan1()
            else: return 0
    else:
        def gmpy_trailing(n):
            """Count the number of trailing zero bits in abs(n) using gmpy."""
            if n: return MPZ(n).scan1()
            else: return 0
87
+
88
# Small powers of 2
powers = [1 << exponent for exponent in range(300)]

def python_bitcount(n):
    """Calculate bit size of the nonnegative integer n."""
    # Binary search in the power table handles everything below 2**300
    size = bisect(powers, n)
    if size == 300:
        # Large n: estimate with a float logarithm, then correct using
        # the exact bit count of the leading bits
        shift = int(math.log(n, 2)) - 4
        size = shift + bctable[n >> shift]
    return size
98
+
99
def gmpy_bitcount(n):
    """Calculate bit size of the nonnegative integer n."""
    # Zero is special-cased so that it reports a bit size of 0
    return MPZ(n).numdigits(2) if n else 0
103
+
104
+ #def sage_bitcount(n):
105
+ # if n: return MPZ(n).nbits()
106
+ # else: return 0
107
+
108
def sage_trailing(n):
    """Count trailing zero bits of n via the MPZ type's
    trailing_zero_bits() method (used with the sage backend)."""
    return MPZ(n).trailing_zero_bits()
110
+
111
# Bind the public names bitcount/trailing to the implementation that
# matches the active backend.
if BACKEND == 'gmpy':
    bitcount = gmpy_bitcount
    trailing = gmpy_trailing
elif BACKEND == 'sage':
    sage_bitcount = sage_utils.bitcount
    bitcount = sage_bitcount
    trailing = sage_trailing
else:
    bitcount = python_bitcount
    trailing = python_trailing

# Use gmpy's own bit_length function when the module provides one
if BACKEND == 'gmpy' and 'bit_length' in dir(gmpy):
    bitcount = gmpy.bit_length

# Used to avoid slow function calls as far as possible
trailtable = [trailing(n) for n in range(256)]
bctable = [bitcount(n) for n in range(1024)]
128
+
129
+ # TODO: speed up for bases 2, 4, 8, 16, ...
130
+
131
def bin_to_radix(x, xbits, base, bdigits):
    """Changes radix of a fixed-point number; i.e., converts the value
    x * 2**(-xbits) to the integer floor(x * base**bdigits / 2**xbits),
    a fixed-point number with bdigits fractional digits in the given
    base."""
    # The multiplication binds tighter than the shift
    return x * (MPZ(base)**bdigits) >> xbits
135
+
136
stddigits = '0123456789abcdefghijklmnopqrstuvwxyz'

def small_numeral(n, base=10, digits=stddigits):
    """Return the string numeral of a nonnegative integer in an arbitrary
    base. Most efficient for small input.

    digits supplies the character used for each digit value. Zero is
    rendered as the zero digit in every base; previously it produced an
    empty string for bases other than 10.

    Negative n is not supported for bases other than 10."""
    if base == 10:
        return str(n)
    if not n:
        return digits[0]
    digs = []
    # Peel off the least significant digit until nothing is left
    while n:
        n, digit = divmod(n, base)
        digs.append(digits[digit])
    return "".join(digs[::-1])
148
+
149
def numeral_python(n, base=10, size=0, digits=stddigits):
    """Render the integer n as a digit string in the given base.

    Large inputs are split recursively around base**half, which makes
    this about 3x faster than Python's str() for decimal conversion.

    'size' is the approximate number of digits in n. It only steers
    where the number is split and does not have to be exact."""
    if not n:
        return "0"
    if n < 0:
        return "-" + numeral(-n, base, size, digits)
    # Fast enough to do directly
    if size < 250:
        return small_numeral(n, base, digits)
    # Divide in half
    half = (size // 2) + (size & 1)
    high, low = divmod(n, base**half)
    high_digits = numeral(high, base, half, digits)
    # The low part must keep its leading zeros
    low_digits = numeral(low, base, half, digits).rjust(half, "0")
    return high_digits + low_digits
170
+
171
def numeral_gmpy(n, base=10, size=0, digits=stddigits):
    """Represent the integer n as a string of digits in the given base,
    using gmpy.digits() directly below a size threshold and recursive
    splitting above it.

    The 'size' parameters specifies the number of digits in n; this
    number is only used to determine splitting points and need not be
    exact.

    The 'digits' argument is only forwarded to the recursive calls; the
    direct gmpy.digits() path does not use it."""
    if n < 0:
        return "-" + numeral(-n, base, size, digits)
    # gmpy.digits() may cause a segmentation fault when trying to convert
    # extremely large values to a string. The size limit may need to be
    # adjusted on some platforms, but 1500000 works on Windows and Linux.
    if size < 1500000:
        return gmpy.digits(n, base)
    # Divide in half
    half = (size // 2) + (size & 1)
    A, B = divmod(n, MPZ(base)**half)
    ad = numeral(A, base, half, digits)
    # Pad the low half so its leading zeros are kept
    bd = numeral(B, base, half, digits).rjust(half, "0")
    return ad + bd
192
+
193
# Bind the public name numeral to the backend-specific implementation;
# both implementations recurse through this name.
if BACKEND == "gmpy":
    numeral = numeral_gmpy
else:
    numeral = numeral_python
197
+
198
# Precomputed powers of two, used as size cutoffs by the integer
# square root routines.
_1_800 = 1<<800
_1_600 = 1<<600
_1_400 = 1<<400
_1_200 = 1<<200
_1_100 = 1<<100
_1_50 = 1<<50
204
+
205
def isqrt_small_python(x):
    """
    Correctly (floor) rounded integer square root, using
    division. Fast up to ~200 digits.

    A floating-point estimate that is at least the true root seeds an
    integer Newton iteration, which decreases until it stops.
    """
    if not x:
        return x
    if x < _1_800:
        # Exact with IEEE double precision arithmetic
        if x < _1_50:
            return int(x**0.5)
        # Initial estimate can be any integer >= the true root; round up
        r = int(x**0.5 * 1.00000000000001) + 1
    else:
        # Take the float square root of the leading bits only, then
        # shift the estimate back into place
        bc = bitcount(x)
        n = bc//2
        r = int((x>>(2*n-100))**0.5+2)<<(n-50)  # +2 is to round up
    # The following iteration now precisely computes floor(sqrt(x))
    # See e.g. Crandall & Pomerance, "Prime Numbers: A Computational
    # Perspective"
    while 1:
        y = (r+x//r)>>1
        # The sequence decreases until it reaches the floor root
        if y >= r:
            return r
        r = y
230
+
231
def isqrt_fast_python(x):
    """
    Fast approximate integer square root, computed using division-free
    Newton iteration for large x. For random integers the result is almost
    always correct (floor(sqrt(x))), but is 1 ulp too small with a roughly
    0.1% probability. If x is very close to an exact square, the answer is
    1 ulp wrong with high probability.

    With 0 guard bits, the largest error over a set of 10^5 random
    inputs of size 1-10^5 bits was 3 ulp. The use of 10 guard bits
    almost certainly guarantees a max 1 ulp error.
    """
    # Use direct division-based iteration if sqrt(x) < 2^400
    # Assume floating-point square root accurate to within 1 ulp, then:
    #   0 Newton iterations good to 52 bits
    #   1 Newton iterations good to 104 bits
    #   2 Newton iterations good to 208 bits
    #   3 Newton iterations good to 416 bits
    if x < _1_800:
        y = int(x**0.5)
        if x >= _1_100:
            y = (y + x//y) >> 1
            if x >= _1_200:
                y = (y + x//y) >> 1
                if x >= _1_400:
                    y = (y + x//y) >> 1
        return y
    bc = bitcount(x)
    guard_bits = 10
    # Scale x by 2**(2*guard_bits); undone in the final shift
    x <<= 2*guard_bits
    bc += 2*guard_bits
    # Round the bit count up to an even number so it can be halved
    bc += (bc&1)
    hbc = bc//2
    startprec = min(50, hbc)
    # Newton iteration for 1/sqrt(x), with floating-point starting value
    r = int(2.0**(2*startprec) * (x >> (bc-2*startprec)) ** -0.5)
    # pp is the precision of the current r; p is the precision being refined to
    pp = startprec
    for p in giant_steps(startprec, hbc):
        # r**2, scaled from real size 2**(-bc) to 2**p
        r2 = (r*r) >> (2*pp - p)
        # x*r**2, scaled from real size ~1.0 to 2**p
        xr2 = ((x >> (bc-p)) * r2) >> p
        # New value of r, scaled from real size 2**(-bc/2) to 2**p
        r = (r * ((3<<p) - xr2)) >> (pp+1)
        pp = p
    # (1/sqrt(x))*x = sqrt(x)
    return (r*(x>>hbc)) >> (p+guard_bits)
278
+
279
def sqrtrem_python(x):
    """Correctly rounded integer (floor) square root with remainder.

    Returns (y, rem) with y*y + rem == x and 0 <= rem <= 2*y."""
    # to check cutoff:
    # plot(lambda x: timing(isqrt, 2**int(x)), [0,2000])
    if x < _1_600:
        y = isqrt_small_python(x)
        return y, x - y*y
    # Start one above the fast estimate, which tends to be slightly low
    y = isqrt_fast_python(x) + 1
    rem = x - y*y
    # Correct remainder
    # Estimate too large: step down. With y already decremented,
    # old_y**2 - y**2 == 2*y + 1.
    while rem < 0:
        y -= 1
        rem += (1+2*y)
    # Estimate too small: a floor root needs rem <= 2*y, because
    # (y+1)**2 - y**2 == 2*y + 1. Subtract before incrementing y so the
    # remainder stays exact.
    while rem > 2*y:
        rem -= (1+2*y)
        y += 1
    return y, rem
298
+
299
def isqrt_python(x):
    """Floor-rounded integer square root: the root half of sqrtrem_python."""
    root, _ = sqrtrem_python(x)
    return root
302
+
303
def sqrt_fixed(x, prec):
    """Square root of a fixed-point number x with prec fractional bits,
    returned at the same scale. The argument is pre-shifted by prec bits
    so that the integer root comes out at that scale. Uses isqrt_fast,
    so the last bit is only as accurate as that function."""
    return isqrt_fast(x<<prec)
305
+
306
+ sqrt_fixed2 = sqrt_fixed
307
+
308
# Bind the public square-root names to the active backend.
if BACKEND == 'gmpy':
    # Compare the major version numerically; a plain string comparison
    # against '2' would classify a '10.x' release as older than 2.
    if int(gmpy.version().split('.')[0]) >= 2:
        isqrt_small = isqrt_fast = isqrt = gmpy.isqrt
        sqrtrem = gmpy.isqrt_rem
    else:
        isqrt_small = isqrt_fast = isqrt = gmpy.sqrt
        sqrtrem = gmpy.sqrtrem
elif BACKEND == 'sage':
    # Use sage_utils.isqrt when available, else the MPZ method
    isqrt_small = isqrt_fast = isqrt = \
        getattr(sage_utils, "isqrt", lambda n: MPZ(n).isqrt())
    sqrtrem = lambda n: MPZ(n).sqrtrem()
else:
    isqrt_small = isqrt_small_python
    isqrt_fast = isqrt_fast_python
    isqrt = isqrt_python
    sqrtrem = sqrtrem_python
324
+
325
+
326
def ifib(n, _cache={}):
    """Return the nth Fibonacci number as an integer, for integer n.

    Negative indices use F(-n) = (-1)**(n+1) * F(n). Results for
    indices below 250 are memoized in the shared _cache dictionary."""
    if n < 0:
        return (-1)**(-n+1) * ifib(-n)
    if n in _cache:
        return _cache[n]
    index = n
    # Use Dijkstra's logarithmic algorithm
    # The following implementation is basically equivalent to
    # http://en.literateprograms.org/Fibonacci_numbers_(Scheme)
    a, b, p, q = MPZ_ONE, MPZ_ZERO, MPZ_ZERO, MPZ_ONE
    while n:
        if not n & 1:
            # Even: square the transformation and halve the count
            qq = q*q
            p, q = p*p+qq, qq+2*p*q
            n >>= 1
        else:
            # Odd: apply the transformation once
            aq = a*q
            a, b = b*q+aq+a*p, b*p+aq
            n -= 1
    if index < 250:
        _cache[index] = b
    return b
350
+
351
MAX_FACTORIAL_CACHE = 1000

def ifac(n, memo={0:1, 1:1}):
    """Return n factorial (for integers n >= 0 only).

    Values up to MAX_FACTORIAL_CACHE are memoized in the shared memo
    dictionary, which is always filled contiguously from 0.

    Raises ValueError for negative n; previously a negative argument
    silently returned the largest cached factorial."""
    if n < 0:
        raise ValueError("factorial of a negative number")
    f = memo.get(n)
    if f:
        return f
    # Resume from the largest cached factorial
    k = len(memo)
    p = memo[k-1]
    MAX = MAX_FACTORIAL_CACHE
    while k <= n:
        p *= k
        if k <= MAX:
            memo[k] = p
        k += 1
    return p
367
+
368
def ifac2(n, memo_pair=[{0:1}, {1:1}]):
    """Return n!! (double factorial), integers n >= 0 only.

    Even and odd arguments are memoized in separate dictionaries, for
    values up to MAX_FACTORIAL_CACHE."""
    # Index 0 holds the even table, index 1 the odd table
    table = memo_pair[n&1]
    cached = table.get(n)
    if cached:
        return cached
    # Resume from the largest cached entry of the same parity
    k = max(table)
    product = table[k]
    limit = MAX_FACTORIAL_CACHE
    while k < n:
        k += 2
        product *= k
        if k <= limit:
            table[k] = product
    return product
383
+
384
# Replace the pure-Python factorial (and, for sage, Fibonacci) with the
# backend's own implementation.
if BACKEND == 'gmpy':
    ifac = gmpy.fac
elif BACKEND == 'sage':
    # The factorial result is converted to a plain Python int
    ifac = lambda n: int(sage.factorial(n))
    ifib = sage.fibonacci
389
+
390
def list_primes(n):
    """Return the list of all primes <= n, via a sieve of Eratosthenes."""
    limit = n + 1
    # Each slot starts out holding its own index; composites become 0
    sieve = list(xrange(limit))
    sieve[:2] = [0, 0]
    for candidate in xrange(2, int(limit**0.5)+1):
        if not sieve[candidate]:
            continue
        for multiple in xrange(candidate**2, limit, candidate):
            sieve[multiple] = 0
    return [value for value in sieve if value]
399
+
400
# With the sage backend, replace the sieve with sage.primes.
if BACKEND == 'sage':
    # Note: it is *VERY* important for performance that we convert
    # the list to Python ints.
    def list_primes(n):
        """Return the primes produced by sage.primes(n+1) as Python ints."""
        return [int(_) for _ in sage.primes(n+1)]
405
+
406
small_odd_primes = (3,5,7,11,13,17,19,23,29,31,37,41,43,47)
small_odd_primes_set = set(small_odd_primes)

def isprime(n):
    """
    Determines whether n is a prime number. A probabilistic test is
    performed if n is very large. No special trick is used for detecting
    perfect powers.

    >>> sum(list_primes(100000))
    454396537
    >>> sum(n*isprime(n) for n in range(100000))
    454396537

    """
    n = int(n)
    # Even numbers: only 2 is prime
    if not n & 1:
        return n == 2
    # Small odd numbers are answered from the table
    if n < 50:
        return n in small_odd_primes_set
    # Trial division by the small odd primes
    if any(not n % p for p in small_odd_primes):
        return False
    # Write n - 1 = d * 2**s with d odd
    n_minus_1 = n-1
    s = trailing(n_minus_1)
    d = n_minus_1 >> s

    def passes(base):
        # One strong-pseudoprime round for the given base
        x = pow(base,d,n)
        if x == 1 or x == n_minus_1:
            return True
        for _ in xrange(1,s):
            x = x**2 % n
            if x == n_minus_1:
                return True
        return False

    # See http://primes.utm.edu/prove/prove2_3.html
    if n < 1373653:
        witnesses = [2,3]
    elif n < 341550071728321:
        witnesses = [2,3,5,7,11,13,17]
    else:
        witnesses = small_odd_primes
    return all(passes(base) for base in witnesses)
452
+
453
def moebius(n):
    """
    Evaluates the Moebius function which is `mu(n) = (-1)^k` if `n`
    is a product of `k` distinct primes and `mu(n) = 0` otherwise.

    The argument is converted with int() and its absolute value is
    used; 0 and 1 are returned unchanged. The value is found by trial
    division up to sqrt(n), returning 0 as soon as a squared prime
    factor is detected.
    """
    n = abs(int(n))
    if n < 2:
        return n
    sign = 1
    p = 2
    while p*p <= n:
        if not n % p:
            n //= p
            # A second factor of p means n is not squarefree
            if not n % p:
                return 0
            sign = -sign
        p += 1
    # Whatever remains above 1 is a single prime factor
    if n > 1:
        sign = -sign
    return sign
471
+
472
def gcd(*args):
    """Greatest common divisor of all arguments via Euclid's algorithm.

    Returns 0 when called without arguments. The sign of the result
    follows Python's % operator when negative values are passed."""
    result = 0
    for value in args:
        if not result:
            # Nothing accumulated yet (or only zeros): adopt the value
            result = value
            continue
        while value:
            result, value = value, result % value
    return result
481
+
482
+
483
+ # Comment by Juan Arias de Reyna:
484
+ #
485
+ # I learn this method to compute EulerE[2n] from van de Lune.
486
+ #
487
+ # We apply the formula EulerE[2n] = (-1)^n 2**(-2n) sum_{j=0}^n a(2n,2j+1)
488
+ #
489
+ # where the numbers a(n,j) vanish for j > n+1 or j <= -1 and satisfies
490
+ #
491
+ # a(0,-1) = a(0,0) = 0; a(0,1)= 1; a(0,2) = a(0,3) = 0
492
+ #
493
+ # a(n,j) = a(n-1,j) when n+j is even
494
+ # a(n,j) = (j-1) a(n-1,j-1) + (j+1) a(n-1,j+1) when n+j is odd
495
+ #
496
+ #
497
+ # But we can use only one array unidimensional a(j) since to compute
498
+ # a(n,j) we only need to know a(n-1,k) where k and j are of different parity
499
+ # and we have not to conserve the used values.
500
+ #
501
+ # We cached up the values of Euler numbers to sufficiently high order.
502
+ #
503
+ # Important Observation: If we pretend to use the numbers
504
+ # EulerE[1], EulerE[2], ... , EulerE[n]
505
+ # it is convenient to compute first EulerE[n], since the algorithm
506
+ # computes first all
507
+ # the previous ones, and keeps them in the CACHE
508
+
509
# Euler numbers with index up to this bound are kept in the cache
MAX_EULER_CACHE = 500

def eulernum(m, _cache={0:MPZ_ONE}):
    r"""
    Computes the Euler numbers `E(n)`, which can be defined as
    coefficients of the Taylor expansion of `1/cosh x`:

    .. math ::

        \frac{1}{\cosh x} = \sum_{n=0}^\infty \frac{E_n}{n!} x^n

    Example::

        >>> [int(eulernum(n)) for n in range(11)]
        [1, 0, -1, 0, 5, 0, -61, 0, 1385, 0, -50521]
        >>> [int(eulernum(n)) for n in range(11)]   # test cache
        [1, 0, -1, 0, 5, 0, -61, 0, 1385, 0, -50521]

    """
    # for odd m, the Euler numbers are zero
    if m & 1:
        return MPZ_ZERO
    f = _cache.get(m)
    if f:
        return f
    MAX = MAX_EULER_CACHE
    n = m
    # Working row of the a(n, j) recurrence, stored with an index offset
    # of one; it grows by one slot per step
    a = [MPZ(_) for _ in [0,0,1,0,0,0]]
    # NOTE(review): for a negative even m this loop body never runs and
    # the function falls through, returning None.
    for n in range(1, m+1):
        # Only the entries whose parity changes at this n are updated
        for j in range(n+1, -1, -2):
            a[j+1] = (j-1)*a[j] + (j+1)*a[j+2]
        a.append(0)
        suma = 0
        for k in range(n+1, -1, -2):
            suma += a[k+1]
        # Every intermediate index is cached on the way up to m
        if n <= MAX:
            _cache[n] = ((-1)**(n//2))*(suma // 2**n)
        if n == m:
            return ((-1)**(n//2))*suma // 2**n
548
+
549
def stirling1(n, k):
    """
    Signed Stirling number of the first kind s(n, k).

    Raises ValueError if n or k is negative.
    """
    if n < 0 or k < 0:
        raise ValueError
    if k >= n:
        return MPZ(n == k)
    if k < 1:
        return MPZ_ZERO
    # row[j] holds the unsigned value for the current m, updated in
    # place from high j to low j so row[j-1] is still the previous row
    row = [MPZ_ZERO] * (k+1)
    row[1] = MPZ_ONE
    for m in xrange(2, n+1):
        top = min(k, m)
        for j in xrange(top, 0, -1):
            row[j] = (m-1) * row[j] + row[j-1]
    sign = (-1)**(n+k)
    return sign * row[k]
565
+
566
def stirling2(n, k):
    """
    Stirling number of the second kind S(n, k), from the explicit
    alternating sum over binomial(k, j) * j**n divided by k!.

    Raises ValueError if n or k is negative.
    """
    if n < 0 or k < 0:
        raise ValueError
    if k >= n:
        return MPZ(n == k)
    if k <= 1:
        return MPZ(k == 1)
    total = MPZ_ZERO
    # binom runs through binomial(k, j), updated incrementally
    binom = MPZ_ONE
    for j in xrange(k+1):
        term = binom * MPZ(j)**n
        if (k + j) & 1:
            total -= term
        else:
            total += term
        binom = binom * (k - j) // (j + 1)
    return total // ifac(k)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/libmp/libmpc.py ADDED
@@ -0,0 +1,835 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Low-level functions for complex arithmetic.
3
+ """
4
+
5
+ import sys
6
+
7
+ from .backend import MPZ, MPZ_ZERO, MPZ_ONE, MPZ_TWO, BACKEND
8
+
9
+ from .libmpf import (\
10
+ round_floor, round_ceiling, round_down, round_up,
11
+ round_nearest, round_fast, bitcount,
12
+ bctable, normalize, normalize1, reciprocal_rnd, rshift, lshift, giant_steps,
13
+ negative_rnd,
14
+ to_str, to_fixed, from_man_exp, from_float, to_float, from_int, to_int,
15
+ fzero, fone, ftwo, fhalf, finf, fninf, fnan, fnone,
16
+ mpf_abs, mpf_pos, mpf_neg, mpf_add, mpf_sub, mpf_mul,
17
+ mpf_div, mpf_mul_int, mpf_shift, mpf_sqrt, mpf_hypot,
18
+ mpf_rdiv_int, mpf_floor, mpf_ceil, mpf_nint, mpf_frac,
19
+ mpf_sign, mpf_hash,
20
+ ComplexResult
21
+ )
22
+
23
+ from .libelefun import (\
24
+ mpf_pi, mpf_exp, mpf_log, mpf_cos_sin, mpf_cosh_sinh, mpf_tan, mpf_pow_int,
25
+ mpf_log_hypot,
26
+ mpf_cos_sin_pi, mpf_phi,
27
+ mpf_cos, mpf_sin, mpf_cos_pi, mpf_sin_pi,
28
+ mpf_atan, mpf_atan2, mpf_cosh, mpf_sinh, mpf_tanh,
29
+ mpf_asin, mpf_acos, mpf_acosh, mpf_nthroot, mpf_fibonacci
30
+ )
31
+
32
# An mpc value is a (real, imag) tuple
# Frequently used complex constants, built from the mpf constants
mpc_one = fone, fzero
mpc_zero = fzero, fzero
mpc_two = ftwo, fzero
mpc_half = (fhalf, fzero)

# Lookup tuples used by mpc_is_inf / mpc_is_infnan
_infs = (finf, fninf)
_infs_nan = (finf, fninf, fnan)
40
+
41
def mpc_is_inf(z):
    """Return True when the real or the imaginary part of z is
    +inf or -inf, otherwise False."""
    re, im = z
    return (re in _infs) or (im in _infs)
47
+
48
def mpc_is_infnan(z):
    """Return True when the real or the imaginary part of z is
    +inf, -inf or nan, otherwise False."""
    re, im = z
    return (re in _infs_nan) or (im in _infs_nan)
54
+
55
def mpc_to_str(z, dps, **kwargs):
    """
    Format the mpc value z as a string of the form "re + imj" or
    "re - imj" using dps digits.

    Extra keyword arguments are forwarded to to_str for BOTH parts,
    so formatting options affect the real and imaginary components
    consistently.
    """
    re, im = z
    rs = to_str(re, dps, **kwargs)
    if im[0]:
        # negative imaginary part: print its magnitude after " - "
        return rs + " - " + to_str(mpf_neg(im), dps, **kwargs) + "j"
    else:
        return rs + " + " + to_str(im, dps, **kwargs) + "j"
62
+
63
def mpc_to_complex(z, strict=False, rnd=round_fast):
    """Convert the mpc value z to a Python complex; strict and rnd
    are passed through to to_float for each component."""
    re, im = z
    real_part = to_float(re, strict, rnd)
    imag_part = to_float(im, strict, rnd)
    return complex(real_part, imag_part)
66
+
67
def mpc_hash(z):
    """Hash an mpc value.

    On Python >= 3.2 the component hashes are combined with
    sys.hash_info.imag and reduced modulo 2**sys.hash_info.width.
    On older interpreters the hash of the equivalent Python complex
    is used, falling back to the tuple hash on OverflowError.
    """
    if sys.version_info < (3, 2):
        try:
            return hash(mpc_to_complex(z, strict=True))
        except OverflowError:
            return hash(z)
    re, im = z
    combined = mpf_hash(re) + sys.hash_info.imag * mpf_hash(im)
    # Need to reduce either modulo 2^32 or 2^64
    return int(combined % (2**sys.hash_info.width))
79
+
80
def mpc_conjugate(z, prec, rnd=round_fast):
    """Complex conjugate: the real part is returned as given, the
    imaginary part is negated via mpf_neg with prec and rnd."""
    real, imag = z
    negated = mpf_neg(imag, prec, rnd)
    return real, negated
83
+
84
def mpc_is_nonzero(z):
    """Return True unless z compares equal to mpc_zero."""
    return not (z == mpc_zero)
86
+
87
def mpc_add(z, w, prec, rnd=round_fast):
    """Componentwise sum of the mpc values z and w."""
    z_re, z_im = z
    w_re, w_im = w
    total_re = mpf_add(z_re, w_re, prec, rnd)
    total_im = mpf_add(z_im, w_im, prec, rnd)
    return total_re, total_im
91
+
92
def mpc_add_mpf(z, x, prec, rnd=round_fast):
    """Add the real mpf x to the mpc z; only the real part is
    touched, the imaginary part is returned as given."""
    z_re, z_im = z
    new_re = mpf_add(z_re, x, prec, rnd)
    return new_re, z_im
95
+
96
def mpc_sub(z, w, prec=0, rnd=round_fast):
    """Componentwise difference z - w of two mpc values."""
    z_re, z_im = z
    w_re, w_im = w
    diff_re = mpf_sub(z_re, w_re, prec, rnd)
    diff_im = mpf_sub(z_im, w_im, prec, rnd)
    return diff_re, diff_im
100
+
101
def mpc_sub_mpf(z, p, prec=0, rnd=round_fast):
    """Subtract the real mpf p from the mpc z; the imaginary part
    is returned as given."""
    z_re, z_im = z
    new_re = mpf_sub(z_re, p, prec, rnd)
    return new_re, z_im
104
+
105
def mpc_pos(z, prec, rnd=round_fast):
    """Unary plus: apply mpf_pos with prec and rnd to both parts."""
    re, im = z
    return tuple(mpf_pos(part, prec, rnd) for part in (re, im))
108
+
109
def mpc_neg(z, prec=None, rnd=round_fast):
    """Negate both parts of the mpc value z."""
    re, im = z
    neg_re = mpf_neg(re, prec, rnd)
    neg_im = mpf_neg(im, prec, rnd)
    return neg_re, neg_im
112
+
113
def mpc_shift(z, n):
    """Apply mpf_shift(part, n) to the real and imaginary parts."""
    re, im = z
    shifted_re = mpf_shift(re, n)
    shifted_im = mpf_shift(im, n)
    return shifted_re, shifted_im
116
+
117
def mpc_abs(z, prec, rnd=round_fast):
    """Modulus |a+bi| of a complex number, computed with mpf_hypot.
    The result is an mpf value, not an mpc."""
    re, im = z
    modulus = mpf_hypot(re, im, prec, rnd)
    return modulus
122
+
123
def mpc_arg(z, prec, rnd=round_fast):
    """Argument (phase angle) of a complex number, computed as
    atan2(imag, real). The result is an mpf value."""
    re, im = z
    angle = mpf_atan2(im, re, prec, rnd)
    return angle
127
+
128
def mpc_floor(z, prec, rnd=round_fast):
    """Apply mpf_floor to the real and imaginary parts separately."""
    re, im = z
    floor_re = mpf_floor(re, prec, rnd)
    floor_im = mpf_floor(im, prec, rnd)
    return floor_re, floor_im
131
+
132
def mpc_ceil(z, prec, rnd=round_fast):
    """Apply mpf_ceil to the real and imaginary parts separately."""
    re, im = z
    ceil_re = mpf_ceil(re, prec, rnd)
    ceil_im = mpf_ceil(im, prec, rnd)
    return ceil_re, ceil_im
135
+
136
def mpc_nint(z, prec, rnd=round_fast):
    """Apply mpf_nint to the real and imaginary parts separately."""
    re, im = z
    nint_re = mpf_nint(re, prec, rnd)
    nint_im = mpf_nint(im, prec, rnd)
    return nint_re, nint_im
139
+
140
def mpc_frac(z, prec, rnd=round_fast):
    """Apply mpf_frac to the real and imaginary parts separately."""
    re, im = z
    frac_re = mpf_frac(re, prec, rnd)
    frac_im = mpf_frac(im, prec, rnd)
    return frac_re, frac_im
143
+
144
+
145
def mpc_mul(z, w, prec, rnd=round_fast):
    """
    Multiply two complex numbers.

    Computes (a+bi)*(c+di) = (ac - bd) + (ad + bc)i. The four
    partial products are formed without a precision argument; only
    the final subtraction and addition are rounded to prec, and rnd
    is applied to the real and imaginary parts independently.
    """
    a, b = z
    c, d = w
    ac = mpf_mul(a, c)
    bd = mpf_mul(b, d)
    ad = mpf_mul(a, d)
    bc = mpf_mul(b, c)
    return mpf_sub(ac, bd, prec, rnd), mpf_add(ad, bc, prec, rnd)
162
+
163
def mpc_square(z, prec, rnd=round_fast):
    """Square a complex number using
    (a+b*I)**2 == a**2 - b**2 + 2*I*a*b."""
    a, b = z
    a_sq = mpf_mul(a,a)
    b_sq = mpf_mul(b,b)
    ab = mpf_mul(a,b, prec, rnd)
    real = mpf_sub(a_sq, b_sq, prec, rnd)
    # doubling via mpf_shift(ab, 1)
    imag = mpf_shift(ab, 1)
    return real, imag
172
+
173
def mpc_mul_mpf(z, p, prec, rnd=round_fast):
    """Multiply the mpc value z by the real mpf value p."""
    z_re, z_im = z
    return mpf_mul(z_re, p, prec, rnd), mpf_mul(z_im, p, prec, rnd)
178
+
179
def mpc_mul_imag_mpf(z, x, prec, rnd=round_fast):
    """
    Return z * (I*x) for an mpc value z and an mpf value x,
    i.e. (a+bi)*(xi) = -b*x + a*x*i.
    """
    a, b = z
    bx = mpf_mul(b, x, prec, rnd)
    ax = mpf_mul(a, x, prec, rnd)
    return mpf_neg(bx), ax
187
+
188
def mpc_mul_int(z, n, prec, rnd=round_fast):
    """Multiply the mpc value z by the integer n."""
    z_re, z_im = z
    return mpf_mul_int(z_re, n, prec, rnd), mpf_mul_int(z_im, n, prec, rnd)
193
+
194
def mpc_div(z, w, prec, rnd=round_fast):
    """Complex division z / w.

    With z = a+bi and w = c+di the quotient is
    ((a*c+b*d) + (b*c-a*d)i) / (c*c + d*d). Intermediate sums are
    evaluated with 10 extra bits; the two final divisions are
    rounded to prec with rnd.
    """
    a, b = z
    c, d = w
    wp = prec + 10
    denom = mpf_add(mpf_mul(c, c), mpf_mul(d, d), wp)
    num_re = mpf_add(mpf_mul(a,c), mpf_mul(b,d), wp)
    num_im = mpf_sub(mpf_mul(b,c), mpf_mul(a,d), wp)
    quot_re = mpf_div(num_re, denom, prec, rnd)
    quot_im = mpf_div(num_im, denom, prec, rnd)
    return quot_re, quot_im
204
+
205
def mpc_div_mpf(z, p, prec, rnd=round_fast):
    """Divide the mpc value z by the real mpf value p."""
    z_re, z_im = z
    return mpf_div(z_re, p, prec, rnd), mpf_div(z_im, p, prec, rnd)
211
+
212
def mpc_reciprocal(z, prec, rnd=round_fast):
    """Compute 1/z as (a - bi) / (a*a + b*b), with the squared
    modulus evaluated at prec+10 bits."""
    a, b = z
    norm_sq = mpf_add(mpf_mul(a,a),mpf_mul(b,b),prec+10)
    real = mpf_div(a, norm_sq, prec, rnd)
    imag = mpf_div(b, norm_sq, prec, rnd)
    return real, mpf_neg(imag)
219
+
220
def mpc_mpf_div(p, z, prec, rnd=round_fast):
    """Compute p/z for a real mpf p and an mpc z as
    (a*p - b*p*i) / (a*a + b*b)."""
    a, b = z
    norm_sq = mpf_add(mpf_mul(a,a),mpf_mul(b,b), prec+10)
    num_re = mpf_mul(a,p)
    num_im = mpf_neg(mpf_mul(b,p))
    return mpf_div(num_re, norm_sq, prec, rnd), mpf_div(num_im, norm_sq, prec, rnd)
227
+
228
def complex_int_pow(a, b, n):
    """Complex integer power: computes (a+b*I)**n exactly for
    nonnegative n (a and b must be Python ints).

    Uses binary exponentiation (square-and-multiply) on the pair
    (a, b). Returns the tuple (real, imag) of exact integers.

    Raises ValueError if n is negative: such an exponent would never
    reach zero under repeated floor-halving and the loop would not
    terminate.
    """
    if n < 0:
        raise ValueError("complex_int_pow requires a nonnegative exponent")
    wre = 1
    wim = 0
    while n:
        if n & 1:
            # multiply the accumulator by the current power of the base
            wre, wim = wre*a - wim*b, wim*a + wre*b
        # square the base and drop the lowest bit of n
        a, b = a*a - b*b, 2*a*b
        n >>= 1
    return wre, wim
240
+
241
def mpc_pow(z, w, prec, rnd=round_fast):
    """Complex power z**w.

    A purely real exponent is delegated to mpc_pow_mpf; otherwise
    the result is exp(w*log(z)) with 10 guard bits on the
    intermediate steps.
    """
    if w[1] != fzero:
        wp = prec+10
        log_z = mpc_log(z, wp)
        return mpc_exp(mpc_mul(log_z, w, wp), prec, rnd)
    return mpc_pow_mpf(z, w[0], prec, rnd)
245
+
246
def mpc_pow_mpf(z, p, prec, rnd=round_fast):
    """Raise the mpc value z to the real mpf power p.

    If p has a nonnegative exponent field it is an integer and
    mpc_pow_int is used directly. If the exponent field is -1, p is
    a half-integer, so the square root of z is raised to an integer
    power. Everything else goes through exp(p*log(z)).
    """
    psign, pman, pexp, pbc = p
    if pexp >= 0:
        int_power = (-1)**psign * (pman<<pexp)
        return mpc_pow_int(z, int_power, prec, rnd)
    if pexp == -1:
        root = mpc_sqrt(z, prec+10)
        int_power = (-1)**psign * pman
        return mpc_pow_int(root, int_power, prec, rnd)
    wp = prec+10
    scaled_log = mpc_mul_mpf(mpc_log(z, wp), p, wp)
    return mpc_exp(scaled_log, prec, rnd)
254
+
255
def mpc_pow_int(z, n, prec, rnd=round_fast):
    """Raise the mpc value z to the integer power n.

    Purely real and purely imaginary bases reduce to a real power.
    Small exponents (0, 1, 2, -1) use dedicated routines and other
    negative exponents take the reciprocal of the positive power.
    Otherwise the power is computed exactly on integer mantissas
    when the estimated exact size is below 10000, and through
    exp(n*log(z)) when it is not.
    """
    real, imag = z
    if imag == fzero:
        return mpf_pow_int(real, n, prec, rnd), fzero
    if real == fzero:
        # (b*i)**n = b**n * i**n, and i**n cycles with period 4
        power = mpf_pow_int(imag, n, prec, rnd)
        n %= 4
        if n == 0:
            return power, fzero
        if n == 1:
            return fzero, power
        if n == 2:
            return mpf_neg(power), fzero
        if n == 3:
            return fzero, mpf_neg(power)
    if n == 0:
        return mpc_one
    if n == 1:
        return mpc_pos(z, prec, rnd)
    if n == 2:
        return mpc_square(z, prec, rnd)
    if n == -1:
        return mpc_reciprocal(z, prec, rnd)
    if n < 0:
        return mpc_reciprocal(mpc_pow_int(z, -n, prec+4), prec, rnd)
    asign, aman, aexp, abc = real
    bsign, bman, bexp, bbc = imag
    if asign:
        aman = -aman
    if bsign:
        bman = -bman
    exp_gap = aexp - bexp
    exact_size = n*(abs(exp_gap) + max(abc, bbc))
    if exact_size < 10000:
        # bring both mantissas to the smaller of the two exponents
        if exp_gap > 0:
            aman <<= exp_gap
            shared_exp = bexp
        else:
            bman <<= (-exp_gap)
            shared_exp = aexp
        pow_re, pow_im = complex_int_pow(aman, bman, n)
        pow_re = from_man_exp(pow_re, int(n*shared_exp), prec, rnd)
        pow_im = from_man_exp(pow_im, int(n*shared_exp), prec, rnd)
        return pow_re, pow_im
    wp = prec+10
    return mpc_exp(mpc_mul_int(mpc_log(z, wp), n, wp), prec, rnd)
294
+
295
def mpc_sqrt(z, prec, rnd=round_fast):
    """Principal branch of the complex square root.

    For a+bi that is not a negative real number,
    sqrt(a+bi) = sqrt((r+a)/2) + b/sqrt(2*(r+a))*i with r = abs(a+bi).
    When a is negative the roles of the two parts are swapped (using
    r-a) so that no cancellation occurs, and both signs are flipped
    for negative b.
    """
    a, b = z
    if b == fzero:
        if a == fzero:
            return (a, b)
        # negative real input gives a real sqrt times i
        if a[0]:
            return (fzero, mpf_sqrt(mpf_neg(a), prec, rnd))
        return (mpf_sqrt(a, prec, rnd), fzero)
    wp = prec+20
    modulus = mpc_abs((a, b), wp)
    if not a[0]:
        # a positive: t = abs(a+bi) + a
        t = mpf_add(modulus, a, wp)
        re = mpf_sqrt(mpf_shift(t, -1), prec, rnd)      # sqrt(t/2)
        denom = mpf_sqrt(mpf_shift(t, 1), wp)           # sqrt(2*t)
        im = mpf_div(b, denom, prec, rnd)
    else:
        # a negative: t = abs(a+bi) - a
        t = mpf_sub(modulus, a, wp)
        im = mpf_sqrt(mpf_shift(t, -1), prec, rnd)      # sqrt(t/2)
        denom = mpf_sqrt(mpf_shift(t, 1), wp)           # sqrt(2*t)
        re = mpf_div(b, denom, prec, rnd)
        if b[0]:
            re = mpf_neg(re)
            im = mpf_neg(im)
    return re, im
330
+
331
def mpc_nthroot_fixed(a, b, n, prec):
    """
    Fixed-point n-th root of a + b*i.

    a and b are signed integers representing the real and imaginary
    parts at fixed precision prec. An initial approximation is made
    at a low starting precision and then refined over the precisions
    produced by giant_steps(start, prec+extra). Returns the pair
    (re, im) of integers at the precision of the last step.
    """
    # a, b signed integers at fixed precision prec
    start = 50
    a1 = int(rshift(a, prec - n*start))
    b1 = int(rshift(b, prec - n*start))
    try:
        # initial guess using Python's complex power
        r = (a1 + 1j * b1)**(1.0/n)
        re = r.real
        im = r.imag
        re = MPZ(int(re))
        im = MPZ(int(im))
    except OverflowError:
        # operands too large for floats: get the guess from mpc_pow
        a1 = from_int(a1, start)
        b1 = from_int(b1, start)
        fn = from_int(n)
        nth = mpf_rdiv_int(1, fn, start)
        re, im = mpc_pow((a1, b1), (nth, fzero), start)
        re = to_int(re)
        im = to_int(im)
    extra = 10
    prevp = start
    extra1 = n
    for p in giant_steps(start, prec+extra):
        # this is slow for large n, unlike int_pow_fixed
        # (re2, im2) = current estimate raised to the power n-1
        re2, im2 = complex_int_pow(re, im, n-1)
        re2 = rshift(re2, (n-1)*prevp - p - extra1)
        im2 = rshift(im2, (n-1)*prevp - p - extra1)
        # squared modulus of that power, used as the divisor below
        r4 = (re2*re2 + im2*im2) >> (p + extra1)
        ap = rshift(a, prec - p)
        bp = rshift(b, prec - p)
        # (ap + bp*i) times the conjugate of (re2 + im2*i)
        rec = (ap * re2 + bp * im2) >> p
        imc = (-ap * im2 + bp * re2) >> p
        reb = (rec << p) // r4
        imb = (imc << p) // r4
        # weighted average of the quotient and the previous estimate
        re = (reb + (n-1)*lshift(re, p-prevp))//n
        im = (imb + (n-1)*lshift(im, p-prevp))//n
        prevp = p
    return re, im
369
+
370
def mpc_nthroot(z, n, prec, rnd=round_fast):
    """
    Complex n-th root.

    Use Newton method as in the real case when it is faster,
    otherwise use z**(1/n)

    z is an mpc value and n an integer. Returns a (real, imag) pair.
    """
    a, b = z
    # nonnegative real input: defer to the real n-th root
    if a[0] == 0 and b == fzero:
        re = mpf_nthroot(a, n, prec, rnd)
        return (re, fzero)
    if n < 2:
        if n == 0:
            return mpc_one
        if n == 1:
            return mpc_pos((a, b), prec, rnd)
        if n == -1:
            return mpc_div(mpc_one, (a, b), prec, rnd)
        # other negative n: take the (-n)-th root, then invert it
        inverse = mpc_nthroot((a, b), -n, prec+5, reciprocal_rnd[rnd])
        return mpc_div(mpc_one, inverse, prec, rnd)
    if n <= 20:
        prec2 = int(1.2 * (prec + 10))
        asign, aman, aexp, abc = a
        bsign, bman, bexp, bbc = b
        pf = mpc_abs((a,b), prec)
        # pf[-2] + pf[-1] is exponent + bitcount of |z|; the fixed-point
        # path is only taken when that magnitude is in a moderate range
        if pf[-2] + pf[-1] > -10 and pf[-2] + pf[-1] < prec:
            af = to_fixed(a, prec2)
            bf = to_fixed(b, prec2)
            re, im = mpc_nthroot_fixed(af, bf, n, prec2)
            extra = 10
            # NOTE(review): these results are rounded to prec2 bits, whereas
            # the fallback below normalizes to prec - confirm this is intended
            re = from_man_exp(re, -prec2-extra, prec2, rnd)
            im = from_man_exp(im, -prec2-extra, prec2, rnd)
            return re, im
    # general case: z**(1/n) with extra working precision
    fn = from_int(n)
    prec2 = prec+10 + 10
    nth = mpf_rdiv_int(1, fn, prec2)
    re, im = mpc_pow((a, b), (nth, fzero), prec2, rnd)
    re = normalize(re[0], re[1], re[2], re[3], prec, rnd)
    im = normalize(im[0], im[1], im[2], im[3], prec, rnd)
    return re, im
410
+
411
def mpc_cbrt(z, prec, rnd=round_fast):
    """
    Complex cube root, obtained as the n-th root with n = 3.
    """
    degree = 3
    return mpc_nthroot(z, degree, prec, rnd)
416
+
417
def mpc_exp(z, prec, rnd=round_fast):
    """
    Complex exponential function.

    Evaluated directly from exp(a+bi) = exp(a) * (cos(b) + sin(b)*i).
    The formula is perfectly stable: only real multiplications are
    involved, so the only numerical errors that can creep in are
    single-ulp rounding errors.

    It is also efficient, because the real exp is fast and cos and
    sin are obtained from a single call.

    Large a and b are no problem; if the implementations of
    exp/cos/sin are accurate and efficient for all real numbers, then
    so is this function for all complex numbers.
    """
    a, b = z
    # purely imaginary argument: result is cos(b) + sin(b)*i
    if a == fzero:
        return mpf_cos_sin(b, prec, rnd)
    # purely real argument
    if b == fzero:
        return mpf_exp(a, prec, rnd), fzero
    wp = prec+4
    magnitude = mpf_exp(a, wp, rnd)
    cos_b, sin_b = mpf_cos_sin(b, wp, rnd)
    return mpf_mul(magnitude, cos_b, prec, rnd), mpf_mul(magnitude, sin_b, prec, rnd)
443
+
444
def mpc_log(z, prec, rnd=round_fast):
    """Complex logarithm: the real part comes from mpf_log_hypot of
    the two components, the imaginary part is the argument of z."""
    log_modulus = mpf_log_hypot(z[0], z[1], prec, rnd)
    angle = mpc_arg(z, prec, rnd)
    return log_modulus, angle
448
+
449
def mpc_cos(z, prec, rnd=round_fast):
    """Complex cosine, using cos(a+bi) = cos(a)*cosh(b) -
    sin(a)*sinh(b)*i.

    As for the complex exp, only real multiplications are performed,
    so no cancellation errors are possible. The formula is also
    efficient since both pairs (cos, sin) and (cosh, sinh) are
    computed in single steps."""
    a, b = z
    if b == fzero:
        return mpf_cos(a, prec, rnd), fzero
    if a == fzero:
        return mpf_cosh(b, prec, rnd), fzero
    wp = prec + 6
    cos_a, sin_a = mpf_cos_sin(a, wp)
    cosh_b, sinh_b = mpf_cosh_sinh(b, wp)
    real = mpf_mul(cos_a, cosh_b, prec, rnd)
    imag = mpf_mul(sin_a, sinh_b, prec, rnd)
    return real, mpf_neg(imag)
468
+
469
def mpc_sin(z, prec, rnd=round_fast):
    """Complex sine, using sin(a+bi) = sin(a)*cosh(b) +
    cos(a)*sinh(b)*i. Purely real and purely imaginary arguments
    are handled by the real sin and sinh respectively."""
    a, b = z
    if b == fzero:
        return mpf_sin(a, prec, rnd), fzero
    if a == fzero:
        return fzero, mpf_sinh(b, prec, rnd)
    wp = prec + 6
    cos_a, sin_a = mpf_cos_sin(a, wp)
    cosh_b, sinh_b = mpf_cosh_sinh(b, wp)
    return mpf_mul(sin_a, cosh_b, prec, rnd), mpf_mul(cos_a, sinh_b, prec, rnd)
484
+
485
def mpc_tan(z, prec, rnd=round_fast):
    """Complex tangent. Computed as tan(a+bi) = sin(2a)/M + sinh(2b)/M*i
    where M = cos(2a) + cosh(2b).

    Purely real and purely imaginary arguments are delegated to the
    real tan and tanh. The general case works with 15 guard bits.
    """
    a, b = z
    if b == fzero: return mpf_tan(a, prec, rnd), fzero
    if a == fzero: return fzero, mpf_tanh(b, prec, rnd)
    wp = prec + 15
    # double both components (mpf_shift by one bit)
    a = mpf_shift(a, 1)
    b = mpf_shift(b, 1)
    c, s = mpf_cos_sin(a, wp)
    ch, sh = mpf_cosh_sinh(b, wp)
    # TODO: handle cancellation when c ~= -1 and ch ~= 1
    mag = mpf_add(c, ch, wp)
    re = mpf_div(s, mag, prec, rnd)
    im = mpf_div(sh, mag, prec, rnd)
    return re, im
503
+
504
def mpc_cos_pi(z, prec, rnd=round_fast):
    """Compute cos(pi*z) for an mpc value z.

    The real part is passed to the *_pi real routines unscaled; the
    imaginary part is multiplied by pi at prec+5 bits before being
    fed to the hyperbolic functions.
    """
    a, b = z
    if b == fzero:
        return mpf_cos_pi(a, prec, rnd), fzero
    pi_b = mpf_mul(b, mpf_pi(prec+5), prec+5)
    if a == fzero:
        return mpf_cosh(pi_b, prec, rnd), fzero
    wp = prec + 6
    cos_a, sin_a = mpf_cos_sin_pi(a, wp)
    cosh_b, sinh_b = mpf_cosh_sinh(pi_b, wp)
    real = mpf_mul(cos_a, cosh_b, prec, rnd)
    imag = mpf_mul(sin_a, sinh_b, prec, rnd)
    return real, mpf_neg(imag)
517
+
518
def mpc_sin_pi(z, prec, rnd=round_fast):
    """Compute sin(pi*z) for an mpc value z.

    The real part is passed to the *_pi real routines unscaled; the
    imaginary part is multiplied by pi at prec+5 bits before being
    fed to the hyperbolic functions.
    """
    a, b = z
    if b == fzero:
        return mpf_sin_pi(a, prec, rnd), fzero
    pi_b = mpf_mul(b, mpf_pi(prec+5), prec+5)
    if a == fzero:
        return fzero, mpf_sinh(pi_b, prec, rnd)
    wp = prec + 6
    cos_a, sin_a = mpf_cos_sin_pi(a, wp)
    cosh_b, sinh_b = mpf_cosh_sinh(pi_b, wp)
    return mpf_mul(sin_a, cosh_b, prec, rnd), mpf_mul(cos_a, sinh_b, prec, rnd)
531
+
532
def mpc_cos_sin(z, prec, rnd=round_fast):
    """Return the pair (cos(z), sin(z)), each as an mpc value,
    sharing the underlying real cos/sin and cosh/sinh evaluations."""
    a, b = z
    if a == fzero:
        # purely imaginary: cos(bi) = cosh(b), sin(bi) = sinh(b)*i
        cosh_b, sinh_b = mpf_cosh_sinh(b, prec, rnd)
        return (cosh_b, fzero), (fzero, sinh_b)
    if b == fzero:
        cos_a, sin_a = mpf_cos_sin(a, prec, rnd)
        return (cos_a, fzero), (sin_a, fzero)
    wp = prec + 6
    cos_a, sin_a = mpf_cos_sin(a, wp)
    cosh_b, sinh_b = mpf_cosh_sinh(b, wp)
    cosine = (mpf_mul(cos_a, cosh_b, prec, rnd),
              mpf_neg(mpf_mul(sin_a, sinh_b, prec, rnd)))
    sine = (mpf_mul(sin_a, cosh_b, prec, rnd),
            mpf_mul(cos_a, sinh_b, prec, rnd))
    return cosine, sine
548
+
549
def mpc_cos_sin_pi(z, prec, rnd=round_fast):
    """Return the pair (cos(pi*z), sin(pi*z)), each as an mpc value.

    The imaginary part of z is multiplied by pi at prec+5 bits
    before the hyperbolic functions are evaluated.
    """
    a, b = z
    if b == fzero:
        cos_a, sin_a = mpf_cos_sin_pi(a, prec, rnd)
        return (cos_a, fzero), (sin_a, fzero)
    pi_b = mpf_mul(b, mpf_pi(prec+5), prec+5)
    if a == fzero:
        cosh_b, sinh_b = mpf_cosh_sinh(pi_b, prec, rnd)
        return (cosh_b, fzero), (fzero, sinh_b)
    wp = prec + 6
    cos_a, sin_a = mpf_cos_sin_pi(a, wp)
    cosh_b, sinh_b = mpf_cosh_sinh(pi_b, wp)
    cosine = (mpf_mul(cos_a, cosh_b, prec, rnd),
              mpf_neg(mpf_mul(sin_a, sinh_b, prec, rnd)))
    sine = (mpf_mul(sin_a, cosh_b, prec, rnd),
            mpf_mul(cos_a, sinh_b, prec, rnd))
    return cosine, sine
566
+
567
def mpc_cosh(z, prec, rnd=round_fast):
    """Complex hyperbolic cosine, evaluated as the complex cosine
    of the argument (b, -a) built from z = (a, b)."""
    a, b = z
    rotated = (b, mpf_neg(a))
    return mpc_cos(rotated, prec, rnd)
571
+
572
def mpc_sinh(z, prec, rnd=round_fast):
    """Complex hyperbolic sine, obtained from the complex sine with
    real and imaginary parts swapped on input and on output."""
    a, b = z
    sin_re, sin_im = mpc_sin((b, a), prec, rnd)
    return sin_im, sin_re
577
+
578
def mpc_tanh(z, prec, rnd=round_fast):
    """Complex hyperbolic tangent, obtained from the complex tangent
    with real and imaginary parts swapped on input and on output."""
    a, b = z
    tan_re, tan_im = mpc_tan((b, a), prec, rnd)
    return tan_im, tan_re
583
+
584
# TODO: avoid loss of accuracy
def mpc_atan(z, prec, rnd=round_fast):
    """Complex inverse tangent, using
    atan(z) = (I/2)*(log(1-I*z) - log(1+I*z))."""
    a, b = z
    wp = prec + 15
    # 1-I*z = 1 + b - I*a   and   1+I*z = 1 - b + I*a
    one_minus_iz = mpf_add(fone, b, wp), mpf_neg(a)
    one_plus_iz = mpf_sub(fone, b, wp), a
    log_minus = mpc_log(one_minus_iz, wp)
    log_plus = mpc_log(one_plus_iz, wp)
    diff_re, diff_im = mpc_sub(log_minus, log_plus, prec, rnd)
    # (I/2) * (p+q*I) = (-q/2 + p/2*I)
    result = mpf_neg(mpf_shift(diff_im,-1)), mpf_shift(diff_re,-1)
    # Subtraction at infinity gives correct real part but
    # wrong imaginary part (should be zero)
    if result[1] == fnan and mpc_is_inf(z):
        result = (result[0], fzero)
    return result
603
+
604
# Region boundaries used by acos_asin to pick between alternative
# formulas for the real part (beta) and the imaginary part (alpha)
beta_crossover = from_float(0.6417)
alpha_crossover = from_float(1.5)
606
+
607
def acos_asin(z, prec, rnd, n):
    """ complex acos for n = 0, asin for n = 1
    The algorithm is described in
    T.E. Hull, T.F. Fairgrieve and P.T.P. Tang
    'Implementing the Complex Arcsine and Arccosine Functions
    using Exception Handling',
    ACM Trans. on Math. Software Vol. 23 (1997), p299
    The complex acos and asin can be defined as
    acos(z) = acos(beta) - I*sign(a)* log(alpha + sqrt(alpha**2 -1))
    asin(z) = asin(beta) + I*sign(a)* log(alpha + sqrt(alpha**2 -1))
    where z = a + I*b
    alpha = (1/2)*(r + s); beta = (1/2)*(r - s) = a/alpha
    r = sqrt((a+1)**2 + b**2); s = sqrt((a-1)**2 + b**2)
    These expressions are rewritten in different ways in different
    regions, delimited by two crossovers alpha_crossover and beta_crossover,
    and by abs(a) <= 1, in order to improve the numerical accuracy.

    The result is a (real, imag) pair rounded to prec with rnd.
    """
    a, b = z
    wp = prec + 10
    # special cases with real argument
    if b == fzero:
        am = mpf_sub(fone, mpf_abs(a), wp)
        # case abs(a) <= 1
        if not am[0]:
            if n == 0:
                return mpf_acos(a, prec, rnd), fzero
            else:
                return mpf_asin(a, prec, rnd), fzero
        # cases abs(a) > 1
        else:
            # case a < -1
            if a[0]:
                pi = mpf_pi(prec, rnd)
                c = mpf_acosh(mpf_neg(a), prec, rnd)
                if n == 0:
                    return pi, mpf_neg(c)
                else:
                    return mpf_neg(mpf_shift(pi, -1)), c
            # case a > 1
            else:
                c = mpf_acosh(a, prec, rnd)
                if n == 0:
                    return fzero, c
                else:
                    pi = mpf_pi(prec, rnd)
                    return mpf_shift(pi, -1), mpf_neg(c)
    # work with |a| and |b|; the original signs are restored at the end
    asign = bsign = 0
    if a[0]:
        a = mpf_neg(a)
        asign = 1
    if b[0]:
        b = mpf_neg(b)
        bsign = 1
    am = mpf_sub(fone, a, wp)       # 1 - a
    ap = mpf_add(fone, a, wp)       # 1 + a
    r = mpf_hypot(ap, b, wp)
    s = mpf_hypot(am, b, wp)
    alpha = mpf_shift(mpf_add(r, s, wp), -1)
    beta = mpf_div(a, alpha, wp)
    b2 = mpf_mul(b,b, wp)
    # case beta <= beta_crossover
    if not mpf_sub(beta_crossover, beta, wp)[0]:
        if n == 0:
            re = mpf_acos(beta, wp)
        else:
            re = mpf_asin(beta, wp)
    else:
        # to compute the real part in this region use the identity
        # asin(beta) = atan(beta/sqrt(1-beta**2))
        # beta/sqrt(1-beta**2) = a/sqrt((alpha + a) * (alpha - a))
        # alpha + a is numerically accurate; alpha - a can have
        # cancellations leading to numerical inaccuracies, so rewrite
        # it in different ways according to the region
        Ax = mpf_add(alpha, a, wp)
        # case a <= 1
        if not am[0]:
            # c = b*b/(r + (a+1)); d = (s + (1-a))
            # alpha - a = (1/2)*(c + d)
            # case n=0: re = atan(sqrt((1/2) * Ax * (c + d))/a)
            # case n=1: re = atan(a/sqrt((1/2) * Ax * (c + d)))
            c = mpf_div(b2, mpf_add(r, ap, wp), wp)
            d = mpf_add(s, am, wp)
            re = mpf_shift(mpf_mul(Ax, mpf_add(c, d, wp), wp), -1)
            if n == 0:
                re = mpf_atan(mpf_div(mpf_sqrt(re, wp), a, wp), wp)
            else:
                re = mpf_atan(mpf_div(a, mpf_sqrt(re, wp), wp), wp)
        else:
            # c = Ax/(r + (a+1)); d = Ax/(s - (1-a))
            # alpha - a = (1/2)*(c + d)
            # case n = 0: re = atan(b*sqrt(c + d)/2/a)
            # case n = 1: re = atan(a/(b*sqrt(c + d)/2)
            c = mpf_div(Ax, mpf_add(r, ap, wp), wp)
            d = mpf_div(Ax, mpf_sub(s, am, wp), wp)
            re = mpf_shift(mpf_add(c, d, wp), -1)
            re = mpf_mul(b, mpf_sqrt(re, wp), wp)
            if n == 0:
                re = mpf_atan(mpf_div(re, a, wp), wp)
            else:
                re = mpf_atan(mpf_div(a, re, wp), wp)
    # to compute alpha + sqrt(alpha**2 - 1), if alpha <= alpha_crossover
    # replace it with 1 + Am1 + sqrt(Am1*(alpha+1)))
    # where Am1 = alpha -1
    # if alpha <= alpha_crossover:
    if not mpf_sub(alpha_crossover, alpha, wp)[0]:
        c1 = mpf_div(b2, mpf_add(r, ap, wp), wp)
        # case a < 1
        if mpf_neg(am)[0]:
            # Am1 = (1/2) * (b*b/(r + (a+1)) + b*b/(s + (1-a))
            c2 = mpf_add(s, am, wp)
            c2 = mpf_div(b2, c2, wp)
            Am1 = mpf_shift(mpf_add(c1, c2, wp), -1)
        else:
            # Am1 = (1/2) * (b*b/(r + (a+1)) + (s - (1-a)))
            c2 = mpf_sub(s, am, wp)
            Am1 = mpf_shift(mpf_add(c1, c2, wp), -1)
        # im = log(1 + Am1 + sqrt(Am1*(alpha+1)))
        im = mpf_mul(Am1, mpf_add(alpha, fone, wp), wp)
        im = mpf_log(mpf_add(fone, mpf_add(Am1, mpf_sqrt(im, wp), wp), wp), wp)
    else:
        # im = log(alpha + sqrt(alpha*alpha - 1))
        im = mpf_sqrt(mpf_sub(mpf_mul(alpha, alpha, wp), fone, wp), wp)
        im = mpf_log(mpf_add(alpha, im, wp), wp)
    # restore the sign of the original real part
    if asign:
        if n == 0:
            re = mpf_sub(mpf_pi(wp), re, wp)
        else:
            re = mpf_neg(re)
    # the sign of the imaginary part depends on both the function
    # (acos vs asin) and the sign of the original imaginary part
    if not bsign and n == 0:
        im = mpf_neg(im)
    if bsign and n == 1:
        im = mpf_neg(im)
    # round the working-precision results down to the requested precision
    re = normalize(re[0], re[1], re[2], re[3], prec, rnd)
    im = normalize(im[0], im[1], im[2], im[3], prec, rnd)
    return re, im
742
+
743
def mpc_acos(z, prec, rnd=round_fast):
    """Complex inverse cosine; acos_asin with mode n = 0."""
    mode = 0
    return acos_asin(z, prec, rnd, mode)
745
+
746
def mpc_asin(z, prec, rnd=round_fast):
    """Complex inverse sine; acos_asin with mode n = 1."""
    mode = 1
    return acos_asin(z, prec, rnd, mode)
748
+
749
def mpc_asinh(z, prec, rnd=round_fast):
    """Complex inverse hyperbolic sine, via
    asinh(z) = I * asin(-I z)."""
    a, b = z
    # -I*z = b - a*I
    asin_re, asin_im = mpc_asin((b, mpf_neg(a)), prec, rnd)
    # I*(p + q*I) = -q + p*I
    return mpf_neg(asin_im), asin_re
754
+
755
def mpc_acosh(z, prec, rnd=round_fast):
    """Complex inverse hyperbolic cosine.

    acosh(z) = -I * acos(z) when Im(acos(z)) <= 0,
               +I * acos(z) otherwise.
    """
    acos_re, acos_im = mpc_acos(z, prec, rnd)
    imag_nonpositive = acos_im[0] or acos_im == fzero
    if imag_nonpositive:
        return mpf_neg(acos_im), acos_re
    return acos_im, mpf_neg(acos_re)
763
+
764
def mpc_atanh(z, prec, rnd=round_fast):
    """Complex inverse hyperbolic tangent, using
    atanh(z) = (log(1+z)-log(1-z))/2 with 15 guard bits."""
    wp = prec + 15
    log_plus = mpc_log(mpc_add(z, mpc_one, wp), wp)
    log_minus = mpc_log(mpc_sub(mpc_one, z, wp), wp)
    half_diff = mpc_shift(mpc_sub(log_plus, log_minus, wp), -1)
    # Subtraction at infinity gives correct imaginary part but
    # wrong real part (should be zero)
    if half_diff[0] == fnan and mpc_is_inf(z):
        half_diff = (fzero, half_diff[1])
    return half_diff
777
+
778
def mpc_fibonacci(z, prec, rnd=round_fast):
    """Fibonacci function extended to a complex argument.

    Evaluates (phi**z - cos(pi*z)/phi**z) / (2*phi - 1), where phi is
    the value returned by mpf_phi. A purely real z is delegated to
    mpf_fibonacci. The working precision is raised by the magnitude
    (exponent + bitcount) of the larger of the two components of z.
    """
    re, im = z
    if im == fzero:
        return (mpf_fibonacci(re, prec, rnd), fzero)
    # both the real AND the imaginary part determine how many extra
    # bits are needed for phi**z and cos(pi*z)
    size = max(abs(re[2]+re[3]), abs(im[2]+im[3]))
    wp = prec + size + 20
    a = mpf_phi(wp)
    b = mpf_add(mpf_shift(a, 1), fnone, wp)     # 2*phi - 1
    u = mpc_pow((a, fzero), z, wp)              # phi**z
    v = mpc_cos_pi(z, wp)
    v = mpc_div(v, u, wp)
    u = mpc_sub(u, v, wp)
    u = mpc_div_mpf(u, b, prec, rnd)
    return u
792
+
793
def mpf_expj(x, prec, rnd='f'):
    """Real-argument entry point for exp(j*x); it always raises
    ComplexResult and computes nothing."""
    raise ComplexResult()
795
+
796
def mpc_expj(z, prec, rnd='f'):
    """Compute exp(j*z) for an mpc value z = x + y*j, i.e.
    exp(-y) * (cos(x) + sin(x)*j)."""
    x, y = z
    if y == fzero:
        return mpf_cos_sin(x, prec, rnd)
    if x == fzero:
        return mpf_exp(mpf_neg(y), prec, rnd), fzero
    wp = prec+10
    decay = mpf_exp(mpf_neg(y), wp)
    cos_x, sin_x = mpf_cos_sin(x, wp)
    return mpf_mul(decay, cos_x, prec, rnd), mpf_mul(decay, sin_x, prec, rnd)
807
+
808
def mpf_expjpi(x, prec, rnd='f'):
    """Real-argument entry point for exp(j*pi*x); it always raises
    ComplexResult and computes nothing."""
    raise ComplexResult()
810
+
811
def mpc_expjpi(z, prec, rnd='f'):
    """Compute exp(j*pi*z) for an mpc value z = x + y*j, i.e.
    exp(-pi*y) * (cos(pi*x) + sin(pi*x)*j).

    The product pi*y is formed with extra bits proportional to the
    magnitude of y.
    """
    x, y = z
    if y == fzero:
        return mpf_cos_sin_pi(x, prec, rnd)
    sign, man, exp, bc = y
    wp = prec+10
    if man:
        wp += max(0, exp+bc)
    neg_pi_y = mpf_neg(mpf_mul(mpf_pi(wp), y, wp))
    if x == fzero:
        return mpf_exp(neg_pi_y, prec, rnd), fzero
    decay = mpf_exp(neg_pi_y, prec+10)
    cos_x, sin_x = mpf_cos_sin_pi(x, prec+10)
    return mpf_mul(decay, cos_x, prec, rnd), mpf_mul(decay, sin_x, prec, rnd)
827
+
828
+
829
# When running on the Sage backend, replace the pure-Python mpc_exp and
# mpc_sqrt defined in this module with the versions exported by Sage's
# ext_libmp module.
if BACKEND == 'sage':
    try:
        import sage.libs.mpmath.ext_libmp as _lbmp
        mpc_exp = _lbmp.mpc_exp
        mpc_sqrt = _lbmp.mpc_sqrt
    except (ImportError, AttributeError):
        # keep the Python implementations and only warn
        print("Warning: Sage imports in libmpc failed")
tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_functions2.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c96a8c60ccaff3dbe94603afb496582f94300b3dd5c8ec016ff0c7e71f975baf
3
+ size 172649
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/Openacc/cupti_openacc.h ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2017 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #include <cuda_stdint.h>
51
+
52
+ #if !defined(_CUPTI_OPENACC_H_)
53
+ #define _CUPTI_OPENACC_H_
54
+
55
+ #ifndef CUPTIAPI
56
+ #ifdef _WIN32
57
+ #define CUPTIAPI __stdcall
58
+ #else
59
+ #define CUPTIAPI
60
+ #endif
61
+ #endif
62
+
63
+ #if defined(__LP64__)
64
+ #define CUPTILP64 1
65
+ #elif defined(_WIN64)
66
+ #define CUPTILP64 1
67
+ #else
68
+ #undef CUPTILP64
69
+ #endif
70
+
71
+ #if defined(__cplusplus)
72
+ extern "C" {
73
+ #endif
74
+
75
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
76
+ #pragma GCC visibility push(default)
77
+ #endif
78
+
79
+ /**
80
+ * \brief Initialize OpenACC support
81
+ *
82
+ * \param profRegister function of type acc_prof_reg as obtained from acc_register_library
83
+ * \param profUnregister function of type acc_prof_reg as obtained from acc_register_library
84
+ * \param profLookup function of type acc_prof_lookup as obtained from acc_register_library
85
+ */
86
+ CUptiResult CUPTIAPI
87
+ cuptiOpenACCInitialize(void *profRegister, void *profUnregister, void *profLookup);
88
+
89
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
90
+ #pragma GCC visibility pop
91
+ #endif
92
+
93
+ #if defined(__cplusplus)
94
+ }
95
+ #endif
96
+
97
+ #endif /*_CUPTI_OPENACC_H_*/
98
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_activity.h ADDED
The diff for this file is too large to render. See raw diff
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_events.h ADDED
@@ -0,0 +1,1371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2010-2021 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_EVENTS_H_)
51
+ #define _CUPTI_EVENTS_H_
52
+
53
+ #include <cuda.h>
54
+ #include <string.h>
55
+ #include <cuda_stdint.h>
56
+ #include <cupti_result.h>
57
+
58
+ #ifndef CUPTIAPI
59
+ #ifdef _WIN32
60
+ #define CUPTIAPI __stdcall
61
+ #else
62
+ #define CUPTIAPI
63
+ #endif
64
+ #endif
65
+
66
+ #if defined(__cplusplus)
67
+ extern "C" {
68
+ #endif
69
+
70
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
71
+ #pragma GCC visibility push(default)
72
+ #endif
73
+
74
+ /**
75
+ * \defgroup CUPTI_EVENT_API CUPTI Event API
76
+ * Functions, types, and enums that implement the CUPTI Event API.
77
+ *
78
+ * \note The CUPTI event APIs from the header cupti_events.h are not supported on devices
79
+ * with compute capability 7.5 and higher (i.e. Turing and later GPU architectures).
80
+ * These APIs will be deprecated in a future CUDA release. These are replaced by
81
+ * Profiling API in the header cupti_profiler_target.h and Perfworks metrics API
82
+ * in the headers nvperf_host.h and nvperf_target.h which are supported on
83
+ * devices with compute capability 7.0 and higher (i.e. Volta and later GPU
84
+ * architectures).
85
+ *
86
+ * @{
87
+ */
88
+
89
+ /**
90
+ * \brief ID for an event.
91
+ *
92
+ * An event represents a countable activity, action, or occurrence on
93
+ * the device.
94
+ */
95
+ typedef uint32_t CUpti_EventID;
96
+
97
+ /**
98
+ * \brief ID for an event domain.
99
+ *
100
+ * ID for an event domain. An event domain represents a group of
101
+ * related events. A device may have multiple instances of a domain,
102
+ * indicating that the device can simultaneously record multiple
103
+ * instances of each event within that domain.
104
+ */
105
+ typedef uint32_t CUpti_EventDomainID;
106
+
107
+ /**
108
+ * \brief A group of events.
109
+ *
110
+ * An event group is a collection of events that are managed
111
+ * together. All events in an event group must belong to the same
112
+ * domain.
113
+ */
114
+ typedef void *CUpti_EventGroup;
115
+
116
+ /**
117
+ * \brief Device class.
118
+ *
119
+ * Enumeration of device classes for device attribute
120
+ * CUPTI_DEVICE_ATTR_DEVICE_CLASS.
121
+ */
122
+ typedef enum {
123
+ CUPTI_DEVICE_ATTR_DEVICE_CLASS_TESLA = 0,
124
+ CUPTI_DEVICE_ATTR_DEVICE_CLASS_QUADRO = 1,
125
+ CUPTI_DEVICE_ATTR_DEVICE_CLASS_GEFORCE = 2,
126
+ CUPTI_DEVICE_ATTR_DEVICE_CLASS_TEGRA = 3,
127
+ } CUpti_DeviceAttributeDeviceClass;
128
+
129
+ /**
130
+ * \brief Device attributes.
131
+ *
132
+ * CUPTI device attributes. These attributes can be read using \ref
133
+ * cuptiDeviceGetAttribute.
134
+ */
135
+ typedef enum {
136
+ /**
137
+ * Number of event IDs for a device. Value is a uint32_t.
138
+ */
139
+ CUPTI_DEVICE_ATTR_MAX_EVENT_ID = 1,
140
+ /**
141
+ * Number of event domain IDs for a device. Value is a uint32_t.
142
+ */
143
+ CUPTI_DEVICE_ATTR_MAX_EVENT_DOMAIN_ID = 2,
144
+ /**
145
+ * Get global memory bandwidth in Kbytes/sec. Value is a uint64_t.
146
+ */
147
+ CUPTI_DEVICE_ATTR_GLOBAL_MEMORY_BANDWIDTH = 3,
148
+ /**
149
+ * Get theoretical maximum number of instructions per cycle. Value
150
+ * is a uint32_t.
151
+ */
152
+ CUPTI_DEVICE_ATTR_INSTRUCTION_PER_CYCLE = 4,
153
+ /**
154
+ * Get theoretical maximum number of single precision instructions
155
+ * that can be executed per second. Value is a uint64_t.
156
+ */
157
+ CUPTI_DEVICE_ATTR_INSTRUCTION_THROUGHPUT_SINGLE_PRECISION = 5,
158
+ /**
159
+ * Get number of frame buffers for device. Value is a uint64_t.
160
+ */
161
+ CUPTI_DEVICE_ATTR_MAX_FRAME_BUFFERS = 6,
162
+ /**
163
+ * Get PCIE link rate in Mega bits/sec for device. Return 0 if bus-type
164
+ * is non-PCIE. Value is a uint64_t.
165
+ */
166
+ CUPTI_DEVICE_ATTR_PCIE_LINK_RATE = 7,
167
+ /**
168
+ * Get PCIE link width for device. Return 0 if bus-type
169
+ * is non-PCIE. Value is a uint64_t.
170
+ */
171
+ CUPTI_DEVICE_ATTR_PCIE_LINK_WIDTH = 8,
172
+ /**
173
+ * Get PCIE generation for device. Return 0 if bus-type
174
+ * is non-PCIE. Value is a uint64_t.
175
+ */
176
+ CUPTI_DEVICE_ATTR_PCIE_GEN = 9,
177
+ /**
178
+ * Get the class for the device. Value is a
179
+ * CUpti_DeviceAttributeDeviceClass.
180
+ */
181
+ CUPTI_DEVICE_ATTR_DEVICE_CLASS = 10,
182
+ /**
183
+ * Get the peak single precision flop per cycle. Value is a uint64_t.
184
+ */
185
+ CUPTI_DEVICE_ATTR_FLOP_SP_PER_CYCLE = 11,
186
+ /**
187
+ * Get the peak double precision flop per cycle. Value is a uint64_t.
188
+ */
189
+ CUPTI_DEVICE_ATTR_FLOP_DP_PER_CYCLE = 12,
190
+ /**
191
+ * Get number of L2 units. Value is a uint64_t.
192
+ */
193
+ CUPTI_DEVICE_ATTR_MAX_L2_UNITS = 13,
194
+ /**
195
+ * Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_SHARED
196
+ * preference. Value is a uint64_t.
197
+ */
198
+ CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_SHARED = 14,
199
+ /**
200
+ * Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_L1
201
+ * preference. Value is a uint64_t.
202
+ */
203
+ CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_L1 = 15,
204
+ /**
205
+ * Get the maximum shared memory for the CU_FUNC_CACHE_PREFER_EQUAL
206
+ * preference. Value is a uint64_t.
207
+ */
208
+ CUPTI_DEVICE_ATTR_MAX_SHARED_MEMORY_CACHE_CONFIG_PREFER_EQUAL = 16,
209
+ /**
210
+ * Get the peak half precision flop per cycle. Value is a uint64_t.
211
+ */
212
+ CUPTI_DEVICE_ATTR_FLOP_HP_PER_CYCLE = 17,
213
+ /**
214
+ * Check if Nvlink is connected to device. Returns 1, if at least one
215
+ * Nvlink is connected to the device, returns 0 otherwise.
216
+ * Value is a uint32_t.
217
+ */
218
+ CUPTI_DEVICE_ATTR_NVLINK_PRESENT = 18,
219
+ /**
220
+ * Check if Nvlink is present between GPU and CPU. Returns Bandwidth,
221
+ * in Bytes/sec, if Nvlink is present, returns 0 otherwise.
222
+ * Value is a uint64_t.
223
+ */
224
+ CUPTI_DEVICE_ATTR_GPU_CPU_NVLINK_BW = 19,
225
+ /**
226
+ * Check if NVSwitch is present in the underlying topology.
227
+ * Returns 1, if present, returns 0 otherwise.
228
+ * Value is a uint32_t.
229
+ */
230
+ CUPTI_DEVICE_ATTR_NVSWITCH_PRESENT = 20,
231
+ CUPTI_DEVICE_ATTR_FORCE_INT = 0x7fffffff,
232
+ } CUpti_DeviceAttribute;
233
+
234
+ /**
235
+ * \brief Event domain attributes.
236
+ *
237
+ * Event domain attributes. Except where noted, all the attributes can
238
+ * be read using either \ref cuptiDeviceGetEventDomainAttribute or
239
+ * \ref cuptiEventDomainGetAttribute.
240
+ */
241
+ typedef enum {
242
+ /**
243
+ * Event domain name. Value is a null terminated const c-string.
244
+ */
245
+ CUPTI_EVENT_DOMAIN_ATTR_NAME = 0,
246
+ /**
247
+ * Number of instances of the domain for which event counts will be
248
+ * collected. The domain may have additional instances that cannot
249
+ * be profiled (see CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT).
250
+ * Can be read only with \ref
251
+ * cuptiDeviceGetEventDomainAttribute. Value is a uint32_t.
252
+ */
253
+ CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT = 1,
254
+ /**
255
+ * Total number of instances of the domain, including instances that
256
+ * cannot be profiled. Use CUPTI_EVENT_DOMAIN_ATTR_INSTANCE_COUNT
257
+ * to get the number of instances that can be profiled. Can be read
258
+ * only with \ref cuptiDeviceGetEventDomainAttribute. Value is a
259
+ * uint32_t.
260
+ */
261
+ CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT = 3,
262
+ /**
263
+ * Collection method used for events contained in the event domain.
264
+ * Value is a \ref CUpti_EventCollectionMethod.
265
+ */
266
+ CUPTI_EVENT_DOMAIN_ATTR_COLLECTION_METHOD = 4,
267
+
268
+ CUPTI_EVENT_DOMAIN_ATTR_FORCE_INT = 0x7fffffff,
269
+ } CUpti_EventDomainAttribute;
270
+
271
+ /**
272
+ * \brief The collection method used for an event.
273
+ *
274
+ * The collection method indicates how an event is collected.
275
+ */
276
+ typedef enum {
277
+ /**
278
+ * Event is collected using a hardware global performance monitor.
279
+ */
280
+ CUPTI_EVENT_COLLECTION_METHOD_PM = 0,
281
+ /**
282
+ * Event is collected using a hardware SM performance monitor.
283
+ */
284
+ CUPTI_EVENT_COLLECTION_METHOD_SM = 1,
285
+ /**
286
+ * Event is collected using software instrumentation.
287
+ */
288
+ CUPTI_EVENT_COLLECTION_METHOD_INSTRUMENTED = 2,
289
+ /**
290
+ * Event is collected using NvLink throughput counter method.
291
+ */
292
+ CUPTI_EVENT_COLLECTION_METHOD_NVLINK_TC = 3,
293
+ CUPTI_EVENT_COLLECTION_METHOD_FORCE_INT = 0x7fffffff
294
+ } CUpti_EventCollectionMethod;
295
+
296
+ /**
297
+ * \brief Event group attributes.
298
+ *
299
+ * Event group attributes. These attributes can be read using \ref
300
+ * cuptiEventGroupGetAttribute. Attributes marked [rw] can also be
301
+ * written using \ref cuptiEventGroupSetAttribute.
302
+ */
303
+ typedef enum {
304
+ /**
305
+ * The domain to which the event group is bound. This attribute is
306
+ * set when the first event is added to the group. Value is a
307
+ * CUpti_EventDomainID.
308
+ */
309
+ CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID = 0,
310
+ /**
311
+ * [rw] Profile all the instances of the domain for this
312
+ * eventgroup. This feature can be used to get load balancing
313
+ * across all instances of a domain. Value is an integer.
314
+ */
315
+ CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES = 1,
316
+ /**
317
+ * [rw] Reserved for user data.
318
+ */
319
+ CUPTI_EVENT_GROUP_ATTR_USER_DATA = 2,
320
+ /**
321
+ * Number of events in the group. Value is a uint32_t.
322
+ */
323
+ CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS = 3,
324
+ /**
325
+ * Enumerates events in the group. Value is a pointer to buffer of
326
+ * size sizeof(CUpti_EventID) * num_of_events in the eventgroup.
327
+ * num_of_events can be queried using
328
+ * CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS.
329
+ */
330
+ CUPTI_EVENT_GROUP_ATTR_EVENTS = 4,
331
+ /**
332
+ * Number of instances of the domain bound to this event group that
333
+ * will be counted. Value is a uint32_t.
334
+ */
335
+ CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT = 5,
336
+ /**
337
+ * Event group scope can be set to CUPTI_EVENT_PROFILING_SCOPE_DEVICE or
338
+ * CUPTI_EVENT_PROFILING_SCOPE_CONTEXT for an eventGroup, before
339
+ * adding any event.
340
+ * Sets the scope of eventgroup as CUPTI_EVENT_PROFILING_SCOPE_DEVICE or
341
+ * CUPTI_EVENT_PROFILING_SCOPE_CONTEXT when the scope of the events
342
+ * that will be added is CUPTI_EVENT_PROFILING_SCOPE_BOTH.
343
+ * If profiling scope of event is either
344
+ * CUPTI_EVENT_PROFILING_SCOPE_DEVICE or CUPTI_EVENT_PROFILING_SCOPE_CONTEXT
345
+ * then setting this attribute will not affect the default scope.
346
+ * It is not allowed to add events of different scope to same eventgroup.
347
+ * Value is a uint32_t.
348
+ */
349
+ CUPTI_EVENT_GROUP_ATTR_PROFILING_SCOPE = 6,
350
+ CUPTI_EVENT_GROUP_ATTR_FORCE_INT = 0x7fffffff,
351
+ } CUpti_EventGroupAttribute;
352
+
353
+ /**
354
+ * \brief Profiling scope for event.
355
+ *
356
+ * Profiling scope of event indicates if the event can be collected at context
357
+ * scope or device scope or both i.e. it can be collected at any of context or
358
+ * device scope.
359
+ */
360
+ typedef enum {
361
+ /**
362
+ * Event is collected at context scope.
363
+ */
364
+ CUPTI_EVENT_PROFILING_SCOPE_CONTEXT = 0,
365
+ /**
366
+ * Event is collected at device scope.
367
+ */
368
+ CUPTI_EVENT_PROFILING_SCOPE_DEVICE = 1,
369
+ /**
370
+ * Event can be collected at device or context scope.
371
+ * The scope can be set using \ref cuptiEventGroupSetAttribute API.
372
+ */
373
+ CUPTI_EVENT_PROFILING_SCOPE_BOTH = 2,
374
+ CUPTI_EVENT_PROFILING_SCOPE_FORCE_INT = 0x7fffffff
375
+ } CUpti_EventProfilingScope;
376
+
377
+ /**
378
+ * \brief Event attributes.
379
+ *
380
+ * Event attributes. These attributes can be read using \ref
381
+ * cuptiEventGetAttribute.
382
+ */
383
+ typedef enum {
384
+ /**
385
+ * Event name. Value is a null terminated const c-string.
386
+ */
387
+ CUPTI_EVENT_ATTR_NAME = 0,
388
+ /**
389
+ * Short description of event. Value is a null terminated const
390
+ * c-string.
391
+ */
392
+ CUPTI_EVENT_ATTR_SHORT_DESCRIPTION = 1,
393
+ /**
394
+ * Long description of event. Value is a null terminated const
395
+ * c-string.
396
+ */
397
+ CUPTI_EVENT_ATTR_LONG_DESCRIPTION = 2,
398
+ /**
399
+ * Category of event. Value is CUpti_EventCategory.
400
+ */
401
+ CUPTI_EVENT_ATTR_CATEGORY = 3,
402
+ /**
403
+ * Profiling scope of the events. It can be either device or context or both.
404
+ * Value is a \ref CUpti_EventProfilingScope.
405
+ */
406
+ CUPTI_EVENT_ATTR_PROFILING_SCOPE = 5,
407
+
408
+ CUPTI_EVENT_ATTR_FORCE_INT = 0x7fffffff,
409
+ } CUpti_EventAttribute;
410
+
411
+ /**
412
+ * \brief Event collection modes.
413
+ *
414
+ * The event collection mode determines the period over which the
415
+ * events within the enabled event groups will be collected.
416
+ */
417
+ typedef enum {
418
+ /**
419
+ * Events are collected for the entire duration between the
420
+ * cuptiEventGroupEnable and cuptiEventGroupDisable calls.
421
+ * Event values are reset when the events are read.
422
+ * For CUDA toolkit v6.0 and older this was the default mode.
423
+ */
424
+ CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS = 0,
425
+ /**
426
+ * Events are collected only for the durations of kernel executions
427
+ * that occur between the cuptiEventGroupEnable and
428
+ * cuptiEventGroupDisable calls. Event collection begins when a
429
+ * kernel execution begins, and stops when kernel execution
430
+ * completes. Event values are reset to zero when each kernel
431
+ * execution begins. If multiple kernel executions occur between the
432
+ * cuptiEventGroupEnable and cuptiEventGroupDisable calls then the
433
+ * event values must be read after each kernel launch if those
434
+ * events need to be associated with the specific kernel launch.
435
+ * Note that collection in this mode may significantly change the
436
+ * overall performance characteristics of the application because
437
+ * kernel executions that occur between the cuptiEventGroupEnable and
438
+ * cuptiEventGroupDisable calls are serialized on the GPU.
439
+ * This is the default mode starting with CUDA toolkit v6.5.
440
+ */
441
+ CUPTI_EVENT_COLLECTION_MODE_KERNEL = 1,
442
+ CUPTI_EVENT_COLLECTION_MODE_FORCE_INT = 0x7fffffff
443
+ } CUpti_EventCollectionMode;
444
+
445
+ /**
446
+ * \brief An event category.
447
+ *
448
+ * Each event is assigned to a category that represents the general
449
+ * type of the event. An event's category is accessed using \ref
450
+ * cuptiEventGetAttribute and the CUPTI_EVENT_ATTR_CATEGORY attribute.
451
+ */
452
+ typedef enum {
453
+ /**
454
+ * An instruction related event.
455
+ */
456
+ CUPTI_EVENT_CATEGORY_INSTRUCTION = 0,
457
+ /**
458
+ * A memory related event.
459
+ */
460
+ CUPTI_EVENT_CATEGORY_MEMORY = 1,
461
+ /**
462
+ * A cache related event.
463
+ */
464
+ CUPTI_EVENT_CATEGORY_CACHE = 2,
465
+ /**
466
+ * A profile-trigger event.
467
+ */
468
+ CUPTI_EVENT_CATEGORY_PROFILE_TRIGGER = 3,
469
+ /**
470
+ * A system event.
471
+ */
472
+ CUPTI_EVENT_CATEGORY_SYSTEM = 4,
473
+ CUPTI_EVENT_CATEGORY_FORCE_INT = 0x7fffffff
474
+ } CUpti_EventCategory;
475
+
476
+ /**
477
+ * \brief The overflow value for a CUPTI event.
478
+ *
479
+ * The CUPTI event value that indicates an overflow.
480
+ */
481
+ #define CUPTI_EVENT_OVERFLOW ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
482
+
483
+ /**
484
+ * \brief The value that indicates the event value is invalid
485
+ */
486
+ #define CUPTI_EVENT_INVALID ((uint64_t)0xFFFFFFFFFFFFFFFEULL)
487
+
488
+ /**
489
+ * \brief Flags for cuptiEventGroupReadEvent and
490
+ * cuptiEventGroupReadAllEvents.
491
+ *
492
+ * Flags for \ref cuptiEventGroupReadEvent and \ref
493
+ * cuptiEventGroupReadAllEvents.
494
+ */
495
+ typedef enum {
496
+ /**
497
+ * No flags.
498
+ */
499
+ CUPTI_EVENT_READ_FLAG_NONE = 0,
500
+ CUPTI_EVENT_READ_FLAG_FORCE_INT = 0x7fffffff,
501
+ } CUpti_ReadEventFlags;
502
+
503
+
504
+ /**
505
+ * \brief A set of event groups.
506
+ *
507
+ * A set of event groups. When returned by \ref
508
+ * cuptiEventGroupSetsCreate and \ref cuptiMetricCreateEventGroupSets
509
+ * a set indicates the event groups that can be enabled at the same
510
+ * time (i.e. all the events in the set can be collected
511
+ * simultaneously).
512
+ */
513
+ typedef struct {
514
+ /**
515
+ * The number of event groups in the set.
516
+ */
517
+ uint32_t numEventGroups;
518
+ /**
519
+ * An array of \p numEventGroups event groups.
520
+ */
521
+ CUpti_EventGroup *eventGroups;
522
+ } CUpti_EventGroupSet;
523
+
524
+ /**
525
+ * \brief A set of event group sets.
526
+ *
527
+ * A set of event group sets. When returned by \ref
528
+ * cuptiEventGroupSetsCreate and \ref cuptiMetricCreateEventGroupSets
529
+ * a CUpti_EventGroupSets indicates the number of passes required to
530
+ * collect all the events, and the event groups that should be
531
+ * collected during each pass.
532
+ */
533
+ typedef struct {
534
+ /**
535
+ * Number of event group sets.
536
+ */
537
+ uint32_t numSets;
538
+ /**
539
+ * An array of \p numSets event group sets.
540
+ */
541
+ CUpti_EventGroupSet *sets;
542
+ } CUpti_EventGroupSets;
543
+
544
+ /**
545
+ * \brief Set the event collection mode.
546
+ *
547
+ * Set the event collection mode for a \p context. The \p mode
548
+ * controls the event collection behavior of all events in event
549
+ * groups created in the \p context. This API is invalid in kernel
550
+ * replay mode.
551
+ * \note \b Thread-safety: this function is thread safe.
552
+ *
553
+ * \param context The context
554
+ * \param mode The event collection mode
555
+ *
556
+ * \retval CUPTI_SUCCESS
557
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
558
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
559
+ * \retval CUPTI_ERROR_INVALID_OPERATION if called when replay mode is enabled
560
+ * \retval CUPTI_ERROR_NOT_SUPPORTED if mode is not supported on the device
561
+ */
562
+
563
+ CUptiResult CUPTIAPI cuptiSetEventCollectionMode(CUcontext context,
564
+ CUpti_EventCollectionMode mode);
565
+
566
+ /**
567
+ * \brief Read a device attribute.
568
+ *
569
+ * Read a device attribute and return it in \p *value.
570
+ * \note \b Thread-safety: this function is thread safe.
571
+ *
572
+ * \param device The CUDA device
573
+ * \param attrib The attribute to read
574
+ * \param valueSize Size of the buffer pointed to by \p value, and
575
+ * returns the number of bytes written to \p value
576
+ * \param value Returns the value of the attribute
577
+ *
578
+ * \retval CUPTI_SUCCESS
579
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
580
+ * \retval CUPTI_ERROR_INVALID_DEVICE
581
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
582
+ * is NULL, or if \p attrib is not a device attribute
583
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
584
+ * attribute values, indicates that the \p value buffer is too small
585
+ * to hold the attribute value.
586
+ */
587
+ CUptiResult CUPTIAPI cuptiDeviceGetAttribute(CUdevice device,
588
+ CUpti_DeviceAttribute attrib,
589
+ size_t *valueSize,
590
+ void *value);
591
+
592
+ /**
593
+ * \brief Read a device timestamp.
594
+ *
595
+ * Returns the device timestamp in \p *timestamp. The timestamp is
596
+ * reported in nanoseconds and indicates the time since the device was
597
+ * last reset.
598
+ * \note \b Thread-safety: this function is thread safe.
599
+ *
600
+ * \param context A context on the device from which to get the timestamp
601
+ * \param timestamp Returns the device timestamp
602
+ *
603
+ * \retval CUPTI_SUCCESS
604
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
605
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
606
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p timestamp is NULL
607
+
608
+ * **DEPRECATED** This API is deprecated as of CUDA 11.3
609
+ */
610
+ CUptiResult CUPTIAPI cuptiDeviceGetTimestamp(CUcontext context,
611
+ uint64_t *timestamp);
612
+
613
+ /**
614
+ * \brief Get the number of domains for a device.
615
+ *
616
+ * Returns the number of domains in \p numDomains for a device.
617
+ * \note \b Thread-safety: this function is thread safe.
618
+ *
619
+ * \param device The CUDA device
620
+ * \param numDomains Returns the number of domains
621
+ *
622
+ * \retval CUPTI_SUCCESS
623
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
624
+ * \retval CUPTI_ERROR_INVALID_DEVICE
625
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numDomains is NULL
626
+ */
627
+ CUptiResult CUPTIAPI cuptiDeviceGetNumEventDomains(CUdevice device,
628
+ uint32_t *numDomains);
629
+
630
+ /**
631
+ * \brief Get the event domains for a device.
632
+ *
633
+ * Returns the event domains IDs in \p domainArray for a device. The
634
+ * size of the \p domainArray buffer is given by \p
635
+ * *arraySizeBytes. The size of the \p domainArray buffer must be at
636
+ * least \p numdomains * sizeof(CUpti_EventDomainID) or else not all
637
+ * domains will be returned. The value returned in \p
638
+ * *arraySizeBytes contains the number of bytes returned in \p
639
+ * domainArray.
640
+ * \note \b Thread-safety: this function is thread safe.
641
+ *
642
+ * \param device The CUDA device
643
+ * \param arraySizeBytes The size of \p domainArray in bytes, and
644
+ * returns the number of bytes written to \p domainArray
645
+ * \param domainArray Returns the IDs of the event domains for the device
646
+ *
647
+ * \retval CUPTI_SUCCESS
648
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
649
+ * \retval CUPTI_ERROR_INVALID_DEVICE
650
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or
651
+ * \p domainArray are NULL
652
+ */
653
+ CUptiResult CUPTIAPI cuptiDeviceEnumEventDomains(CUdevice device,
654
+ size_t *arraySizeBytes,
655
+ CUpti_EventDomainID *domainArray);
656
+
657
+ /**
658
+ * \brief Read an event domain attribute.
659
+ *
660
+ * Returns an event domain attribute in \p *value. The size of the \p
661
+ * value buffer is given by \p *valueSize. The value returned in \p
662
+ * *valueSize contains the number of bytes returned in \p value.
663
+ *
664
+ * If the attribute value is a c-string that is longer than \p
665
+ * *valueSize, then only the first \p *valueSize characters will be
666
+ * returned and there will be no terminating null byte.
667
+ * \note \b Thread-safety: this function is thread safe.
668
+ *
669
+ * \param device The CUDA device
670
+ * \param eventDomain ID of the event domain
671
+ * \param attrib The event domain attribute to read
672
+ * \param valueSize The size of the \p value buffer in bytes, and
673
+ * returns the number of bytes written to \p value
674
+ * \param value Returns the attribute's value
675
+ *
676
+ * \retval CUPTI_SUCCESS
677
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
678
+ * \retval CUPTI_ERROR_INVALID_DEVICE
679
+ * \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
680
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
681
+ * is NULL, or if \p attrib is not an event domain attribute
682
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
683
+ * attribute values, indicates that the \p value buffer is too small
684
+ * to hold the attribute value.
685
+ */
686
+ CUptiResult CUPTIAPI cuptiDeviceGetEventDomainAttribute(CUdevice device,
687
+ CUpti_EventDomainID eventDomain,
688
+ CUpti_EventDomainAttribute attrib,
689
+ size_t *valueSize,
690
+ void *value);
691
+
692
+ /**
693
+ * \brief Get the number of event domains available on any device.
694
+ *
695
+ * Returns the total number of event domains available on any
696
+ * CUDA-capable device.
697
+ * \note \b Thread-safety: this function is thread safe.
698
+ *
699
+ * \param numDomains Returns the number of domains
700
+ *
701
+ * \retval CUPTI_SUCCESS
702
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numDomains is NULL
703
+ */
704
+ CUptiResult CUPTIAPI cuptiGetNumEventDomains(uint32_t *numDomains);
705
+
706
+ /**
707
+ * \brief Get the event domains available on any device.
708
+ *
709
+ * Returns all the event domains available on any CUDA-capable device.
710
+ * Event domain IDs are returned in \p domainArray. The size of the \p
711
+ * domainArray buffer is given by \p *arraySizeBytes. The size of the
712
+ * \p domainArray buffer must be at least \p numDomains *
713
+ * sizeof(CUpti_EventDomainID) or not all domains will be
714
+ * returned. The value returned in \p *arraySizeBytes contains the
715
+ * number of bytes returned in \p domainArray.
716
+ * \note \b Thread-safety: this function is thread safe.
717
+ *
718
+ * \param arraySizeBytes The size of \p domainArray in bytes, and
719
+ * returns the number of bytes written to \p domainArray
720
+ * \param domainArray Returns all the event domains
721
+ *
722
+ * \retval CUPTI_SUCCESS
723
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or
724
+ * \p domainArray are NULL
725
+ */
726
+ CUptiResult CUPTIAPI cuptiEnumEventDomains(size_t *arraySizeBytes,
727
+ CUpti_EventDomainID *domainArray);
728
+
729
+ /**
730
+ * \brief Read an event domain attribute.
731
+ *
732
+ * Returns an event domain attribute in \p *value. The size of the \p
733
+ * value buffer is given by \p *valueSize. The value returned in \p
734
+ * *valueSize contains the number of bytes returned in \p value.
735
+ *
736
+ * If the attribute value is a c-string that is longer than \p
737
+ * *valueSize, then only the first \p *valueSize characters will be
738
+ * returned and there will be no terminating null byte.
739
+ * \note \b Thread-safety: this function is thread safe.
740
+ *
741
+ * \param eventDomain ID of the event domain
742
+ * \param attrib The event domain attribute to read
743
+ * \param valueSize The size of the \p value buffer in bytes, and
744
+ * returns the number of bytes written to \p value
745
+ * \param value Returns the attribute's value
746
+ *
747
+ * \retval CUPTI_SUCCESS
748
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
749
+ * \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
750
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
751
+ * is NULL, or if \p attrib is not an event domain attribute
752
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
753
+ * attribute values, indicates that the \p value buffer is too small
754
+ * to hold the attribute value.
755
+ */
756
+ CUptiResult CUPTIAPI cuptiEventDomainGetAttribute(CUpti_EventDomainID eventDomain,
757
+ CUpti_EventDomainAttribute attrib,
758
+ size_t *valueSize,
759
+ void *value);
760
+
761
+ /**
762
+ * \brief Get number of events in a domain.
763
+ *
764
+ * Returns the number of events in \p numEvents for a domain.
765
+ * \note \b Thread-safety: this function is thread safe.
766
+ *
767
+ * \param eventDomain ID of the event domain
768
+ * \param numEvents Returns the number of events in the domain
769
+ *
770
+ * \retval CUPTI_SUCCESS
771
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
772
+ * \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
773
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p numEvents is NULL
774
+ */
775
+ CUptiResult CUPTIAPI cuptiEventDomainGetNumEvents(CUpti_EventDomainID eventDomain,
776
+ uint32_t *numEvents);
777
+
778
+ /**
779
+ * \brief Get the events in a domain.
780
+ *
781
+ * Returns the event IDs in \p eventArray for a domain. The size of
782
+ * the \p eventArray buffer is given by \p *arraySizeBytes. The size
783
+ * of the \p eventArray buffer must be at least \p numdomainevents *
784
+ * sizeof(CUpti_EventID) or else not all events will be returned. The
785
+ * value returned in \p *arraySizeBytes contains the number of bytes
786
+ * returned in \p eventArray.
787
+ * \note \b Thread-safety: this function is thread safe.
788
+ *
789
+ * \param eventDomain ID of the event domain
790
+ * \param arraySizeBytes The size of \p eventArray in bytes, and
791
+ * returns the number of bytes written to \p eventArray
792
+ * \param eventArray Returns the IDs of the events in the domain
793
+ *
794
+ * \retval CUPTI_SUCCESS
795
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
796
+ * \retval CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID
797
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p arraySizeBytes or \p
798
+ * eventArray are NULL
799
+ */
800
+ CUptiResult CUPTIAPI cuptiEventDomainEnumEvents(CUpti_EventDomainID eventDomain,
801
+ size_t *arraySizeBytes,
802
+ CUpti_EventID *eventArray);
803
+
804
+ /**
805
+ * \brief Get an event attribute.
806
+ *
807
+ * Returns an event attribute in \p *value. The size of the \p
808
+ * value buffer is given by \p *valueSize. The value returned in \p
809
+ * *valueSize contains the number of bytes returned in \p value.
810
+ *
811
+ * If the attribute value is a c-string that is longer than \p
812
+ * *valueSize, then only the first \p *valueSize characters will be
813
+ * returned and there will be no terminating null byte.
814
+ * \note \b Thread-safety: this function is thread safe.
815
+ *
816
+ * \param event ID of the event
817
+ * \param attrib The event attribute to read
818
+ * \param valueSize The size of the \p value buffer in bytes, and
819
+ * returns the number of bytes written to \p value
820
+ * \param value Returns the attribute's value
821
+ *
822
+ * \retval CUPTI_SUCCESS
823
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
824
+ * \retval CUPTI_ERROR_INVALID_EVENT_ID
825
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
826
+ * is NULL, or if \p attrib is not an event attribute
827
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
828
+ * attribute values, indicates that the \p value buffer is too small
829
+ * to hold the attribute value.
830
+ */
831
+ CUptiResult CUPTIAPI cuptiEventGetAttribute(CUpti_EventID event,
832
+ CUpti_EventAttribute attrib,
833
+ size_t *valueSize,
834
+ void *value);
835
+
836
+ /**
837
+ * \brief Find an event by name.
838
+ *
839
+ * Find an event by name and return the event ID in \p *event.
840
+ * \note \b Thread-safety: this function is thread safe.
841
+ *
842
+ * \param device The CUDA device
843
+ * \param eventName The name of the event to find
844
+ * \param event Returns the ID of the found event or undefined if
845
+ * unable to find the event
846
+ *
847
+ * \retval CUPTI_SUCCESS
848
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
849
+ * \retval CUPTI_ERROR_INVALID_DEVICE
850
+ * \retval CUPTI_ERROR_INVALID_EVENT_NAME if unable to find an event
851
+ * with name \p eventName. In this case \p *event is undefined
852
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventName or \p event are NULL
853
+ */
854
+ CUptiResult CUPTIAPI cuptiEventGetIdFromName(CUdevice device,
855
+ const char *eventName,
856
+ CUpti_EventID *event);
857
+
858
+ /**
859
+ * \brief Create a new event group for a context.
860
+ *
861
+ * Creates a new event group for \p context and returns the new group
862
+ * in \p *eventGroup.
863
+ * \note \p flags are reserved for future use and should be set to zero.
864
+ * \note \b Thread-safety: this function is thread safe.
865
+ *
866
+ * \param context The context for the event group
867
+ * \param eventGroup Returns the new event group
868
+ * \param flags Reserved - must be zero
869
+ *
870
+ * \retval CUPTI_SUCCESS
871
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
872
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
873
+ * \retval CUPTI_ERROR_OUT_OF_MEMORY
874
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
875
+ */
876
+ CUptiResult CUPTIAPI cuptiEventGroupCreate(CUcontext context,
877
+ CUpti_EventGroup *eventGroup,
878
+ uint32_t flags);
879
+
880
+ /**
881
+ * \brief Destroy an event group.
882
+ *
883
+ * Destroy an \p eventGroup and free its resources. An event group
884
+ * cannot be destroyed if it is enabled.
885
+ * \note \b Thread-safety: this function is thread safe.
886
+ *
887
+ * \param eventGroup The event group to destroy
888
+ *
889
+ * \retval CUPTI_SUCCESS
890
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
891
+ * \retval CUPTI_ERROR_INVALID_OPERATION if the event group is enabled
892
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
893
+ */
894
+ CUptiResult CUPTIAPI cuptiEventGroupDestroy(CUpti_EventGroup eventGroup);
895
+
896
+ /**
897
+ * \brief Read an event group attribute.
898
+ *
899
+ * Read an event group attribute and return it in \p *value.
900
+ * \note \b Thread-safety: this function is thread safe but client
901
+ * must guard against simultaneous destruction or modification of \p
902
+ * eventGroup (for example, client must guard against simultaneous
903
+ * calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
904
+ * etc.), and must guard against simultaneous destruction of the
905
+ * context in which \p eventGroup was created (for example, client
906
+ * must guard against simultaneous calls to cudaDeviceReset,
907
+ * cuCtxDestroy, etc.).
908
+ *
909
+ * \param eventGroup The event group
910
+ * \param attrib The attribute to read
911
+ * \param valueSize The size of the \p value buffer in bytes, and
912
+ * returns the number of bytes written to \p value
913
+ * \param value Returns the value of the attribute
914
+ *
915
+ * \retval CUPTI_SUCCESS
916
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
917
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
918
+ * is NULL, or if \p attrib is not an event group attribute
919
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT For non-c-string
920
+ * attribute values, indicates that the \p value buffer is too small
921
+ * to hold the attribute value.
922
+ */
923
+ CUptiResult CUPTIAPI cuptiEventGroupGetAttribute(CUpti_EventGroup eventGroup,
924
+ CUpti_EventGroupAttribute attrib,
925
+ size_t *valueSize,
926
+ void *value);
927
+
928
+ /**
929
+ * \brief Write an event group attribute.
930
+ *
931
+ * Write an event group attribute.
932
+ * \note \b Thread-safety: this function is thread safe.
933
+ *
934
+ * \param eventGroup The event group
935
+ * \param attrib The attribute to write
936
+ * \param valueSize The size, in bytes, of the value
937
+ * \param value The attribute value to write
938
+ *
939
+ * \retval CUPTI_SUCCESS
940
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
941
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p valueSize or \p value
942
+ * is NULL, or if \p attrib is not an event group attribute, or if
943
+ * \p attrib is not a writable attribute
944
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT Indicates that
945
+ * the \p value buffer is too small to hold the attribute value.
946
+ */
947
+ CUptiResult CUPTIAPI cuptiEventGroupSetAttribute(CUpti_EventGroup eventGroup,
948
+ CUpti_EventGroupAttribute attrib,
949
+ size_t valueSize,
950
+ void *value);
951
+
952
+ /**
953
+ * \brief Add an event to an event group.
954
+ *
955
+ * Add an event to an event group. The event add can fail for a number of reasons:
956
+ * \li The event group is enabled
957
+ * \li The event does not belong to the same event domain as the
958
+ * events that are already in the event group
959
+ * \li Device limitations on the events that can belong to the same group
960
+ * \li The event group is full
961
+ *
962
+ * \note \b Thread-safety: this function is thread safe.
963
+ *
964
+ * \param eventGroup The event group
965
+ * \param event The event to add to the group
966
+ *
967
+ * \retval CUPTI_SUCCESS
968
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
969
+ * \retval CUPTI_ERROR_INVALID_EVENT_ID
970
+ * \retval CUPTI_ERROR_OUT_OF_MEMORY
971
+ * \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is enabled
972
+ * \retval CUPTI_ERROR_NOT_COMPATIBLE if \p event belongs to a
973
+ * different event domain than the events already in \p eventGroup, or
974
+ * if a device limitation prevents \p event from being collected at
975
+ * the same time as the events already in \p eventGroup
976
+ * \retval CUPTI_ERROR_MAX_LIMIT_REACHED if \p eventGroup is full
977
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
978
+ */
979
+ CUptiResult CUPTIAPI cuptiEventGroupAddEvent(CUpti_EventGroup eventGroup,
980
+ CUpti_EventID event);
981
+
982
+ /**
983
+ * \brief Remove an event from an event group.
984
+ *
985
+ * Remove \p event from an event group. The event cannot be
986
+ * removed if the event group is enabled.
987
+ * \note \b Thread-safety: this function is thread safe.
988
+ *
989
+ * \param eventGroup The event group
990
+ * \param event The event to remove from the group
991
+ *
992
+ * \retval CUPTI_SUCCESS
993
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
994
+ * \retval CUPTI_ERROR_INVALID_EVENT_ID
995
+ * \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is enabled
996
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
997
+ */
998
+ CUptiResult CUPTIAPI cuptiEventGroupRemoveEvent(CUpti_EventGroup eventGroup,
999
+ CUpti_EventID event);
1000
+
1001
+ /**
1002
+ * \brief Remove all events from an event group.
1003
+ *
1004
+ * Remove all events from an event group. Events cannot be removed if
1005
+ * the event group is enabled.
1006
+ * \note \b Thread-safety: this function is thread safe.
1007
+ *
1008
+ * \param eventGroup The event group
1009
+ *
1010
+ * \retval CUPTI_SUCCESS
1011
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1012
+ * \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is enabled
1013
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
1014
+ */
1015
+ CUptiResult CUPTIAPI cuptiEventGroupRemoveAllEvents(CUpti_EventGroup eventGroup);
1016
+
1017
+ /**
1018
+ * \brief Zero all the event counts in an event group.
1019
+ *
1020
+ * Zero all the event counts in an event group.
1021
+ * \note \b Thread-safety: this function is thread safe but client
1022
+ * must guard against simultaneous destruction or modification of \p
1023
+ * eventGroup (for example, client must guard against simultaneous
1024
+ * calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
1025
+ * etc.), and must guard against simultaneous destruction of the
1026
+ * context in which \p eventGroup was created (for example, client
1027
+ * must guard against simultaneous calls to cudaDeviceReset,
1028
+ * cuCtxDestroy, etc.).
1029
+ *
1030
+ * \param eventGroup The event group
1031
+ *
1032
+ * \retval CUPTI_SUCCESS
1033
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1034
+ * \retval CUPTI_ERROR_HARDWARE
1035
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
1036
+ */
1037
+ CUptiResult CUPTIAPI cuptiEventGroupResetAllEvents(CUpti_EventGroup eventGroup);
1038
+
1039
+ /**
1040
+ * \brief Enable an event group.
1041
+ *
1042
+ * Enable an event group. Enabling an event group zeros the value of
1043
+ * all the events in the group and then starts collection of those
1044
+ * events.
1045
+ * \note \b Thread-safety: this function is thread safe.
1046
+ *
1047
+ * \param eventGroup The event group
1048
+ *
1049
+ * \retval CUPTI_SUCCESS
1050
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1051
+ * \retval CUPTI_ERROR_HARDWARE
1052
+ * \retval CUPTI_ERROR_NOT_READY if \p eventGroup does not contain any events
1053
+ * \retval CUPTI_ERROR_NOT_COMPATIBLE if \p eventGroup cannot be
1054
+ * enabled due to other already enabled event groups
1055
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
1056
+ * \retval CUPTI_ERROR_HARDWARE_BUSY if another client is profiling
1057
+ * and hardware is busy
1058
+ */
1059
+ CUptiResult CUPTIAPI cuptiEventGroupEnable(CUpti_EventGroup eventGroup);
1060
+
1061
+ /**
1062
+ * \brief Disable an event group.
1063
+ *
1064
+ * Disable an event group. Disabling an event group stops collection
1065
+ * of events contained in the group.
1066
+ * \note \b Thread-safety: this function is thread safe.
1067
+ *
1068
+ * \param eventGroup The event group
1069
+ *
1070
+ * \retval CUPTI_SUCCESS
1071
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1072
+ * \retval CUPTI_ERROR_HARDWARE
1073
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup is NULL
1074
+ */
1075
+ CUptiResult CUPTIAPI cuptiEventGroupDisable(CUpti_EventGroup eventGroup);
1076
+
1077
+ /**
1078
+ * \brief Read the value for an event in an event group.
1079
+ *
1080
+ * Read the value for an event in an event group. The event value is
1081
+ * returned in the \p eventValueBuffer buffer. \p
1082
+ * eventValueBufferSizeBytes indicates the size of the \p
1083
+ * eventValueBuffer buffer. The buffer must be at least sizeof(uint64_t)
1084
+ * if ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is not set
1085
+ * on the group containing the event. The buffer must be at least
1086
+ * (sizeof(uint64_t) * number of domain instances) if
1087
+ * ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is set on the
1088
+ * group.
1089
+ *
1090
+ * If any instance of an event counter overflows, the value returned
1091
+ * for that event instance will be ::CUPTI_EVENT_OVERFLOW.
1092
+ *
1093
+ * The only allowed value for \p flags is ::CUPTI_EVENT_READ_FLAG_NONE.
1094
+ *
1095
+ * Reading an event from a disabled event group is not allowed. After
1096
+ * being read, an event's value is reset to zero.
1097
+ * \note \b Thread-safety: this function is thread safe but client
1098
+ * must guard against simultaneous destruction or modification of \p
1099
+ * eventGroup (for example, client must guard against simultaneous
1100
+ * calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
1101
+ * etc.), and must guard against simultaneous destruction of the
1102
+ * context in which \p eventGroup was created (for example, client
1103
+ * must guard against simultaneous calls to cudaDeviceReset,
1104
+ * cuCtxDestroy, etc.). If \ref cuptiEventGroupResetAllEvents is
1105
+ * called simultaneously with this function, then returned event
1106
+ * values are undefined.
1107
+ *
1108
+ * \param eventGroup The event group
1109
+ * \param flags Flags controlling the reading mode
1110
+ * \param event The event to read
1111
+ * \param eventValueBufferSizeBytes The size of \p eventValueBuffer
1112
+ * in bytes, and returns the number of bytes written to \p
1113
+ * eventValueBuffer
1114
+ * \param eventValueBuffer Returns the event value(s)
1115
+ *
1116
+ * \retval CUPTI_SUCCESS
1117
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1118
+ * \retval CUPTI_ERROR_INVALID_EVENT_ID
1119
+ * \retval CUPTI_ERROR_HARDWARE
1120
+ * \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is disabled
1121
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup, \p
1122
+ * eventValueBufferSizeBytes or \p eventValueBuffer is NULL
1123
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if size of \p eventValueBuffer
1124
+ * is not sufficient
1125
+ */
1126
+ CUptiResult CUPTIAPI cuptiEventGroupReadEvent(CUpti_EventGroup eventGroup,
1127
+ CUpti_ReadEventFlags flags,
1128
+ CUpti_EventID event,
1129
+ size_t *eventValueBufferSizeBytes,
1130
+ uint64_t *eventValueBuffer);
1131
+
1132
+ /**
1133
+ * \brief Read the values for all the events in an event group.
1134
+ *
1135
+ * Read the values for all the events in an event group. The event
1136
+ * values are returned in the \p eventValueBuffer buffer. \p
1137
+ * eventValueBufferSizeBytes indicates the size of \p
1138
+ * eventValueBuffer. The buffer must be at least (sizeof(uint64_t) *
1139
+ * number of events in group) if
1140
+ * ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is not set on
1141
+ * the group containing the events. The buffer must be at least
1142
+ * (sizeof(uint64_t) * number of domain instances * number of events in
1143
+ * group) if ::CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES is
1144
+ * set on the group.
1145
+ *
1146
+ * The data format returned in \p eventValueBuffer is:
1147
+ * - domain instance 0: event0 event1 ... eventN
1148
+ * - domain instance 1: event0 event1 ... eventN
1149
+ * - ...
1150
+ * - domain instance M: event0 event1 ... eventN
1151
+ *
1152
+ * The event order in \p eventValueBuffer is returned in \p
1153
+ * eventIdArray. The size of \p eventIdArray is specified in \p
1154
+ * eventIdArraySizeBytes. The size should be at least
1155
+ * (sizeof(CUpti_EventID) * number of events in group).
1156
+ *
1157
+ * If any instance of any event counter overflows, the value returned
1158
+ * for that event instance will be ::CUPTI_EVENT_OVERFLOW.
1159
+ *
1160
+ * The only allowed value for \p flags is ::CUPTI_EVENT_READ_FLAG_NONE.
1161
+ *
1162
+ * Reading events from a disabled event group is not allowed. After
1163
+ * being read, an event's value is reset to zero.
1164
+ * \note \b Thread-safety: this function is thread safe but client
1165
+ * must guard against simultaneous destruction or modification of \p
1166
+ * eventGroup (for example, client must guard against simultaneous
1167
+ * calls to \ref cuptiEventGroupDestroy, \ref cuptiEventGroupAddEvent,
1168
+ * etc.), and must guard against simultaneous destruction of the
1169
+ * context in which \p eventGroup was created (for example, client
1170
+ * must guard against simultaneous calls to cudaDeviceReset,
1171
+ * cuCtxDestroy, etc.). If \ref cuptiEventGroupResetAllEvents is
1172
+ * called simultaneously with this function, then returned event
1173
+ * values are undefined.
1174
+ *
1175
+ * \param eventGroup The event group
1176
+ * \param flags Flags controlling the reading mode
1177
+ * \param eventValueBufferSizeBytes The size of \p eventValueBuffer in
1178
+ * bytes, and returns the number of bytes written to \p
1179
+ * eventValueBuffer
1180
+ * \param eventValueBuffer Returns the event values
1181
+ * \param eventIdArraySizeBytes The size of \p eventIdArray in bytes,
1182
+ * and returns the number of bytes written to \p eventIdArray
1183
+ * \param eventIdArray Returns the IDs of the events in the same order
1184
+ * as the values returned in \p eventValueBuffer.
1185
+ * \param numEventIdsRead Returns the number of event IDs returned
1186
+ * in \p eventIdArray
1187
+ *
1188
+ * \retval CUPTI_SUCCESS
1189
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1190
+ * \retval CUPTI_ERROR_HARDWARE
1191
+ * \retval CUPTI_ERROR_INVALID_OPERATION if \p eventGroup is disabled
1192
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroup, \p
1193
+ * eventValueBufferSizeBytes, \p eventValueBuffer, \p
1194
+ * eventIdArraySizeBytes, \p eventIdArray or \p numEventIdsRead is
1195
+ * NULL
1196
+ * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT if size of \p eventValueBuffer
1197
+ * or \p eventIdArray is not sufficient
1198
+ */
1199
+ CUptiResult CUPTIAPI cuptiEventGroupReadAllEvents(CUpti_EventGroup eventGroup,
1200
+ CUpti_ReadEventFlags flags,
1201
+ size_t *eventValueBufferSizeBytes,
1202
+ uint64_t *eventValueBuffer,
1203
+ size_t *eventIdArraySizeBytes,
1204
+ CUpti_EventID *eventIdArray,
1205
+ size_t *numEventIdsRead);
1206
+
1207
+ /**
1208
+ * \brief For a set of events, get the grouping that indicates the
1209
+ * number of passes and the event groups necessary to collect the
1210
+ * events.
1211
+ *
1212
+ * The number of events that can be collected simultaneously varies by
1213
+ * device and by the type of the events. When events can be collected
1214
+ * simultaneously, they may need to be grouped into multiple event
1215
+ * groups because they are from different event domains. This function
1216
+ * takes a set of events and determines how many passes are required
1217
+ * to collect all those events, and which events can be collected
1218
+ * simultaneously in each pass.
1219
+ *
1220
+ * The CUpti_EventGroupSets returned in \p eventGroupPasses indicates
1221
+ * how many passes are required to collect the events with the \p
1222
+ * numSets field. Within each event group set, the \p sets array
1223
+ * indicates the event groups that should be collected on each pass.
1224
+ * \note \b Thread-safety: this function is thread safe, but client
1225
+ * must guard against another thread simultaneously destroying \p
1226
+ * context.
1227
+ *
1228
+ * \param context The context for event collection
1229
+ * \param eventIdArraySizeBytes Size of \p eventIdArray in bytes
1230
+ * \param eventIdArray Array of event IDs that need to be grouped
1231
+ * \param eventGroupPasses Returns a CUpti_EventGroupSets object that
1232
+ * indicates the number of passes required to collect the events and
1233
+ * the events to collect on each pass
1234
+ *
1235
+ * \retval CUPTI_SUCCESS
1236
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1237
+ * \retval CUPTI_ERROR_INVALID_CONTEXT
1238
+ * \retval CUPTI_ERROR_INVALID_EVENT_ID
1239
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventIdArray or
1240
+ * \p eventGroupPasses is NULL
1241
+ */
1242
+ CUptiResult CUPTIAPI cuptiEventGroupSetsCreate(CUcontext context,
1243
+ size_t eventIdArraySizeBytes,
1244
+ CUpti_EventID *eventIdArray,
1245
+ CUpti_EventGroupSets **eventGroupPasses);
1246
+
1247
+ /**
1248
+ * \brief Destroy an event group sets object.
1249
+ *
1250
+ * Destroy a CUpti_EventGroupSets object.
1251
+ * \note \b Thread-safety: this function is thread safe.
1252
+ *
1253
+ * \param eventGroupSets The object to destroy
1254
+ *
1255
+ * \retval CUPTI_SUCCESS
1256
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1257
+ * \retval CUPTI_ERROR_INVALID_OPERATION if any of the event groups
1258
+ * contained in the sets is enabled
1259
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroupSets is NULL
1260
+ */
1261
+ CUptiResult CUPTIAPI cuptiEventGroupSetsDestroy(CUpti_EventGroupSets *eventGroupSets);
1262
+
1263
+
1264
+ /**
1265
+ * \brief Enable an event group set.
1266
+ *
1267
+ * Enable a set of event groups. Enabling a set of event groups zeros the value of
1268
+ * all the events in all the groups and then starts collection of those events.
1269
+ * \note \b Thread-safety: this function is thread safe.
1270
+ *
1271
+ * \param eventGroupSet The pointer to the event group set
1272
+ *
1273
+ * \retval CUPTI_SUCCESS
1274
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1275
+ * \retval CUPTI_ERROR_HARDWARE
1276
+ * \retval CUPTI_ERROR_NOT_READY if an event group in \p eventGroupSet does not contain any events
1277
+ * \retval CUPTI_ERROR_NOT_COMPATIBLE if an event group in \p eventGroupSet cannot be
1278
+ * enabled due to other already enabled event groups
1279
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroupSet is NULL
1280
+ * \retval CUPTI_ERROR_HARDWARE_BUSY if another client is profiling and hardware is
1281
+ * busy
1282
+ */
1283
+ CUptiResult CUPTIAPI cuptiEventGroupSetEnable(CUpti_EventGroupSet *eventGroupSet);
1284
+
1285
+ /**
1286
+ * \brief Disable an event group set.
1287
+ *
1288
+ * Disable a set of event groups. Disabling a set of event groups
1289
+ * stops collection of events contained in the groups.
1290
+ * \note \b Thread-safety: this function is thread safe.
1291
+ * \note \b If this call fails, some of the event groups in the set may be disabled
1292
+ * and other event groups may remain enabled.
1293
+ *
1294
+ * \param eventGroupSet The pointer to the event group set
1295
+ * \retval CUPTI_SUCCESS
1296
+ * \retval CUPTI_ERROR_NOT_INITIALIZED
1297
+ * \retval CUPTI_ERROR_HARDWARE
1298
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p eventGroupSet is NULL
1299
+ */
1300
+ CUptiResult CUPTIAPI cuptiEventGroupSetDisable(CUpti_EventGroupSet *eventGroupSet);
1301
+
1302
+ /**
1303
+ * \brief Enable kernel replay mode.
1304
+ *
1305
+ * Set profiling mode for the context to replay mode. In this mode,
1306
+ * any number of events can be collected in one run of the kernel. The
1307
+ * event collection mode will automatically switch to
1308
+ * CUPTI_EVENT_COLLECTION_MODE_KERNEL. In this mode, \ref
1309
+ * cuptiSetEventCollectionMode will return
1310
+ * CUPTI_ERROR_INVALID_OPERATION.
1311
+ * \note \b Kernels might take longer to run if many events are enabled.
1312
+ * \note \b Thread-safety: this function is thread safe.
1313
+ *
1314
+ * \param context The context
1315
+ * \retval CUPTI_SUCCESS
1316
+ */
1317
+ CUptiResult CUPTIAPI cuptiEnableKernelReplayMode(CUcontext context);
1318
+
1319
+ /**
1320
+ * \brief Disable kernel replay mode.
1321
+ *
1322
+ * Set profiling mode for the context to non-replay (default)
1323
+ * mode. Event collection mode will be set to
1324
+ * CUPTI_EVENT_COLLECTION_MODE_KERNEL. All previously enabled
1325
+ * event groups and event group sets will be disabled.
1326
+ * \note \b Thread-safety: this function is thread safe.
1327
+ *
1328
+ * \param context The context
1329
+ * \retval CUPTI_SUCCESS
1330
+ */
1331
+ CUptiResult CUPTIAPI cuptiDisableKernelReplayMode(CUcontext context);
1332
+
1333
+ /**
1334
+ * \brief Function type for getting updates on kernel replay.
1335
+ *
1336
+ * \param kernelName The mangled kernel name
1337
+ * \param numReplaysDone Number of replays done so far
1338
+ * \param customData Pointer to any custom data passed in when subscribing
1339
+ */
1340
+ typedef void (CUPTIAPI *CUpti_KernelReplayUpdateFunc)(
1341
+ const char *kernelName,
1342
+ int numReplaysDone,
1343
+ void *customData);
1344
+
1345
+ /**
1346
+ * \brief Subscribe to kernel replay updates.
1347
+ *
1348
+ * When subscribed, the function pointer passed in will be called each time a
1349
+ * kernel run is finished during kernel replay. Previously subscribed function
1350
+ * pointer will be replaced. Passing in NULL as the function pointer unsubscribes
1351
+ * the update.
1352
+ *
1353
+ * \param updateFunc The update function pointer
1354
+ * \param customData Pointer to any custom data
1355
+ * \retval CUPTI_SUCCESS
1356
+ */
1357
+ CUptiResult CUPTIAPI cuptiKernelReplaySubscribeUpdate(CUpti_KernelReplayUpdateFunc updateFunc, void *customData);
1358
+
1359
+ /** @} */ /* END CUPTI_EVENT_API */
1360
+
1361
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
1362
+ #pragma GCC visibility pop
1363
+ #endif
1364
+
1365
+ #if defined(__cplusplus)
1366
+ }
1367
+ #endif
1368
+
1369
+ #endif /*_CUPTI_EVENTS_H_*/
1370
+
1371
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_pcsampling_util.h ADDED
@@ -0,0 +1,419 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #if !defined(_CUPTI_PCSAMPLING_UTIL_H_)
2
+ #define _CUPTI_PCSAMPLING_UTIL_H_
3
+
4
+ #include <cupti_pcsampling.h>
5
+ #include <fstream>
6
+
7
+ #ifndef CUPTIUTILAPI
8
+ #ifdef _WIN32
9
+ #define CUPTIUTILAPI __stdcall
10
+ #else
11
+ #define CUPTIUTILAPI
12
+ #endif
13
+ #endif
14
+
15
+ #define ACTIVITY_RECORD_ALIGNMENT 8
16
+ #if defined(_WIN32) // Windows 32- and 64-bit
17
+ #define START_PACKED_ALIGNMENT __pragma(pack(push,1)) // exact fit - no padding
18
+ #define PACKED_ALIGNMENT __declspec(align(ACTIVITY_RECORD_ALIGNMENT))
19
+ #define END_PACKED_ALIGNMENT __pragma(pack(pop))
20
+ #elif defined(__GNUC__) // GCC
21
+ #define START_PACKED_ALIGNMENT
22
+ #define PACKED_ALIGNMENT __attribute__ ((__packed__)) __attribute__ ((aligned (ACTIVITY_RECORD_ALIGNMENT)))
23
+ #define END_PACKED_ALIGNMENT
24
+ #else // all other compilers
25
+ #define START_PACKED_ALIGNMENT
26
+ #define PACKED_ALIGNMENT
27
+ #define END_PACKED_ALIGNMENT
28
+ #endif
29
+
30
+ #ifndef CUPTI_UTIL_STRUCT_SIZE
31
+ #define CUPTI_UTIL_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
32
+ #endif
33
+
34
+ #ifndef CHECK_PC_SAMPLING_STRUCT_FIELD_EXISTS
35
+ #define CHECK_PC_SAMPLING_STRUCT_FIELD_EXISTS(type, member, structSize) \
36
+ (offsetof(type, member) < structSize)
37
+ #endif
38
+
39
+ #if defined(__cplusplus)
40
+ extern "C" {
41
+ #endif
42
+
43
+ #if defined(__GNUC__)
44
+ #pragma GCC visibility push(default)
45
+ #endif
46
+
47
+ namespace CUPTI { namespace PcSamplingUtil {
48
+
49
+ /**
50
+ * \defgroup CUPTI_PCSAMPLING_UTILITY CUPTI PC Sampling Utility API
51
+ * Functions, types, and enums that implement the CUPTI PC Sampling Utility API.
52
+ * @{
53
+ */
54
+
55
+ /**
56
+ * \brief Header info will be stored in file.
57
+ */
58
+ typedef struct PACKED_ALIGNMENT {
59
+ /**
60
+ * Version of file format.
61
+ */
62
+ uint32_t version;
63
+ /**
64
+ * Total number of buffers present in the file.
65
+ */
66
+ uint32_t totalBuffers;
67
+ } Header;
68
+
69
+ /**
70
+ * \brief BufferInfo will be stored in the file for every buffer
71
+ * i.e. for every call of the UtilDumpPcSamplingBufferInFile() API.
72
+ */
73
+ typedef struct PACKED_ALIGNMENT {
74
+ /**
75
+ * Total number of PC records.
76
+ */
77
+ uint64_t recordCount;
78
+ /**
79
+ * Count of all stall reasons supported on the GPU
80
+ */
81
+ size_t numStallReasons;
82
+ /**
83
+ * Total number of stall reasons in single record.
84
+ */
85
+ uint64_t numSelectedStallReasons;
86
+ /**
87
+ * Buffer size in Bytes.
88
+ */
89
+ uint64_t bufferByteSize;
90
+ } BufferInfo;
91
+
92
+ /**
93
+ * \brief Names and respective indexes of all available stall reasons
94
+ * will be stored in it.
95
+ */
96
+ typedef struct PACKED_ALIGNMENT {
97
+ /**
98
+ * Number of all available stall reasons
99
+ */
100
+ size_t numStallReasons;
101
+ /**
102
+ * Stall reasons names of all available stall reasons
103
+ */
104
+ char **stallReasons;
105
+ /**
106
+ * Stall reason index of all available stall reasons
107
+ */
108
+ uint32_t *stallReasonIndex;
109
+ } PcSamplingStallReasons;
110
+
111
+ typedef enum {
112
+ /**
113
+ * Invalid buffer type.
114
+ */
115
+ PC_SAMPLING_BUFFER_INVALID = 0,
116
+ /**
117
+ * Refers to CUpti_PCSamplingData buffer.
118
+ */
119
+ PC_SAMPLING_BUFFER_PC_TO_COUNTER_DATA = 1
120
+ } PcSamplingBufferType;
121
+
122
+ /**
123
+ * \brief CUPTI PC sampling utility API result codes.
124
+ *
125
+ * Error and result codes returned by CUPTI PC sampling utility API.
126
+ */
127
+ typedef enum {
128
+ /**
129
+ * No error
130
+ */
131
+ CUPTI_UTIL_SUCCESS = 0,
132
+ /**
133
+ * One or more of the parameters are invalid.
134
+ */
135
+ CUPTI_UTIL_ERROR_INVALID_PARAMETER = 1,
136
+ /**
137
+ * Unable to create a new file
138
+ */
139
+ CUPTI_UTIL_ERROR_UNABLE_TO_CREATE_FILE = 2,
140
+ /**
141
+ * Unable to open a file
142
+ */
143
+ CUPTI_UTIL_ERROR_UNABLE_TO_OPEN_FILE = 3,
144
+ /**
145
+ * Read or write operation failed
146
+ */
147
+ CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED = 4,
148
+ /**
149
+ * Provided file handle is corrupted.
150
+ */
151
+ CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED = 5,
152
+ /**
153
+ * seek operation failed.
154
+ */
155
+ CUPTI_UTIL_ERROR_SEEK_OPERATION_FAILED = 6,
156
+ /**
157
+ * Unable to allocate enough memory to perform the requested
158
+ * operation.
159
+ */
160
+ CUPTI_UTIL_ERROR_OUT_OF_MEMORY = 7,
161
+ /**
162
+ * An unknown internal error has occurred.
163
+ */
164
+ CUPTI_UTIL_ERROR_UNKNOWN = 999,
165
+ CUPTI_UTIL_ERROR_FORCE_INT = 0x7fffffff
166
+ } CUptiUtilResult;
167
+
168
+ /**
169
+ * \brief Params for \ref CuptiUtilPutPcSampData
170
+ */
171
+ typedef struct {
172
+ /**
173
+ * Size of the data structure i.e. CUptiUtil_PutPcSampDataParamsSize
174
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
175
+ * available in the structure. Used to preserve backward compatibility.
176
+ */
177
+ size_t size;
178
+ /**
179
+ * Type of buffer to store in file
180
+ */
181
+ PcSamplingBufferType bufferType;
182
+ /**
183
+ * PC sampling buffer.
184
+ */
185
+ void *pSamplingData;
186
+ /**
187
+ * Number of configured attributes
188
+ */
189
+ size_t numAttributes;
190
+ /**
191
+ * Refer \ref CUpti_PCSamplingConfigurationInfo
192
+ * It is expected to provide configuration details of at least
193
+ * CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON attribute.
194
+ */
195
+ CUpti_PCSamplingConfigurationInfo *pPCSamplingConfigurationInfo;
196
+ /**
197
+ * Refer \ref PcSamplingStallReasons.
198
+ */
199
+ PcSamplingStallReasons *pPcSamplingStallReasons;
200
+ /**
201
+ * File name to store buffer into it.
202
+ */
203
+ const char* fileName;
204
+ } CUptiUtil_PutPcSampDataParams;
205
+ #define CUptiUtil_PutPcSampDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_PutPcSampDataParams, fileName)
206
+
207
+ /**
208
+ * \brief Dump PC sampling data into the file.
209
+ *
210
+ * This API can be called multiple times.
211
+ * It will append buffer in the file.
212
+ * For every buffer it will store BufferInfo
213
+ * so that before retrieving data it will help to allocate buffer
214
+ * to store retrieved data.
215
+ * This API creates the file if it does not already exist.
216
+ * If stallReasonIndex or stallReasons pointer of \ref CUptiUtil_PutPcSampDataParams is NULL
217
+ * then stall reasons data will not be stored in file.
218
+ * It is expected to store all available stall reason data at least once to refer it during
219
+ * offline correlation.
220
+ *
221
+ * \retval CUPTI_UTIL_SUCCESS
222
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if buffer type is invalid
223
+ * or if either of pSamplingData, pParams pointer is NULL or stall reason configuration details not provided
224
+ * or filename is empty.
225
+ * \retval CUPTI_UTIL_ERROR_UNABLE_TO_CREATE_FILE
226
+ * \retval CUPTI_UTIL_ERROR_UNABLE_TO_OPEN_FILE
227
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED
228
+ */
229
+ CUptiUtilResult CUPTIUTILAPI CuptiUtilPutPcSampData(CUptiUtil_PutPcSampDataParams *pParams);
230
+
231
+ /**
232
+ * \brief Params for \ref CuptiUtilGetHeaderData
233
+ */
234
+ typedef struct {
235
+ /**
236
+ * Size of the data structure i.e. CUptiUtil_GetHeaderDataParamsSize
237
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
238
+ * available in the structure. Used to preserve backward compatibility.
239
+ */
240
+ size_t size;
241
+ /**
242
+ * File handle.
243
+ */
244
+ std::ifstream *fileHandler;
245
+ /**
246
+ * Header Info.
247
+ */
248
+ Header headerInfo;
249
+
250
+ } CUptiUtil_GetHeaderDataParams;
251
+ #define CUptiUtil_GetHeaderDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_GetHeaderDataParams, headerInfo)
252
+
253
+ /**
254
+ * \brief Get header data of file.
255
+ *
256
+ * This API must be called once initially while retrieving data from file.
257
+ * \ref Header structure, it gives info about total number
258
+ * of buffers present in the file.
259
+ *
260
+ * \retval CUPTI_UTIL_SUCCESS
261
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if either of pParam or fileHandle is NULL or param struct size is incorrect.
262
+ * \retval CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file
263
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED failed to read data from file.
264
+ */
265
+ CUptiUtilResult CUPTIUTILAPI CuptiUtilGetHeaderData(CUptiUtil_GetHeaderDataParams *pParams);
266
+
267
+ /**
268
+ * \brief Params for \ref CuptiUtilGetBufferInfo
269
+ */
270
+ typedef struct {
271
+ /**
272
+ * Size of the data structure i.e. CUptiUtil_GetBufferInfoParamsSize
273
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
274
+ * available in the structure. Used to preserve backward compatibility.
275
+ */
276
+ size_t size;
277
+ /**
278
+ * File handle.
279
+ */
280
+ std::ifstream *fileHandler;
281
+ /**
282
+ * Buffer Info.
283
+ */
284
+ BufferInfo bufferInfoData;
285
+ } CUptiUtil_GetBufferInfoParams;
286
+ #define CUptiUtil_GetBufferInfoParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_GetBufferInfoParams, bufferInfoData)
287
+
288
+ /**
289
+ * \brief Get buffer info data of file.
290
+ *
291
+ * This API must be called every time before calling CuptiUtilGetPcSampData API.
292
+ * \ref BufferInfo structure, it gives info about recordCount and numSelectedStallReasons
293
+ * of every record in the buffer. This will help to allocate exact buffer to retrieve data into it.
294
+ *
295
+ * \retval CUPTI_UTIL_SUCCESS
296
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if either of pParam or fileHandle is NULL or param struct size is incorrect.
297
+ * \retval CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file.
298
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED failed to read data from file.
299
+ */
300
+ CUptiUtilResult CUPTIUTILAPI CuptiUtilGetBufferInfo(CUptiUtil_GetBufferInfoParams *pParams);
301
+
302
+ /**
303
+ * \brief Params for \ref CuptiUtilGetPcSampData
304
+ */
305
+ typedef struct {
306
+ /**
307
+ * Size of the data structure i.e. CUptiUtil_GetPcSampDataParamsSize
308
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
309
+ * available in the structure. Used to preserve backward compatibility.
310
+ */
311
+ size_t size;
312
+ /**
313
+ * File handle.
314
+ */
315
+ std::ifstream *fileHandler;
316
+ /**
317
+ * Type of buffer to retrieve from file
318
+ */
319
+ PcSamplingBufferType bufferType;
320
+ /**
321
+ * Pointer to collected buffer info using \ref CuptiUtilGetBufferInfo
322
+ */
323
+ BufferInfo *pBufferInfoData;
324
+ /**
325
+ * Pointer to allocated memory to store retrieved data from file.
326
+ */
327
+ void *pSamplingData;
328
+ /**
329
+ * Number of configuration attributes
330
+ */
331
+ size_t numAttributes;
332
+ /**
333
+ * Refer \ref CUpti_PCSamplingConfigurationInfo
334
+ */
335
+ CUpti_PCSamplingConfigurationInfo *pPCSamplingConfigurationInfo;
336
+ /**
337
+ * Refer \ref PcSamplingStallReasons.
338
+ * For stallReasons field of \ref PcSamplingStallReasons it is expected to
339
+ * allocate memory for each string element of array.
340
+ */
341
+ PcSamplingStallReasons *pPcSamplingStallReasons;
342
+ } CUptiUtil_GetPcSampDataParams;
343
+ #define CUptiUtil_GetPcSampDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_GetPcSampDataParams, pPcSamplingStallReasons)
344
+
345
+ /**
346
+ * \brief Retrieve PC sampling data from file into allocated buffer.
347
+ *
348
+ * This API must be called after CuptiUtilGetBufferInfo API.
349
+ * It will retrieve data from file into allocated buffer.
350
+ *
351
+ * \retval CUPTI_UTIL_SUCCESS
352
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if buffer type is invalid
353
+ * or if either of pSamplingData, pParams is NULL. If pPcSamplingStallReasons is not NULL then
354
+ * error out if either of stallReasonIndex, stallReasons or stallReasons array element pointer is NULL.
355
+ * or filename is empty.
356
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED
357
+ * \retval CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file.
358
+ */
359
+ CUptiUtilResult CUPTIUTILAPI CuptiUtilGetPcSampData(CUptiUtil_GetPcSampDataParams *pParams);
360
+
361
+ /**
362
+ * \brief Params for \ref CuptiUtilMergePcSampData
363
+ */
364
+ typedef struct
365
+ {
366
+ /**
367
+ * Size of the data structure i.e. CUptiUtil_MergePcSampDataParamsSize
368
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
369
+ * available in the structure. Used to preserve backward compatibility.
370
+ */
371
+ size_t size;
372
+ /**
373
+ * Number of buffers to merge.
374
+ */
375
+ size_t numberOfBuffers;
376
+ /**
377
+ * Pointer to array of buffers to merge
378
+ */
379
+ CUpti_PCSamplingData *PcSampDataBuffer;
380
+ /**
381
+ * Pointer to array of merged buffers as per the range id.
382
+ */
383
+ CUpti_PCSamplingData **MergedPcSampDataBuffers;
384
+ /**
385
+ * Number of merged buffers.
386
+ */
387
+ size_t *numMergedBuffer;
388
+ } CUptiUtil_MergePcSampDataParams;
389
+ #define CUptiUtil_MergePcSampDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_MergePcSampDataParams, numMergedBuffer)
390
+
391
+ /**
392
+ * \brief Merge PC sampling data range id wise.
393
+ *
394
+ * This API merges PC sampling data range id wise.
394
+ * It allocates memory for the merged data, fills the data into it,
395
+ * and provides the buffer pointers in the MergedPcSampDataBuffers field.
396
+ * The user is expected to free the merged data buffers after use.
398
+ *
399
+ * \retval CUPTI_UTIL_SUCCESS
400
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if param struct size is invalid
401
+ * or count of buffers to merge is invalid i.e less than 1
402
+ * or either of PcSampDataBuffer, MergedPcSampDataBuffers, numMergedBuffer is NULL
403
+ * \retval CUPTI_UTIL_ERROR_OUT_OF_MEMORY Unable to allocate memory for merged buffer.
404
+ */
405
+ CUptiUtilResult CUPTIUTILAPI CuptiUtilMergePcSampData(CUptiUtil_MergePcSampDataParams *pParams);
406
+
407
+ /** @} */ /* END CUPTI_PCSAMPLING_UTILITY */
408
+
409
+ } }
410
+
411
+ #if defined(__GNUC__)
412
+ #pragma GCC visibility pop
413
+ #endif
414
+
415
+ #if defined(__cplusplus)
416
+ }
417
+ #endif
418
+
419
+ #endif
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_result.h ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2010-2021 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_RESULT_H_)
51
+ #define _CUPTI_RESULT_H_
52
+
53
+ #ifndef CUPTIAPI
54
+ #ifdef _WIN32
55
+ #define CUPTIAPI __stdcall
56
+ #else
57
+ #define CUPTIAPI
58
+ #endif
59
+ #endif
60
+
61
+ #if defined(__cplusplus)
62
+ extern "C" {
63
+ #endif
64
+
65
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
66
+ #pragma GCC visibility push(default)
67
+ #endif
68
+
69
+ /**
70
+ * \defgroup CUPTI_RESULT_API CUPTI Result Codes
71
+ * Error and result codes returned by CUPTI functions.
72
+ * @{
73
+ */
74
+
75
+ /**
76
+ * \brief CUPTI result codes.
77
+ *
78
+ * Error and result codes returned by CUPTI functions.
79
+ */
80
+ typedef enum {
81
+ /**
82
+ * No error.
83
+ */
84
+ CUPTI_SUCCESS = 0,
85
+ /**
86
+ * One or more of the parameters is invalid.
87
+ */
88
+ CUPTI_ERROR_INVALID_PARAMETER = 1,
89
+ /**
90
+ * The device does not correspond to a valid CUDA device.
91
+ */
92
+ CUPTI_ERROR_INVALID_DEVICE = 2,
93
+ /**
94
+ * The context is NULL or not valid.
95
+ */
96
+ CUPTI_ERROR_INVALID_CONTEXT = 3,
97
+ /**
98
+ * The event domain id is invalid.
99
+ */
100
+ CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID = 4,
101
+ /**
102
+ * The event id is invalid.
103
+ */
104
+ CUPTI_ERROR_INVALID_EVENT_ID = 5,
105
+ /**
106
+ * The event name is invalid.
107
+ */
108
+ CUPTI_ERROR_INVALID_EVENT_NAME = 6,
109
+ /**
110
+ * The current operation cannot be performed due to dependency on
111
+ * other factors.
112
+ */
113
+ CUPTI_ERROR_INVALID_OPERATION = 7,
114
+ /**
115
+ * Unable to allocate enough memory to perform the requested
116
+ * operation.
117
+ */
118
+ CUPTI_ERROR_OUT_OF_MEMORY = 8,
119
+ /**
120
+ * An error occurred on the performance monitoring hardware.
121
+ */
122
+ CUPTI_ERROR_HARDWARE = 9,
123
+ /**
124
+ * The output buffer size is not sufficient to return all
125
+ * requested data.
126
+ */
127
+ CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT = 10,
128
+ /**
129
+ * API is not implemented.
130
+ */
131
+ CUPTI_ERROR_API_NOT_IMPLEMENTED = 11,
132
+ /**
133
+ * The maximum limit is reached.
134
+ */
135
+ CUPTI_ERROR_MAX_LIMIT_REACHED = 12,
136
+ /**
137
+ * The object is not yet ready to perform the requested operation.
138
+ */
139
+ CUPTI_ERROR_NOT_READY = 13,
140
+ /**
141
+ * The current operation is not compatible with the current state
142
+ * of the object
143
+ */
144
+ CUPTI_ERROR_NOT_COMPATIBLE = 14,
145
+ /**
146
+ * CUPTI is unable to initialize its connection to the CUDA
147
+ * driver.
148
+ */
149
+ CUPTI_ERROR_NOT_INITIALIZED = 15,
150
+ /**
151
+ * The metric id is invalid.
152
+ */
153
+ CUPTI_ERROR_INVALID_METRIC_ID = 16,
154
+ /**
155
+ * The metric name is invalid.
156
+ */
157
+ CUPTI_ERROR_INVALID_METRIC_NAME = 17,
158
+ /**
159
+ * The queue is empty.
160
+ */
161
+ CUPTI_ERROR_QUEUE_EMPTY = 18,
162
+ /**
163
+ * Invalid handle (internal?).
164
+ */
165
+ CUPTI_ERROR_INVALID_HANDLE = 19,
166
+ /**
167
+ * Invalid stream.
168
+ */
169
+ CUPTI_ERROR_INVALID_STREAM = 20,
170
+ /**
171
+ * Invalid kind.
172
+ */
173
+ CUPTI_ERROR_INVALID_KIND = 21,
174
+ /**
175
+ * Invalid event value.
176
+ */
177
+ CUPTI_ERROR_INVALID_EVENT_VALUE = 22,
178
+ /**
179
+ * CUPTI is disabled due to conflicts with other enabled profilers
180
+ */
181
+ CUPTI_ERROR_DISABLED = 23,
182
+ /**
183
+ * Invalid module.
184
+ */
185
+ CUPTI_ERROR_INVALID_MODULE = 24,
186
+ /**
187
+ * Invalid metric value.
188
+ */
189
+ CUPTI_ERROR_INVALID_METRIC_VALUE = 25,
190
+ /**
191
+ * The performance monitoring hardware is in use by other client.
192
+ */
193
+ CUPTI_ERROR_HARDWARE_BUSY = 26,
194
+ /**
195
+ * The attempted operation is not supported on the current
196
+ * system or device.
197
+ */
198
+ CUPTI_ERROR_NOT_SUPPORTED = 27,
199
+ /**
200
+ * Unified memory profiling is not supported on the system.
201
+ * Potential reason could be unsupported OS or architecture.
202
+ */
203
+ CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED = 28,
204
+ /**
205
+ * Unified memory profiling is not supported on the device
206
+ */
207
+ CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_DEVICE = 29,
208
+ /**
209
+ * Unified memory profiling is not supported on a multi-GPU
210
+ * configuration without P2P support between any pair of devices
211
+ */
212
+ CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_ON_NON_P2P_DEVICES = 30,
213
+ /**
214
+ * Unified memory profiling is not supported under the
215
+ * Multi-Process Service (MPS) environment. CUDA 7.5 removes this
216
+ * restriction.
217
+ */
218
+ CUPTI_ERROR_UM_PROFILING_NOT_SUPPORTED_WITH_MPS = 31,
219
+ /**
220
+ * In CUDA 9.0, devices with compute capability 7.0 don't
221
+ * support CDP tracing
222
+ */
223
+ CUPTI_ERROR_CDP_TRACING_NOT_SUPPORTED = 32,
224
+ /**
225
+ * Profiling on virtualized GPU is not supported.
226
+ */
227
+ CUPTI_ERROR_VIRTUALIZED_DEVICE_NOT_SUPPORTED = 33,
228
+ /**
229
+ * Profiling results might be incorrect for CUDA applications
230
+ * compiled with nvcc version older than 9.0 for devices with
231
+ * compute capability 6.0 and 6.1.
232
+ * Profiling session will continue and CUPTI will notify it using this error code.
233
+ * User is advised to recompile the application code with nvcc version 9.0 or later.
234
+ * Ignore this warning if code is already compiled with the recommended nvcc version.
235
+ */
236
+ CUPTI_ERROR_CUDA_COMPILER_NOT_COMPATIBLE = 34,
237
+ /**
238
+ * User doesn't have sufficient privileges which are required to
239
+ * start the profiling session.
240
+ * One possible reason for this may be that the NVIDIA driver or your system
241
+ * administrator may have restricted access to the NVIDIA GPU performance counters.
242
+ * To learn how to resolve this issue and find more information, please visit
243
+ * https://developer.nvidia.com/CUPTI_ERROR_INSUFFICIENT_PRIVILEGES
244
+ */
245
+ CUPTI_ERROR_INSUFFICIENT_PRIVILEGES = 35,
246
+ /**
247
+ * Legacy CUPTI Profiling API i.e. event API from the header cupti_events.h and
248
+ * metric API from the header cupti_metrics.h are not compatible with the
249
+ * Profiling API in the header cupti_profiler_target.h and Perfworks metrics API
250
+ * in the headers nvperf_host.h and nvperf_target.h.
251
+ */
252
+ CUPTI_ERROR_OLD_PROFILER_API_INITIALIZED = 36,
253
+ /**
254
+ * Missing definition of the OpenACC API routine in the linked OpenACC library.
255
+ *
256
+ * One possible reason is that OpenACC library is linked statically in the
257
+ * user application, which might not have the definition of all the OpenACC
258
+ * API routines needed for the OpenACC profiling, as compiler might ignore
259
+ * definitions for the functions not used in the application. This issue
260
+ * can be mitigated by linking the OpenACC library dynamically.
261
+ */
262
+ CUPTI_ERROR_OPENACC_UNDEFINED_ROUTINE = 37,
263
+ /**
264
+ * Legacy CUPTI Profiling API i.e. event API from the header cupti_events.h and
265
+ * metric API from the header cupti_metrics.h are not supported on devices with
266
+ * compute capability 7.5 and higher (i.e. Turing and later GPU architectures).
267
+ * These API will be deprecated in a future CUDA release. These are replaced by
268
+ * Profiling API in the header cupti_profiler_target.h and Perfworks metrics API
269
+ * in the headers nvperf_host.h and nvperf_target.h.
270
+ */
271
+ CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED = 38,
272
+ /**
273
+ * CUPTI doesn't allow multiple callback subscribers. Only a single subscriber
274
+ * can be registered at a time.
275
+ * Same error code is used when application is launched using NVIDIA tools
276
+ * like nvprof, Visual Profiler, Nsight Systems, Nsight Compute, cuda-gdb and
277
+ * cuda-memcheck.
278
+ */
279
+ CUPTI_ERROR_MULTIPLE_SUBSCRIBERS_NOT_SUPPORTED = 39,
280
+ /**
281
+ * Profiling on virtualized GPU is not allowed by hypervisor.
282
+ */
283
+ CUPTI_ERROR_VIRTUALIZED_DEVICE_INSUFFICIENT_PRIVILEGES = 40,
284
+ /**
285
+ * Profiling and tracing are not allowed when confidential computing mode
286
+ * is enabled.
287
+ */
288
+ CUPTI_ERROR_CONFIDENTIAL_COMPUTING_NOT_SUPPORTED = 41,
289
+ /**
290
+ * CUPTI does not support NVIDIA Crypto Mining Processors (CMP).
291
+ * For more information, please visit https://developer.nvidia.com/ERR_NVCMPGPU
292
+ */
293
+ CUPTI_ERROR_CMP_DEVICE_NOT_SUPPORTED = 42,
294
+ /**
295
+ * An unknown internal error has occurred.
296
+ */
297
+ CUPTI_ERROR_UNKNOWN = 999,
298
+ CUPTI_ERROR_FORCE_INT = 0x7fffffff
299
+ } CUptiResult;
300
+
301
+ /**
302
+ * \brief Get the descriptive string for a CUptiResult.
303
+ *
304
+ * Return the descriptive string for a CUptiResult in \p *str.
305
+ * \note \b Thread-safety: this function is thread safe.
306
+ *
307
+ * \param result The result to get the string for
308
+ * \param str Returns the string
309
+ *
310
+ * \retval CUPTI_SUCCESS on success
311
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p str is NULL or \p
312
+ * result is not a valid CUptiResult
313
+ */
314
+ CUptiResult CUPTIAPI cuptiGetResultString(CUptiResult result, const char **str);
315
+
316
+ /** @} */ /* END CUPTI_RESULT_API */
317
+
318
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
319
+ #pragma GCC visibility pop
320
+ #endif
321
+
322
+ #if defined(__cplusplus)
323
+ }
324
+ #endif
325
+
326
+ #endif /*_CUPTI_RESULT_H_*/
327
+
328
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_runtime_cbid.h ADDED
@@ -0,0 +1,447 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ // *************************************************************************
3
+ // Definitions of indices for API functions, unique across entire API
4
+ // *************************************************************************
5
+
6
+ // This file is generated. Any changes you make will be lost during the next clean build.
7
+ // CUDA public interface, for type definitions and cu* function prototypes
8
+
9
+ typedef enum CUpti_runtime_api_trace_cbid_enum {
10
+ CUPTI_RUNTIME_TRACE_CBID_INVALID = 0,
11
+ CUPTI_RUNTIME_TRACE_CBID_cudaDriverGetVersion_v3020 = 1,
12
+ CUPTI_RUNTIME_TRACE_CBID_cudaRuntimeGetVersion_v3020 = 2,
13
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceCount_v3020 = 3,
14
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceProperties_v3020 = 4,
15
+ CUPTI_RUNTIME_TRACE_CBID_cudaChooseDevice_v3020 = 5,
16
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetChannelDesc_v3020 = 6,
17
+ CUPTI_RUNTIME_TRACE_CBID_cudaCreateChannelDesc_v3020 = 7,
18
+ CUPTI_RUNTIME_TRACE_CBID_cudaConfigureCall_v3020 = 8,
19
+ CUPTI_RUNTIME_TRACE_CBID_cudaSetupArgument_v3020 = 9,
20
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetLastError_v3020 = 10,
21
+ CUPTI_RUNTIME_TRACE_CBID_cudaPeekAtLastError_v3020 = 11,
22
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetErrorString_v3020 = 12,
23
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_v3020 = 13,
24
+ CUPTI_RUNTIME_TRACE_CBID_cudaFuncSetCacheConfig_v3020 = 14,
25
+ CUPTI_RUNTIME_TRACE_CBID_cudaFuncGetAttributes_v3020 = 15,
26
+ CUPTI_RUNTIME_TRACE_CBID_cudaSetDevice_v3020 = 16,
27
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetDevice_v3020 = 17,
28
+ CUPTI_RUNTIME_TRACE_CBID_cudaSetValidDevices_v3020 = 18,
29
+ CUPTI_RUNTIME_TRACE_CBID_cudaSetDeviceFlags_v3020 = 19,
30
+ CUPTI_RUNTIME_TRACE_CBID_cudaMalloc_v3020 = 20,
31
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocPitch_v3020 = 21,
32
+ CUPTI_RUNTIME_TRACE_CBID_cudaFree_v3020 = 22,
33
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocArray_v3020 = 23,
34
+ CUPTI_RUNTIME_TRACE_CBID_cudaFreeArray_v3020 = 24,
35
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocHost_v3020 = 25,
36
+ CUPTI_RUNTIME_TRACE_CBID_cudaFreeHost_v3020 = 26,
37
+ CUPTI_RUNTIME_TRACE_CBID_cudaHostAlloc_v3020 = 27,
38
+ CUPTI_RUNTIME_TRACE_CBID_cudaHostGetDevicePointer_v3020 = 28,
39
+ CUPTI_RUNTIME_TRACE_CBID_cudaHostGetFlags_v3020 = 29,
40
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemGetInfo_v3020 = 30,
41
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020 = 31,
42
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2D_v3020 = 32,
43
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArray_v3020 = 33,
44
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArray_v3020 = 34,
45
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArray_v3020 = 35,
46
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArray_v3020 = 36,
47
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyArrayToArray_v3020 = 37,
48
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DArrayToArray_v3020 = 38,
49
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbol_v3020 = 39,
50
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbol_v3020 = 40,
51
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyAsync_v3020 = 41,
52
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArrayAsync_v3020 = 42,
53
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArrayAsync_v3020 = 43,
54
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DAsync_v3020 = 44,
55
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArrayAsync_v3020 = 45,
56
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArrayAsync_v3020 = 46,
57
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbolAsync_v3020 = 47,
58
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbolAsync_v3020 = 48,
59
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset_v3020 = 49,
60
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset2D_v3020 = 50,
61
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemsetAsync_v3020 = 51,
62
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset2DAsync_v3020 = 52,
63
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetSymbolAddress_v3020 = 53,
64
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetSymbolSize_v3020 = 54,
65
+ CUPTI_RUNTIME_TRACE_CBID_cudaBindTexture_v3020 = 55,
66
+ CUPTI_RUNTIME_TRACE_CBID_cudaBindTexture2D_v3020 = 56,
67
+ CUPTI_RUNTIME_TRACE_CBID_cudaBindTextureToArray_v3020 = 57,
68
+ CUPTI_RUNTIME_TRACE_CBID_cudaUnbindTexture_v3020 = 58,
69
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureAlignmentOffset_v3020 = 59,
70
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureReference_v3020 = 60,
71
+ CUPTI_RUNTIME_TRACE_CBID_cudaBindSurfaceToArray_v3020 = 61,
72
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetSurfaceReference_v3020 = 62,
73
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLSetGLDevice_v3020 = 63,
74
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLRegisterBufferObject_v3020 = 64,
75
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLMapBufferObject_v3020 = 65,
76
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLUnmapBufferObject_v3020 = 66,
77
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLUnregisterBufferObject_v3020 = 67,
78
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLSetBufferObjectMapFlags_v3020 = 68,
79
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLMapBufferObjectAsync_v3020 = 69,
80
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLUnmapBufferObjectAsync_v3020 = 70,
81
+ CUPTI_RUNTIME_TRACE_CBID_cudaWGLGetDevice_v3020 = 71,
82
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsGLRegisterImage_v3020 = 72,
83
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsGLRegisterBuffer_v3020 = 73,
84
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsUnregisterResource_v3020 = 74,
85
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceSetMapFlags_v3020 = 75,
86
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsMapResources_v3020 = 76,
87
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsUnmapResources_v3020 = 77,
88
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceGetMappedPointer_v3020 = 78,
89
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsSubResourceGetMappedArray_v3020 = 79,
90
+ CUPTI_RUNTIME_TRACE_CBID_cudaVDPAUGetDevice_v3020 = 80,
91
+ CUPTI_RUNTIME_TRACE_CBID_cudaVDPAUSetVDPAUDevice_v3020 = 81,
92
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsVDPAURegisterVideoSurface_v3020 = 82,
93
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsVDPAURegisterOutputSurface_v3020 = 83,
94
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D11GetDevice_v3020 = 84,
95
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D11GetDevices_v3020 = 85,
96
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D11SetDirect3DDevice_v3020 = 86,
97
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsD3D11RegisterResource_v3020 = 87,
98
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10GetDevice_v3020 = 88,
99
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10GetDevices_v3020 = 89,
100
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10SetDirect3DDevice_v3020 = 90,
101
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsD3D10RegisterResource_v3020 = 91,
102
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10RegisterResource_v3020 = 92,
103
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10UnregisterResource_v3020 = 93,
104
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10MapResources_v3020 = 94,
105
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10UnmapResources_v3020 = 95,
106
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceSetMapFlags_v3020 = 96,
107
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetSurfaceDimensions_v3020 = 97,
108
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedArray_v3020 = 98,
109
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedPointer_v3020 = 99,
110
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedSize_v3020 = 100,
111
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10ResourceGetMappedPitch_v3020 = 101,
112
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9GetDevice_v3020 = 102,
113
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9GetDevices_v3020 = 103,
114
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9SetDirect3DDevice_v3020 = 104,
115
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9GetDirect3DDevice_v3020 = 105,
116
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsD3D9RegisterResource_v3020 = 106,
117
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9RegisterResource_v3020 = 107,
118
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnregisterResource_v3020 = 108,
119
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9MapResources_v3020 = 109,
120
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnmapResources_v3020 = 110,
121
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceSetMapFlags_v3020 = 111,
122
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetSurfaceDimensions_v3020 = 112,
123
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedArray_v3020 = 113,
124
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedPointer_v3020 = 114,
125
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedSize_v3020 = 115,
126
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9ResourceGetMappedPitch_v3020 = 116,
127
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9Begin_v3020 = 117,
128
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9End_v3020 = 118,
129
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9RegisterVertexBuffer_v3020 = 119,
130
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnregisterVertexBuffer_v3020 = 120,
131
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9MapVertexBuffer_v3020 = 121,
132
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D9UnmapVertexBuffer_v3020 = 122,
133
+ CUPTI_RUNTIME_TRACE_CBID_cudaThreadExit_v3020 = 123,
134
+ CUPTI_RUNTIME_TRACE_CBID_cudaSetDoubleForDevice_v3020 = 124,
135
+ CUPTI_RUNTIME_TRACE_CBID_cudaSetDoubleForHost_v3020 = 125,
136
+ CUPTI_RUNTIME_TRACE_CBID_cudaThreadSynchronize_v3020 = 126,
137
+ CUPTI_RUNTIME_TRACE_CBID_cudaThreadGetLimit_v3020 = 127,
138
+ CUPTI_RUNTIME_TRACE_CBID_cudaThreadSetLimit_v3020 = 128,
139
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamCreate_v3020 = 129,
140
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamDestroy_v3020 = 130,
141
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamSynchronize_v3020 = 131,
142
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamQuery_v3020 = 132,
143
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventCreate_v3020 = 133,
144
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventCreateWithFlags_v3020 = 134,
145
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventRecord_v3020 = 135,
146
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventDestroy_v3020 = 136,
147
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventSynchronize_v3020 = 137,
148
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventQuery_v3020 = 138,
149
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventElapsedTime_v3020 = 139,
150
+ CUPTI_RUNTIME_TRACE_CBID_cudaMalloc3D_v3020 = 140,
151
+ CUPTI_RUNTIME_TRACE_CBID_cudaMalloc3DArray_v3020 = 141,
152
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset3D_v3020 = 142,
153
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset3DAsync_v3020 = 143,
154
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3D_v3020 = 144,
155
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DAsync_v3020 = 145,
156
+ CUPTI_RUNTIME_TRACE_CBID_cudaThreadSetCacheConfig_v3020 = 146,
157
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamWaitEvent_v3020 = 147,
158
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D11GetDirect3DDevice_v3020 = 148,
159
+ CUPTI_RUNTIME_TRACE_CBID_cudaD3D10GetDirect3DDevice_v3020 = 149,
160
+ CUPTI_RUNTIME_TRACE_CBID_cudaThreadGetCacheConfig_v3020 = 150,
161
+ CUPTI_RUNTIME_TRACE_CBID_cudaPointerGetAttributes_v4000 = 151,
162
+ CUPTI_RUNTIME_TRACE_CBID_cudaHostRegister_v4000 = 152,
163
+ CUPTI_RUNTIME_TRACE_CBID_cudaHostUnregister_v4000 = 153,
164
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceCanAccessPeer_v4000 = 154,
165
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceEnablePeerAccess_v4000 = 155,
166
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceDisablePeerAccess_v4000 = 156,
167
+ CUPTI_RUNTIME_TRACE_CBID_cudaPeerRegister_v4000 = 157,
168
+ CUPTI_RUNTIME_TRACE_CBID_cudaPeerUnregister_v4000 = 158,
169
+ CUPTI_RUNTIME_TRACE_CBID_cudaPeerGetDevicePointer_v4000 = 159,
170
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyPeer_v4000 = 160,
171
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyPeerAsync_v4000 = 161,
172
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeer_v4000 = 162,
173
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeerAsync_v4000 = 163,
174
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceReset_v3020 = 164,
175
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSynchronize_v3020 = 165,
176
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetLimit_v3020 = 166,
177
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetLimit_v3020 = 167,
178
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetCacheConfig_v3020 = 168,
179
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetCacheConfig_v3020 = 169,
180
+ CUPTI_RUNTIME_TRACE_CBID_cudaProfilerInitialize_v4000 = 170,
181
+ CUPTI_RUNTIME_TRACE_CBID_cudaProfilerStart_v4000 = 171,
182
+ CUPTI_RUNTIME_TRACE_CBID_cudaProfilerStop_v4000 = 172,
183
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetByPCIBusId_v4010 = 173,
184
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetPCIBusId_v4010 = 174,
185
+ CUPTI_RUNTIME_TRACE_CBID_cudaGLGetDevices_v4010 = 175,
186
+ CUPTI_RUNTIME_TRACE_CBID_cudaIpcGetEventHandle_v4010 = 176,
187
+ CUPTI_RUNTIME_TRACE_CBID_cudaIpcOpenEventHandle_v4010 = 177,
188
+ CUPTI_RUNTIME_TRACE_CBID_cudaIpcGetMemHandle_v4010 = 178,
189
+ CUPTI_RUNTIME_TRACE_CBID_cudaIpcOpenMemHandle_v4010 = 179,
190
+ CUPTI_RUNTIME_TRACE_CBID_cudaIpcCloseMemHandle_v4010 = 180,
191
+ CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetInfo_v4010 = 181,
192
+ CUPTI_RUNTIME_TRACE_CBID_cudaFuncSetSharedMemConfig_v4020 = 182,
193
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetSharedMemConfig_v4020 = 183,
194
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetSharedMemConfig_v4020 = 184,
195
+ CUPTI_RUNTIME_TRACE_CBID_cudaCreateTextureObject_v5000 = 185,
196
+ CUPTI_RUNTIME_TRACE_CBID_cudaDestroyTextureObject_v5000 = 186,
197
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectResourceDesc_v5000 = 187,
198
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectTextureDesc_v5000 = 188,
199
+ CUPTI_RUNTIME_TRACE_CBID_cudaCreateSurfaceObject_v5000 = 189,
200
+ CUPTI_RUNTIME_TRACE_CBID_cudaDestroySurfaceObject_v5000 = 190,
201
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetSurfaceObjectResourceDesc_v5000 = 191,
202
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocMipmappedArray_v5000 = 192,
203
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetMipmappedArrayLevel_v5000 = 193,
204
+ CUPTI_RUNTIME_TRACE_CBID_cudaFreeMipmappedArray_v5000 = 194,
205
+ CUPTI_RUNTIME_TRACE_CBID_cudaBindTextureToMipmappedArray_v5000 = 195,
206
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceGetMappedMipmappedArray_v5000 = 196,
207
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamAddCallback_v5000 = 197,
208
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamCreateWithFlags_v5000 = 198,
209
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetTextureObjectResourceViewDesc_v5000 = 199,
210
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetAttribute_v5000 = 200,
211
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamDestroy_v5050 = 201,
212
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamCreateWithPriority_v5050 = 202,
213
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetPriority_v5050 = 203,
214
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetFlags_v5050 = 204,
215
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetStreamPriorityRange_v5050 = 205,
216
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocManaged_v6000 = 206,
217
+ CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6000 = 207,
218
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamAttachMemAsync_v6000 = 208,
219
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetErrorName_v6050 = 209,
220
+ CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050 = 210,
221
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000 = 211,
222
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetDeviceFlags_v7000 = 212,
223
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_ptsz_v7000 = 213,
224
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_ptsz_v7000 = 214,
225
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_ptds_v7000 = 215,
226
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2D_ptds_v7000 = 216,
227
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArray_ptds_v7000 = 217,
228
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArray_ptds_v7000 = 218,
229
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArray_ptds_v7000 = 219,
230
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArray_ptds_v7000 = 220,
231
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyArrayToArray_ptds_v7000 = 221,
232
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DArrayToArray_ptds_v7000 = 222,
233
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbol_ptds_v7000 = 223,
234
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbol_ptds_v7000 = 224,
235
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyAsync_ptsz_v7000 = 225,
236
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArrayAsync_ptsz_v7000 = 226,
237
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArrayAsync_ptsz_v7000 = 227,
238
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DAsync_ptsz_v7000 = 228,
239
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArrayAsync_ptsz_v7000 = 229,
240
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArrayAsync_ptsz_v7000 = 230,
241
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbolAsync_ptsz_v7000 = 231,
242
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbolAsync_ptsz_v7000 = 232,
243
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset_ptds_v7000 = 233,
244
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset2D_ptds_v7000 = 234,
245
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemsetAsync_ptsz_v7000 = 235,
246
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset2DAsync_ptsz_v7000 = 236,
247
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetPriority_ptsz_v7000 = 237,
248
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetFlags_ptsz_v7000 = 238,
249
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamSynchronize_ptsz_v7000 = 239,
250
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamQuery_ptsz_v7000 = 240,
251
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamAttachMemAsync_ptsz_v7000 = 241,
252
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventRecord_ptsz_v7000 = 242,
253
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset3D_ptds_v7000 = 243,
254
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemset3DAsync_ptsz_v7000 = 244,
255
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3D_ptds_v7000 = 245,
256
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DAsync_ptsz_v7000 = 246,
257
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamWaitEvent_ptsz_v7000 = 247,
258
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamAddCallback_ptsz_v7000 = 248,
259
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeer_ptds_v7000 = 249,
260
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeerAsync_ptsz_v7000 = 250,
261
+ CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_v7000 = 251,
262
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPrefetchAsync_v8000 = 252,
263
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPrefetchAsync_ptsz_v8000 = 253,
264
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemAdvise_v8000 = 254,
265
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetP2PAttribute_v8000 = 255,
266
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsEGLRegisterImage_v7000 = 256,
267
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerConnect_v7000 = 257,
268
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerDisconnect_v7000 = 258,
269
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerAcquireFrame_v7000 = 259,
270
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerReleaseFrame_v7000 = 260,
271
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerConnect_v7000 = 261,
272
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerDisconnect_v7000 = 262,
273
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerPresentFrame_v7000 = 263,
274
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamProducerReturnFrame_v7000 = 264,
275
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphicsResourceGetMappedEglFrame_v7000 = 265,
276
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemRangeGetAttribute_v8000 = 266,
277
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemRangeGetAttributes_v8000 = 267,
278
+ CUPTI_RUNTIME_TRACE_CBID_cudaEGLStreamConsumerConnectWithFlags_v7000 = 268,
279
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernel_v9000 = 269,
280
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernel_ptsz_v9000 = 270,
281
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventCreateFromEGLSync_v9000 = 271,
282
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernelMultiDevice_v9000 = 272,
283
+ CUPTI_RUNTIME_TRACE_CBID_cudaFuncSetAttribute_v9000 = 273,
284
+ CUPTI_RUNTIME_TRACE_CBID_cudaImportExternalMemory_v10000 = 274,
285
+ CUPTI_RUNTIME_TRACE_CBID_cudaExternalMemoryGetMappedBuffer_v10000 = 275,
286
+ CUPTI_RUNTIME_TRACE_CBID_cudaExternalMemoryGetMappedMipmappedArray_v10000 = 276,
287
+ CUPTI_RUNTIME_TRACE_CBID_cudaDestroyExternalMemory_v10000 = 277,
288
+ CUPTI_RUNTIME_TRACE_CBID_cudaImportExternalSemaphore_v10000 = 278,
289
+ CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_v10000 = 279,
290
+ CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_ptsz_v10000 = 280,
291
+ CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_v10000 = 281,
292
+ CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_ptsz_v10000 = 282,
293
+ CUPTI_RUNTIME_TRACE_CBID_cudaDestroyExternalSemaphore_v10000 = 283,
294
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchHostFunc_v10000 = 284,
295
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchHostFunc_ptsz_v10000 = 285,
296
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphCreate_v10000 = 286,
297
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeGetParams_v10000 = 287,
298
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeSetParams_v10000 = 288,
299
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddKernelNode_v10000 = 289,
300
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNode_v10000 = 290,
301
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeGetParams_v10000 = 291,
302
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParams_v10000 = 292,
303
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemsetNode_v10000 = 293,
304
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemsetNodeGetParams_v10000 = 294,
305
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemsetNodeSetParams_v10000 = 295,
306
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddHostNode_v10000 = 296,
307
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphHostNodeGetParams_v10000 = 297,
308
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddChildGraphNode_v10000 = 298,
309
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphChildGraphNodeGetGraph_v10000 = 299,
310
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddEmptyNode_v10000 = 300,
311
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphClone_v10000 = 301,
312
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeFindInClone_v10000 = 302,
313
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetType_v10000 = 303,
314
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetRootNodes_v10000 = 304,
315
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetDependencies_v10000 = 305,
316
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetDependentNodes_v10000 = 306,
317
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddDependencies_v10000 = 307,
318
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphRemoveDependencies_v10000 = 308,
319
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphDestroyNode_v10000 = 309,
320
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiate_v10000 = 310,
321
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphLaunch_v10000 = 311,
322
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphLaunch_ptsz_v10000 = 312,
323
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecDestroy_v10000 = 313,
324
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphDestroy_v10000 = 314,
325
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamBeginCapture_v10000 = 315,
326
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamBeginCapture_ptsz_v10000 = 316,
327
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamIsCapturing_v10000 = 317,
328
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamIsCapturing_ptsz_v10000 = 318,
329
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamEndCapture_v10000 = 319,
330
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamEndCapture_ptsz_v10000 = 320,
331
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphHostNodeSetParams_v10000 = 321,
332
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetNodes_v10000 = 322,
333
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphGetEdges_v10000 = 323,
334
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v10010 = 324,
335
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_ptsz_v10010 = 325,
336
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecKernelNodeSetParams_v10010 = 326,
337
+ CUPTI_RUNTIME_TRACE_CBID_cudaThreadExchangeStreamCaptureMode_v10010 = 327,
338
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetNvSciSyncAttributes_v10020 = 328,
339
+ CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyAvailableDynamicSMemPerBlock_v10200 = 329,
340
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetFlags_v10200 = 330,
341
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetFlags_ptsz_v10200 = 331,
342
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParams_v10020 = 332,
343
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemsetNodeSetParams_v10020 = 333,
344
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecHostNodeSetParams_v10020 = 334,
345
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecUpdate_v10020 = 335,
346
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetFuncBySymbol_v11000 = 336,
347
+ CUPTI_RUNTIME_TRACE_CBID_cudaCtxResetPersistingL2Cache_v11000 = 337,
348
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeCopyAttributes_v11000 = 338,
349
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeGetAttribute_v11000 = 339,
350
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphKernelNodeSetAttribute_v11000 = 340,
351
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamCopyAttributes_v11000 = 341,
352
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamCopyAttributes_ptsz_v11000 = 342,
353
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetAttribute_v11000 = 343,
354
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetAttribute_ptsz_v11000 = 344,
355
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetAttribute_v11000 = 345,
356
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetAttribute_ptsz_v11000 = 346,
357
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetTexture1DLinearMaxWidth_v11010 = 347,
358
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphUpload_v10000 = 348,
359
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphUpload_ptsz_v10000 = 349,
360
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNodeToSymbol_v11010 = 350,
361
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNodeFromSymbol_v11010 = 351,
362
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemcpyNode1D_v11010 = 352,
363
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParamsToSymbol_v11010 = 353,
364
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParamsFromSymbol_v11010 = 354,
365
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemcpyNodeSetParams1D_v11010 = 355,
366
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParamsToSymbol_v11010 = 356,
367
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParamsFromSymbol_v11010 = 357,
368
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecMemcpyNodeSetParams1D_v11010 = 358,
369
+ CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetSparseProperties_v11010 = 359,
370
+ CUPTI_RUNTIME_TRACE_CBID_cudaMipmappedArrayGetSparseProperties_v11010 = 360,
371
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecChildGraphNodeSetParams_v11010 = 361,
372
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddEventRecordNode_v11010 = 362,
373
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventRecordNodeGetEvent_v11010 = 363,
374
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventRecordNodeSetEvent_v11010 = 364,
375
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddEventWaitNode_v11010 = 365,
376
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventWaitNodeGetEvent_v11010 = 366,
377
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphEventWaitNodeSetEvent_v11010 = 367,
378
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecEventRecordNodeSetEvent_v11010 = 368,
379
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecEventWaitNodeSetEvent_v11010 = 369,
380
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventRecordWithFlags_v11010 = 370,
381
+ CUPTI_RUNTIME_TRACE_CBID_cudaEventRecordWithFlags_ptsz_v11010 = 371,
382
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetDefaultMemPool_v11020 = 372,
383
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocAsync_v11020 = 373,
384
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocAsync_ptsz_v11020 = 374,
385
+ CUPTI_RUNTIME_TRACE_CBID_cudaFreeAsync_v11020 = 375,
386
+ CUPTI_RUNTIME_TRACE_CBID_cudaFreeAsync_ptsz_v11020 = 376,
387
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolTrimTo_v11020 = 377,
388
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolSetAttribute_v11020 = 378,
389
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolGetAttribute_v11020 = 379,
390
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolSetAccess_v11020 = 380,
391
+ CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetPlane_v11020 = 381,
392
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolGetAccess_v11020 = 382,
393
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolCreate_v11020 = 383,
394
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolDestroy_v11020 = 384,
395
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetMemPool_v11020 = 385,
396
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetMemPool_v11020 = 386,
397
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolExportToShareableHandle_v11020 = 387,
398
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolImportFromShareableHandle_v11020 = 388,
399
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolExportPointer_v11020 = 389,
400
+ CUPTI_RUNTIME_TRACE_CBID_cudaMemPoolImportPointer_v11020 = 390,
401
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocFromPoolAsync_v11020 = 391,
402
+ CUPTI_RUNTIME_TRACE_CBID_cudaMallocFromPoolAsync_ptsz_v11020 = 392,
403
+ CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_v2_v11020 = 393,
404
+ CUPTI_RUNTIME_TRACE_CBID_cudaSignalExternalSemaphoresAsync_v2_ptsz_v11020 = 394,
405
+ CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_v2_v11020 = 395,
406
+ CUPTI_RUNTIME_TRACE_CBID_cudaWaitExternalSemaphoresAsync_v2_ptsz_v11020 = 396,
407
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddExternalSemaphoresSignalNode_v11020 = 397,
408
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresSignalNodeGetParams_v11020 = 398,
409
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresSignalNodeSetParams_v11020 = 399,
410
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddExternalSemaphoresWaitNode_v11020 = 400,
411
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresWaitNodeGetParams_v11020 = 401,
412
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExternalSemaphoresWaitNodeSetParams_v11020 = 402,
413
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecExternalSemaphoresSignalNodeSetParams_v11020 = 403,
414
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphExecExternalSemaphoresWaitNodeSetParams_v11020 = 404,
415
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceFlushGPUDirectRDMAWrites_v11030 = 405,
416
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetDriverEntryPoint_v11030 = 406,
417
+ CUPTI_RUNTIME_TRACE_CBID_cudaGetDriverEntryPoint_ptsz_v11030 = 407,
418
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphDebugDotPrint_v11030 = 408,
419
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v2_v11030 = 409,
420
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamGetCaptureInfo_v2_ptsz_v11030 = 410,
421
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamUpdateCaptureDependencies_v11030 = 411,
422
+ CUPTI_RUNTIME_TRACE_CBID_cudaStreamUpdateCaptureDependencies_ptsz_v11030 = 412,
423
+ CUPTI_RUNTIME_TRACE_CBID_cudaUserObjectCreate_v11030 = 413,
424
+ CUPTI_RUNTIME_TRACE_CBID_cudaUserObjectRetain_v11030 = 414,
425
+ CUPTI_RUNTIME_TRACE_CBID_cudaUserObjectRelease_v11030 = 415,
426
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphRetainUserObject_v11030 = 416,
427
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphReleaseUserObject_v11030 = 417,
428
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphInstantiateWithFlags_v11040 = 418,
429
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemAllocNode_v11040 = 419,
430
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemAllocNodeGetParams_v11040 = 420,
431
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphAddMemFreeNode_v11040 = 421,
432
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphMemFreeNodeGetParams_v11040 = 422,
433
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGraphMemTrim_v11040 = 423,
434
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceGetGraphMemAttribute_v11040 = 424,
435
+ CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSetGraphMemAttribute_v11040 = 425,
436
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeSetEnabled_v11060 = 426,
437
+ CUPTI_RUNTIME_TRACE_CBID_cudaGraphNodeGetEnabled_v11060 = 427,
438
+ CUPTI_RUNTIME_TRACE_CBID_cudaArrayGetMemoryRequirements_v11060 = 428,
439
+ CUPTI_RUNTIME_TRACE_CBID_cudaMipmappedArrayGetMemoryRequirements_v11060 = 429,
440
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernelExC_v11060 = 430,
441
+ CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernelExC_ptsz_v11060 = 431,
442
+ CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxPotentialClusterSize_v11070 = 432,
443
+ CUPTI_RUNTIME_TRACE_CBID_cudaOccupancyMaxActiveClusters_v11070 = 433,
444
+ CUPTI_RUNTIME_TRACE_CBID_SIZE = 434,
445
+ CUPTI_RUNTIME_TRACE_CBID_FORCE_INT = 0x7fffffff
446
+ } CUpti_runtime_api_trace_cbid;
447
+
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_target.h ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #if !defined(_CUPTI_TARGET_H_)
2
+ #define _CUPTI_TARGET_H_
3
+
4
+ /*
5
+ CUPTI profiler target APIs
6
+ This file contains the CUPTI profiling APIs.
7
+ */
8
+ #include <cupti_result.h>
9
+ #include <stddef.h>
10
+ #include <stdint.h>
11
+
12
+ #ifdef __cplusplus
13
+ extern "C" {
14
+ #endif
15
+
16
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
17
+ #pragma GCC visibility push(default)
18
+ #endif
19
+
20
+ #ifndef CUPTI_PROFILER_STRUCT_SIZE
21
+ #define CUPTI_PROFILER_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
22
+ #endif
23
+
24
+ typedef struct CUpti_Device_GetChipName_Params
25
+ {
26
+ size_t structSize; //!< [in] size of this struct; presumably CUpti_Device_GetChipName_Params_STRUCT_SIZE (TODO confirm)
27
+ void* pPriv; //!< [in] assign to NULL
28
+
29
+ size_t deviceIndex; //!< [in] index of the device to query (NOTE(review): presumably the CUDA device ordinal; confirm)
30
+ const char* pChipName; //!< [out] chip name for deviceIndex, presumably set by cuptiDeviceGetChipName (NOTE(review): string ownership/lifetime not shown here)
31
+ } CUpti_Device_GetChipName_Params;
32
+
33
+ #define CUpti_Device_GetChipName_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Device_GetChipName_Params, pChipName)
34
+ CUptiResult CUPTIAPI cuptiDeviceGetChipName(CUpti_Device_GetChipName_Params *pParams);
35
+
36
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
37
+ #pragma GCC visibility pop
38
+ #endif
39
+
40
+ #ifdef __cplusplus
41
+ } /* extern "C" */
42
+ #endif
43
+ #endif
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/cupti_version.h ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2010-2018 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ #if !defined(_CUPTI_VERSION_H_)
51
+ #define _CUPTI_VERSION_H_
52
+
53
+ #include <cuda_stdint.h>
54
+ #include <cupti_result.h>
55
+
56
+ #ifndef CUPTIAPI
57
+ #ifdef _WIN32
58
+ #define CUPTIAPI __stdcall
59
+ #else
60
+ #define CUPTIAPI
61
+ #endif
62
+ #endif
63
+
64
+ #if defined(__cplusplus)
65
+ extern "C" {
66
+ #endif
67
+
68
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
69
+ #pragma GCC visibility push(default)
70
+ #endif
71
+
72
+ /**
73
+ * \defgroup CUPTI_VERSION_API CUPTI Version
74
+ * Function and macro to determine the CUPTI version.
75
+ * @{
76
+ */
77
+
78
+ /**
79
+ * \brief The API version for this implementation of CUPTI.
80
+ *
81
+ * The API version for this implementation of CUPTI. This define along
82
+ * with \ref cuptiGetVersion can be used to dynamically detect if the
83
+ * version of CUPTI compiled against matches the version of the loaded
84
+ * CUPTI library.
85
+ *
86
+ * v1 : CUDAToolsSDK 4.0
87
+ * v2 : CUDAToolsSDK 4.1
88
+ * v3 : CUDA Toolkit 5.0
89
+ * v4 : CUDA Toolkit 5.5
90
+ * v5 : CUDA Toolkit 6.0
91
+ * v6 : CUDA Toolkit 6.5
92
+ * v7 : CUDA Toolkit 6.5 (with sm_52 support)
93
+ * v8 : CUDA Toolkit 7.0
94
+ * v9 : CUDA Toolkit 8.0
95
+ * v10 : CUDA Toolkit 9.0
96
+ * v11 : CUDA Toolkit 9.1
97
+ * v12 : CUDA Toolkit 10.0, 10.1 and 10.2
98
+ * v13 : CUDA Toolkit 11.0
99
+ * v14 : CUDA Toolkit 11.1
100
+ * v15 : CUDA Toolkit 11.2, 11.3 and 11.4
101
+ * v16 : CUDA Toolkit 11.5
102
+ * v17 : CUDA Toolkit 11.6
103
+ * v18 : CUDA Toolkit 11.8
104
+ */
105
+ #define CUPTI_API_VERSION 18
106
+
107
+ /**
108
+ * \brief Get the CUPTI API version.
109
+ *
110
+ * Return the API version in \p *version.
111
+ *
112
+ * \param version Returns the CUPTI API version
113
+ *
114
+ * \retval CUPTI_SUCCESS on success
115
+ * \retval CUPTI_ERROR_INVALID_PARAMETER if \p version is NULL
116
+ * \sa CUPTI_API_VERSION
117
+ */
118
+ CUptiResult CUPTIAPI cuptiGetVersion(uint32_t *version);
119
+
120
+ /** @} */ /* END CUPTI_VERSION_API */
121
+
122
+ #if defined(__GNUC__) && defined(CUPTI_LIB)
123
+ #pragma GCC visibility pop
124
+ #endif
125
+
126
+ #if defined(__cplusplus)
127
+ }
128
+ #endif
129
+
130
+ #endif /*_CUPTI_VERSION_H_*/
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_meta.h ADDED
@@ -0,0 +1,2941 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // This file is generated. Any changes you make will be lost during the next clean build.
2
+
3
+ // No dependent includes
4
+
5
+ // CUDA public interface, for type definitions and cu* function prototypes
6
+ #include "cuda.h"
7
+
8
+
9
+ // *************************************************************************
10
+ // Definitions of structs to hold parameters for each function
11
+ // *************************************************************************
12
+
13
+ typedef struct cuGetErrorString_params_st {
14
+ CUresult error;
15
+ const char **pStr;
16
+ } cuGetErrorString_params;
17
+
18
+ typedef struct cuGetErrorName_params_st {
19
+ CUresult error;
20
+ const char **pStr;
21
+ } cuGetErrorName_params;
22
+
23
+ typedef struct cuInit_params_st {
24
+ unsigned int Flags;
25
+ } cuInit_params;
26
+
27
+ typedef struct cuDriverGetVersion_params_st {
28
+ int *driverVersion;
29
+ } cuDriverGetVersion_params;
30
+
31
+ typedef struct cuDeviceGet_params_st {
32
+ CUdevice *device;
33
+ int ordinal;
34
+ } cuDeviceGet_params;
35
+
36
+ typedef struct cuDeviceGetCount_params_st {
37
+ int *count;
38
+ } cuDeviceGetCount_params;
39
+
40
+ typedef struct cuDeviceGetName_params_st {
41
+ char *name;
42
+ int len;
43
+ CUdevice dev;
44
+ } cuDeviceGetName_params;
45
+
46
+ typedef struct cuDeviceGetUuid_params_st {
47
+ CUuuid *uuid;
48
+ CUdevice dev;
49
+ } cuDeviceGetUuid_params;
50
+
51
+ typedef struct cuDeviceGetUuid_v2_params_st {
52
+ CUuuid *uuid;
53
+ CUdevice dev;
54
+ } cuDeviceGetUuid_v2_params;
55
+
56
+ typedef struct cuDeviceGetLuid_params_st {
57
+ char *luid;
58
+ unsigned int *deviceNodeMask;
59
+ CUdevice dev;
60
+ } cuDeviceGetLuid_params;
61
+
62
+ typedef struct cuDeviceTotalMem_v2_params_st {
63
+ size_t *bytes;
64
+ CUdevice dev;
65
+ } cuDeviceTotalMem_v2_params;
66
+
67
+ typedef struct cuDeviceGetTexture1DLinearMaxWidth_params_st {
68
+ size_t *maxWidthInElements;
69
+ CUarray_format format;
70
+ unsigned numChannels;
71
+ CUdevice dev;
72
+ } cuDeviceGetTexture1DLinearMaxWidth_params;
73
+
74
+ typedef struct cuDeviceGetAttribute_params_st {
75
+ int *pi;
76
+ CUdevice_attribute attrib;
77
+ CUdevice dev;
78
+ } cuDeviceGetAttribute_params;
79
+
80
+ typedef struct cuDeviceGetNvSciSyncAttributes_params_st {
81
+ void *nvSciSyncAttrList;
82
+ CUdevice dev;
83
+ int flags;
84
+ } cuDeviceGetNvSciSyncAttributes_params;
85
+
86
+ typedef struct cuDeviceSetMemPool_params_st {
87
+ CUdevice dev;
88
+ CUmemoryPool pool;
89
+ } cuDeviceSetMemPool_params;
90
+
91
+ typedef struct cuDeviceGetMemPool_params_st {
92
+ CUmemoryPool *pool;
93
+ CUdevice dev;
94
+ } cuDeviceGetMemPool_params;
95
+
96
+ typedef struct cuDeviceGetDefaultMemPool_params_st {
97
+ CUmemoryPool *pool_out;
98
+ CUdevice dev;
99
+ } cuDeviceGetDefaultMemPool_params;
100
+
101
+ typedef struct cuFlushGPUDirectRDMAWrites_params_st {
102
+ CUflushGPUDirectRDMAWritesTarget target;
103
+ CUflushGPUDirectRDMAWritesScope scope;
104
+ } cuFlushGPUDirectRDMAWrites_params;
105
+
106
+ typedef struct cuDeviceGetProperties_params_st {
107
+ CUdevprop *prop;
108
+ CUdevice dev;
109
+ } cuDeviceGetProperties_params;
110
+
111
+ typedef struct cuDeviceComputeCapability_params_st {
112
+ int *major;
113
+ int *minor;
114
+ CUdevice dev;
115
+ } cuDeviceComputeCapability_params;
116
+
117
+ typedef struct cuDevicePrimaryCtxRetain_params_st {
118
+ CUcontext *pctx;
119
+ CUdevice dev;
120
+ } cuDevicePrimaryCtxRetain_params;
121
+
122
+ typedef struct cuDevicePrimaryCtxRelease_v2_params_st {
123
+ CUdevice dev;
124
+ } cuDevicePrimaryCtxRelease_v2_params;
125
+
126
+ typedef struct cuDevicePrimaryCtxSetFlags_v2_params_st {
127
+ CUdevice dev;
128
+ unsigned int flags;
129
+ } cuDevicePrimaryCtxSetFlags_v2_params;
130
+
131
+ typedef struct cuDevicePrimaryCtxGetState_params_st {
132
+ CUdevice dev;
133
+ unsigned int *flags;
134
+ int *active;
135
+ } cuDevicePrimaryCtxGetState_params;
136
+
137
+ typedef struct cuDevicePrimaryCtxReset_v2_params_st {
138
+ CUdevice dev;
139
+ } cuDevicePrimaryCtxReset_v2_params;
140
+
141
+ typedef struct cuDeviceGetExecAffinitySupport_params_st {
142
+ int *pi;
143
+ CUexecAffinityType type;
144
+ CUdevice dev;
145
+ } cuDeviceGetExecAffinitySupport_params;
146
+
147
+ typedef struct cuCtxCreate_v2_params_st {
148
+ CUcontext *pctx;
149
+ unsigned int flags;
150
+ CUdevice dev;
151
+ } cuCtxCreate_v2_params;
152
+
153
+ typedef struct cuCtxCreate_v3_params_st {
154
+ CUcontext *pctx;
155
+ CUexecAffinityParam *paramsArray;
156
+ int numParams;
157
+ unsigned int flags;
158
+ CUdevice dev;
159
+ } cuCtxCreate_v3_params;
160
+
161
+ typedef struct cuCtxDestroy_v2_params_st {
162
+ CUcontext ctx;
163
+ } cuCtxDestroy_v2_params;
164
+
165
+ typedef struct cuCtxPushCurrent_v2_params_st {
166
+ CUcontext ctx;
167
+ } cuCtxPushCurrent_v2_params;
168
+
169
+ typedef struct cuCtxPopCurrent_v2_params_st {
170
+ CUcontext *pctx;
171
+ } cuCtxPopCurrent_v2_params;
172
+
173
+ typedef struct cuCtxSetCurrent_params_st {
174
+ CUcontext ctx;
175
+ } cuCtxSetCurrent_params;
176
+
177
+ typedef struct cuCtxGetCurrent_params_st {
178
+ CUcontext *pctx;
179
+ } cuCtxGetCurrent_params;
180
+
181
+ typedef struct cuCtxGetDevice_params_st {
182
+ CUdevice *device;
183
+ } cuCtxGetDevice_params;
184
+
185
+ typedef struct cuCtxGetFlags_params_st {
186
+ unsigned int *flags;
187
+ } cuCtxGetFlags_params;
188
+
189
+ typedef struct cuCtxSetLimit_params_st {
190
+ CUlimit limit;
191
+ size_t value;
192
+ } cuCtxSetLimit_params;
193
+
194
+ typedef struct cuCtxGetLimit_params_st {
195
+ size_t *pvalue;
196
+ CUlimit limit;
197
+ } cuCtxGetLimit_params;
198
+
199
+ typedef struct cuCtxGetCacheConfig_params_st {
200
+ CUfunc_cache *pconfig;
201
+ } cuCtxGetCacheConfig_params;
202
+
203
+ typedef struct cuCtxSetCacheConfig_params_st {
204
+ CUfunc_cache config;
205
+ } cuCtxSetCacheConfig_params;
206
+
207
+ typedef struct cuCtxGetSharedMemConfig_params_st {
208
+ CUsharedconfig *pConfig;
209
+ } cuCtxGetSharedMemConfig_params;
210
+
211
+ typedef struct cuCtxSetSharedMemConfig_params_st {
212
+ CUsharedconfig config;
213
+ } cuCtxSetSharedMemConfig_params;
214
+
215
+ typedef struct cuCtxGetApiVersion_params_st {
216
+ CUcontext ctx;
217
+ unsigned int *version;
218
+ } cuCtxGetApiVersion_params;
219
+
220
+ typedef struct cuCtxGetStreamPriorityRange_params_st {
221
+ int *leastPriority;
222
+ int *greatestPriority;
223
+ } cuCtxGetStreamPriorityRange_params;
224
+
225
+ typedef struct cuCtxGetExecAffinity_params_st {
226
+ CUexecAffinityParam *pExecAffinity;
227
+ CUexecAffinityType type;
228
+ } cuCtxGetExecAffinity_params;
229
+
230
+ typedef struct cuCtxAttach_params_st {
231
+ CUcontext *pctx;
232
+ unsigned int flags;
233
+ } cuCtxAttach_params;
234
+
235
+ typedef struct cuCtxDetach_params_st {
236
+ CUcontext ctx;
237
+ } cuCtxDetach_params;
238
+
239
+ typedef struct cuModuleLoad_params_st {
240
+ CUmodule *module;
241
+ const char *fname;
242
+ } cuModuleLoad_params;
243
+
244
+ typedef struct cuModuleLoadData_params_st {
245
+ CUmodule *module;
246
+ const void *image;
247
+ } cuModuleLoadData_params;
248
+
249
+ typedef struct cuModuleLoadDataEx_params_st {
250
+ CUmodule *module;
251
+ const void *image;
252
+ unsigned int numOptions;
253
+ CUjit_option *options;
254
+ void **optionValues;
255
+ } cuModuleLoadDataEx_params;
256
+
257
+ typedef struct cuModuleLoadFatBinary_params_st {
258
+ CUmodule *module;
259
+ const void *fatCubin;
260
+ } cuModuleLoadFatBinary_params;
261
+
262
+ typedef struct cuModuleUnload_params_st {
263
+ CUmodule hmod;
264
+ } cuModuleUnload_params;
265
+
266
+ typedef struct cuModuleGetFunction_params_st {
267
+ CUfunction *hfunc;
268
+ CUmodule hmod;
269
+ const char *name;
270
+ } cuModuleGetFunction_params;
271
+
272
+ typedef struct cuModuleGetGlobal_v2_params_st {
273
+ CUdeviceptr *dptr;
274
+ size_t *bytes;
275
+ CUmodule hmod;
276
+ const char *name;
277
+ } cuModuleGetGlobal_v2_params;
278
+
279
+ typedef struct cuModuleGetTexRef_params_st {
280
+ CUtexref *pTexRef;
281
+ CUmodule hmod;
282
+ const char *name;
283
+ } cuModuleGetTexRef_params;
284
+
285
+ typedef struct cuModuleGetSurfRef_params_st {
286
+ CUsurfref *pSurfRef;
287
+ CUmodule hmod;
288
+ const char *name;
289
+ } cuModuleGetSurfRef_params;
290
+
291
+ typedef struct cuLinkCreate_v2_params_st {
292
+ unsigned int numOptions;
293
+ CUjit_option *options;
294
+ void **optionValues;
295
+ CUlinkState *stateOut;
296
+ } cuLinkCreate_v2_params;
297
+
298
+ typedef struct cuLinkAddData_v2_params_st {
299
+ CUlinkState state;
300
+ CUjitInputType type;
301
+ void *data;
302
+ size_t size;
303
+ const char *name;
304
+ unsigned int numOptions;
305
+ CUjit_option *options;
306
+ void **optionValues;
307
+ } cuLinkAddData_v2_params;
308
+
309
+ typedef struct cuLinkAddFile_v2_params_st {
310
+ CUlinkState state;
311
+ CUjitInputType type;
312
+ const char *path;
313
+ unsigned int numOptions;
314
+ CUjit_option *options;
315
+ void **optionValues;
316
+ } cuLinkAddFile_v2_params;
317
+
318
+ typedef struct cuLinkComplete_params_st {
319
+ CUlinkState state;
320
+ void **cubinOut;
321
+ size_t *sizeOut;
322
+ } cuLinkComplete_params;
323
+
324
+ typedef struct cuLinkDestroy_params_st {
325
+ CUlinkState state;
326
+ } cuLinkDestroy_params;
327
+
328
+ typedef struct cuMemGetInfo_v2_params_st {
329
+ size_t *free;
330
+ size_t *total;
331
+ } cuMemGetInfo_v2_params;
332
+
333
+ typedef struct cuMemAlloc_v2_params_st {
334
+ CUdeviceptr *dptr;
335
+ size_t bytesize;
336
+ } cuMemAlloc_v2_params;
337
+
338
+ typedef struct cuMemAllocPitch_v2_params_st {
339
+ CUdeviceptr *dptr;
340
+ size_t *pPitch;
341
+ size_t WidthInBytes;
342
+ size_t Height;
343
+ unsigned int ElementSizeBytes;
344
+ } cuMemAllocPitch_v2_params;
345
+
346
+ typedef struct cuMemFree_v2_params_st {
347
+ CUdeviceptr dptr;
348
+ } cuMemFree_v2_params;
349
+
350
+ typedef struct cuMemGetAddressRange_v2_params_st {
351
+ CUdeviceptr *pbase;
352
+ size_t *psize;
353
+ CUdeviceptr dptr;
354
+ } cuMemGetAddressRange_v2_params;
355
+
356
+ typedef struct cuMemAllocHost_v2_params_st {
357
+ void **pp;
358
+ size_t bytesize;
359
+ } cuMemAllocHost_v2_params;
360
+
361
+ typedef struct cuMemFreeHost_params_st {
362
+ void *p;
363
+ } cuMemFreeHost_params;
364
+
365
+ typedef struct cuMemHostAlloc_params_st {
366
+ void **pp;
367
+ size_t bytesize;
368
+ unsigned int Flags;
369
+ } cuMemHostAlloc_params;
370
+
371
+ typedef struct cuMemHostGetDevicePointer_v2_params_st {
372
+ CUdeviceptr *pdptr;
373
+ void *p;
374
+ unsigned int Flags;
375
+ } cuMemHostGetDevicePointer_v2_params;
376
+
377
+ typedef struct cuMemHostGetFlags_params_st {
378
+ unsigned int *pFlags;
379
+ void *p;
380
+ } cuMemHostGetFlags_params;
381
+
382
+ typedef struct cuMemAllocManaged_params_st {
383
+ CUdeviceptr *dptr;
384
+ size_t bytesize;
385
+ unsigned int flags;
386
+ } cuMemAllocManaged_params;
387
+
388
+ typedef struct cuDeviceGetByPCIBusId_params_st {
389
+ CUdevice *dev;
390
+ const char *pciBusId;
391
+ } cuDeviceGetByPCIBusId_params;
392
+
393
+ typedef struct cuDeviceGetPCIBusId_params_st {
394
+ char *pciBusId;
395
+ int len;
396
+ CUdevice dev;
397
+ } cuDeviceGetPCIBusId_params;
398
+
399
+ typedef struct cuIpcGetEventHandle_params_st {
400
+ CUipcEventHandle *pHandle;
401
+ CUevent event;
402
+ } cuIpcGetEventHandle_params;
403
+
404
+ typedef struct cuIpcOpenEventHandle_params_st {
405
+ CUevent *phEvent;
406
+ CUipcEventHandle handle;
407
+ } cuIpcOpenEventHandle_params;
408
+
409
+ typedef struct cuIpcGetMemHandle_params_st {
410
+ CUipcMemHandle *pHandle;
411
+ CUdeviceptr dptr;
412
+ } cuIpcGetMemHandle_params;
413
+
414
+ typedef struct cuIpcOpenMemHandle_v2_params_st {
415
+ CUdeviceptr *pdptr;
416
+ CUipcMemHandle handle;
417
+ unsigned int Flags;
418
+ } cuIpcOpenMemHandle_v2_params;
419
+
420
+ typedef struct cuIpcCloseMemHandle_params_st {
421
+ CUdeviceptr dptr;
422
+ } cuIpcCloseMemHandle_params;
423
+
424
+ typedef struct cuMemHostRegister_v2_params_st {
425
+ void *p;
426
+ size_t bytesize;
427
+ unsigned int Flags;
428
+ } cuMemHostRegister_v2_params;
429
+
430
+ typedef struct cuMemHostUnregister_params_st {
431
+ void *p;
432
+ } cuMemHostUnregister_params;
433
+
434
+ typedef struct cuMemcpy_ptds_params_st {
435
+ CUdeviceptr dst;
436
+ CUdeviceptr src;
437
+ size_t ByteCount;
438
+ } cuMemcpy_ptds_params;
439
+
440
+ typedef struct cuMemcpyPeer_ptds_params_st {
441
+ CUdeviceptr dstDevice;
442
+ CUcontext dstContext;
443
+ CUdeviceptr srcDevice;
444
+ CUcontext srcContext;
445
+ size_t ByteCount;
446
+ } cuMemcpyPeer_ptds_params;
447
+
448
+ typedef struct cuMemcpyHtoD_v2_ptds_params_st {
449
+ CUdeviceptr dstDevice;
450
+ const void *srcHost;
451
+ size_t ByteCount;
452
+ } cuMemcpyHtoD_v2_ptds_params;
453
+
454
+ typedef struct cuMemcpyDtoH_v2_ptds_params_st {
455
+ void *dstHost;
456
+ CUdeviceptr srcDevice;
457
+ size_t ByteCount;
458
+ } cuMemcpyDtoH_v2_ptds_params;
459
+
460
+ typedef struct cuMemcpyDtoD_v2_ptds_params_st {
461
+ CUdeviceptr dstDevice;
462
+ CUdeviceptr srcDevice;
463
+ size_t ByteCount;
464
+ } cuMemcpyDtoD_v2_ptds_params;
465
+
466
+ typedef struct cuMemcpyDtoA_v2_ptds_params_st {
467
+ CUarray dstArray;
468
+ size_t dstOffset;
469
+ CUdeviceptr srcDevice;
470
+ size_t ByteCount;
471
+ } cuMemcpyDtoA_v2_ptds_params;
472
+
473
+ typedef struct cuMemcpyAtoD_v2_ptds_params_st {
474
+ CUdeviceptr dstDevice;
475
+ CUarray srcArray;
476
+ size_t srcOffset;
477
+ size_t ByteCount;
478
+ } cuMemcpyAtoD_v2_ptds_params;
479
+
480
+ typedef struct cuMemcpyHtoA_v2_ptds_params_st {
481
+ CUarray dstArray;
482
+ size_t dstOffset;
483
+ const void *srcHost;
484
+ size_t ByteCount;
485
+ } cuMemcpyHtoA_v2_ptds_params;
486
+
487
+ typedef struct cuMemcpyAtoH_v2_ptds_params_st {
488
+ void *dstHost;
489
+ CUarray srcArray;
490
+ size_t srcOffset;
491
+ size_t ByteCount;
492
+ } cuMemcpyAtoH_v2_ptds_params;
493
+
494
+ typedef struct cuMemcpyAtoA_v2_ptds_params_st {
495
+ CUarray dstArray;
496
+ size_t dstOffset;
497
+ CUarray srcArray;
498
+ size_t srcOffset;
499
+ size_t ByteCount;
500
+ } cuMemcpyAtoA_v2_ptds_params;
501
+
502
+ typedef struct cuMemcpy2D_v2_ptds_params_st {
503
+ const CUDA_MEMCPY2D *pCopy;
504
+ } cuMemcpy2D_v2_ptds_params;
505
+
506
+ typedef struct cuMemcpy2DUnaligned_v2_ptds_params_st {
507
+ const CUDA_MEMCPY2D *pCopy;
508
+ } cuMemcpy2DUnaligned_v2_ptds_params;
509
+
510
+ typedef struct cuMemcpy3D_v2_ptds_params_st {
511
+ const CUDA_MEMCPY3D *pCopy;
512
+ } cuMemcpy3D_v2_ptds_params;
513
+
514
+ typedef struct cuMemcpy3DPeer_ptds_params_st {
515
+ const CUDA_MEMCPY3D_PEER *pCopy;
516
+ } cuMemcpy3DPeer_ptds_params;
517
+
518
+ typedef struct cuMemcpyAsync_ptsz_params_st {
519
+ CUdeviceptr dst;
520
+ CUdeviceptr src;
521
+ size_t ByteCount;
522
+ CUstream hStream;
523
+ } cuMemcpyAsync_ptsz_params;
524
+
525
+ typedef struct cuMemcpyPeerAsync_ptsz_params_st {
526
+ CUdeviceptr dstDevice;
527
+ CUcontext dstContext;
528
+ CUdeviceptr srcDevice;
529
+ CUcontext srcContext;
530
+ size_t ByteCount;
531
+ CUstream hStream;
532
+ } cuMemcpyPeerAsync_ptsz_params;
533
+
534
+ typedef struct cuMemcpyHtoDAsync_v2_ptsz_params_st {
535
+ CUdeviceptr dstDevice;
536
+ const void *srcHost;
537
+ size_t ByteCount;
538
+ CUstream hStream;
539
+ } cuMemcpyHtoDAsync_v2_ptsz_params;
540
+
541
+ typedef struct cuMemcpyDtoHAsync_v2_ptsz_params_st {
542
+ void *dstHost;
543
+ CUdeviceptr srcDevice;
544
+ size_t ByteCount;
545
+ CUstream hStream;
546
+ } cuMemcpyDtoHAsync_v2_ptsz_params;
547
+
548
+ typedef struct cuMemcpyDtoDAsync_v2_ptsz_params_st {
549
+ CUdeviceptr dstDevice;
550
+ CUdeviceptr srcDevice;
551
+ size_t ByteCount;
552
+ CUstream hStream;
553
+ } cuMemcpyDtoDAsync_v2_ptsz_params;
554
+
555
+ typedef struct cuMemcpyHtoAAsync_v2_ptsz_params_st {
556
+ CUarray dstArray;
557
+ size_t dstOffset;
558
+ const void *srcHost;
559
+ size_t ByteCount;
560
+ CUstream hStream;
561
+ } cuMemcpyHtoAAsync_v2_ptsz_params;
562
+
563
+ typedef struct cuMemcpyAtoHAsync_v2_ptsz_params_st {
564
+ void *dstHost;
565
+ CUarray srcArray;
566
+ size_t srcOffset;
567
+ size_t ByteCount;
568
+ CUstream hStream;
569
+ } cuMemcpyAtoHAsync_v2_ptsz_params;
570
+
571
+ typedef struct cuMemcpy2DAsync_v2_ptsz_params_st {
572
+ const CUDA_MEMCPY2D *pCopy;
573
+ CUstream hStream;
574
+ } cuMemcpy2DAsync_v2_ptsz_params;
575
+
576
+ typedef struct cuMemcpy3DAsync_v2_ptsz_params_st {
577
+ const CUDA_MEMCPY3D *pCopy;
578
+ CUstream hStream;
579
+ } cuMemcpy3DAsync_v2_ptsz_params;
580
+
581
+ typedef struct cuMemcpy3DPeerAsync_ptsz_params_st {
582
+ const CUDA_MEMCPY3D_PEER *pCopy;
583
+ CUstream hStream;
584
+ } cuMemcpy3DPeerAsync_ptsz_params;
585
+
586
+ typedef struct cuMemsetD8_v2_ptds_params_st {
587
+ CUdeviceptr dstDevice;
588
+ unsigned char uc;
589
+ size_t N;
590
+ } cuMemsetD8_v2_ptds_params;
591
+
592
+ typedef struct cuMemsetD16_v2_ptds_params_st {
593
+ CUdeviceptr dstDevice;
594
+ unsigned short us;
595
+ size_t N;
596
+ } cuMemsetD16_v2_ptds_params;
597
+
598
+ typedef struct cuMemsetD32_v2_ptds_params_st {
599
+ CUdeviceptr dstDevice;
600
+ unsigned int ui;
601
+ size_t N;
602
+ } cuMemsetD32_v2_ptds_params;
603
+
604
+ typedef struct cuMemsetD2D8_v2_ptds_params_st {
605
+ CUdeviceptr dstDevice;
606
+ size_t dstPitch;
607
+ unsigned char uc;
608
+ size_t Width;
609
+ size_t Height;
610
+ } cuMemsetD2D8_v2_ptds_params;
611
+
612
+ typedef struct cuMemsetD2D16_v2_ptds_params_st {
613
+ CUdeviceptr dstDevice;
614
+ size_t dstPitch;
615
+ unsigned short us;
616
+ size_t Width;
617
+ size_t Height;
618
+ } cuMemsetD2D16_v2_ptds_params;
619
+
620
+ typedef struct cuMemsetD2D32_v2_ptds_params_st {
621
+ CUdeviceptr dstDevice;
622
+ size_t dstPitch;
623
+ unsigned int ui;
624
+ size_t Width;
625
+ size_t Height;
626
+ } cuMemsetD2D32_v2_ptds_params;
627
+
628
+ typedef struct cuMemsetD8Async_ptsz_params_st {
629
+ CUdeviceptr dstDevice;
630
+ unsigned char uc;
631
+ size_t N;
632
+ CUstream hStream;
633
+ } cuMemsetD8Async_ptsz_params;
634
+
635
+ typedef struct cuMemsetD16Async_ptsz_params_st {
636
+ CUdeviceptr dstDevice;
637
+ unsigned short us;
638
+ size_t N;
639
+ CUstream hStream;
640
+ } cuMemsetD16Async_ptsz_params;
641
+
642
+ typedef struct cuMemsetD32Async_ptsz_params_st {
643
+ CUdeviceptr dstDevice;
644
+ unsigned int ui;
645
+ size_t N;
646
+ CUstream hStream;
647
+ } cuMemsetD32Async_ptsz_params;
648
+
649
+ typedef struct cuMemsetD2D8Async_ptsz_params_st {
650
+ CUdeviceptr dstDevice;
651
+ size_t dstPitch;
652
+ unsigned char uc;
653
+ size_t Width;
654
+ size_t Height;
655
+ CUstream hStream;
656
+ } cuMemsetD2D8Async_ptsz_params;
657
+
658
+ typedef struct cuMemsetD2D16Async_ptsz_params_st {
659
+ CUdeviceptr dstDevice;
660
+ size_t dstPitch;
661
+ unsigned short us;
662
+ size_t Width;
663
+ size_t Height;
664
+ CUstream hStream;
665
+ } cuMemsetD2D16Async_ptsz_params;
666
+
667
+ typedef struct cuMemsetD2D32Async_ptsz_params_st {
668
+ CUdeviceptr dstDevice;
669
+ size_t dstPitch;
670
+ unsigned int ui;
671
+ size_t Width;
672
+ size_t Height;
673
+ CUstream hStream;
674
+ } cuMemsetD2D32Async_ptsz_params;
675
+
676
+ typedef struct cuArrayCreate_v2_params_st {
677
+ CUarray *pHandle;
678
+ const CUDA_ARRAY_DESCRIPTOR *pAllocateArray;
679
+ } cuArrayCreate_v2_params;
680
+
681
+ typedef struct cuArrayGetDescriptor_v2_params_st {
682
+ CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor;
683
+ CUarray hArray;
684
+ } cuArrayGetDescriptor_v2_params;
685
+
686
+ typedef struct cuArrayGetSparseProperties_params_st {
687
+ CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties;
688
+ CUarray array;
689
+ } cuArrayGetSparseProperties_params;
690
+
691
+ typedef struct cuMipmappedArrayGetSparseProperties_params_st {
692
+ CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties;
693
+ CUmipmappedArray mipmap;
694
+ } cuMipmappedArrayGetSparseProperties_params;
695
+
696
+ typedef struct cuArrayGetMemoryRequirements_params_st {
697
+ CUDA_ARRAY_MEMORY_REQUIREMENTS *memoryRequirements;
698
+ CUarray array;
699
+ CUdevice device;
700
+ } cuArrayGetMemoryRequirements_params;
701
+
702
+ typedef struct cuMipmappedArrayGetMemoryRequirements_params_st {
703
+ CUDA_ARRAY_MEMORY_REQUIREMENTS *memoryRequirements;
704
+ CUmipmappedArray mipmap;
705
+ CUdevice device;
706
+ } cuMipmappedArrayGetMemoryRequirements_params;
707
+
708
+ typedef struct cuArrayGetPlane_params_st {
709
+ CUarray *pPlaneArray;
710
+ CUarray hArray;
711
+ unsigned int planeIdx;
712
+ } cuArrayGetPlane_params;
713
+
714
+ typedef struct cuArrayDestroy_params_st {
715
+ CUarray hArray;
716
+ } cuArrayDestroy_params;
717
+
718
+ typedef struct cuArray3DCreate_v2_params_st {
719
+ CUarray *pHandle;
720
+ const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray;
721
+ } cuArray3DCreate_v2_params;
722
+
723
+ typedef struct cuArray3DGetDescriptor_v2_params_st {
724
+ CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor;
725
+ CUarray hArray;
726
+ } cuArray3DGetDescriptor_v2_params;
727
+
728
+ typedef struct cuMipmappedArrayCreate_params_st {
729
+ CUmipmappedArray *pHandle;
730
+ const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc;
731
+ unsigned int numMipmapLevels;
732
+ } cuMipmappedArrayCreate_params;
733
+
734
+ typedef struct cuMipmappedArrayGetLevel_params_st {
735
+ CUarray *pLevelArray;
736
+ CUmipmappedArray hMipmappedArray;
737
+ unsigned int level;
738
+ } cuMipmappedArrayGetLevel_params;
739
+
740
+ typedef struct cuMipmappedArrayDestroy_params_st {
741
+ CUmipmappedArray hMipmappedArray;
742
+ } cuMipmappedArrayDestroy_params;
743
+
744
+ typedef struct cuMemAddressReserve_params_st {
745
+ CUdeviceptr *ptr;
746
+ size_t size;
747
+ size_t alignment;
748
+ CUdeviceptr addr;
749
+ unsigned long long flags;
750
+ } cuMemAddressReserve_params;
751
+
752
+ typedef struct cuMemAddressFree_params_st {
753
+ CUdeviceptr ptr;
754
+ size_t size;
755
+ } cuMemAddressFree_params;
756
+
757
+ typedef struct cuMemCreate_params_st {
758
+ CUmemGenericAllocationHandle *handle;
759
+ size_t size;
760
+ const CUmemAllocationProp *prop;
761
+ unsigned long long flags;
762
+ } cuMemCreate_params;
763
+
764
+ typedef struct cuMemRelease_params_st {
765
+ CUmemGenericAllocationHandle handle;
766
+ } cuMemRelease_params;
767
+
768
+ typedef struct cuMemMap_params_st {
769
+ CUdeviceptr ptr;
770
+ size_t size;
771
+ size_t offset;
772
+ CUmemGenericAllocationHandle handle;
773
+ unsigned long long flags;
774
+ } cuMemMap_params;
775
+
776
+ typedef struct cuMemMapArrayAsync_ptsz_params_st {
777
+ CUarrayMapInfo *mapInfoList;
778
+ unsigned int count;
779
+ CUstream hStream;
780
+ } cuMemMapArrayAsync_ptsz_params;
781
+
782
+ typedef struct cuMemUnmap_params_st {
783
+ CUdeviceptr ptr;
784
+ size_t size;
785
+ } cuMemUnmap_params;
786
+
787
+ typedef struct cuMemSetAccess_params_st {
788
+ CUdeviceptr ptr;
789
+ size_t size;
790
+ const CUmemAccessDesc *desc;
791
+ size_t count;
792
+ } cuMemSetAccess_params;
793
+
794
+ typedef struct cuMemGetAccess_params_st {
795
+ unsigned long long *flags;
796
+ const CUmemLocation *location;
797
+ CUdeviceptr ptr;
798
+ } cuMemGetAccess_params;
799
+
800
+ typedef struct cuMemExportToShareableHandle_params_st {
801
+ void *shareableHandle;
802
+ CUmemGenericAllocationHandle handle;
803
+ CUmemAllocationHandleType handleType;
804
+ unsigned long long flags;
805
+ } cuMemExportToShareableHandle_params;
806
+
807
+ typedef struct cuMemImportFromShareableHandle_params_st {
808
+ CUmemGenericAllocationHandle *handle;
809
+ void *osHandle;
810
+ CUmemAllocationHandleType shHandleType;
811
+ } cuMemImportFromShareableHandle_params;
812
+
813
+ typedef struct cuMemGetAllocationGranularity_params_st {
814
+ size_t *granularity;
815
+ const CUmemAllocationProp *prop;
816
+ CUmemAllocationGranularity_flags option;
817
+ } cuMemGetAllocationGranularity_params;
818
+
819
+ typedef struct cuMemGetAllocationPropertiesFromHandle_params_st {
820
+ CUmemAllocationProp *prop;
821
+ CUmemGenericAllocationHandle handle;
822
+ } cuMemGetAllocationPropertiesFromHandle_params;
823
+
824
+ typedef struct cuMemRetainAllocationHandle_params_st {
825
+ CUmemGenericAllocationHandle *handle;
826
+ void *addr;
827
+ } cuMemRetainAllocationHandle_params;
828
+
829
+ typedef struct cuMemFreeAsync_ptsz_params_st {
830
+ CUdeviceptr dptr;
831
+ CUstream hStream;
832
+ } cuMemFreeAsync_ptsz_params;
833
+
834
+ typedef struct cuMemAllocAsync_ptsz_params_st {
835
+ CUdeviceptr *dptr;
836
+ size_t bytesize;
837
+ CUstream hStream;
838
+ } cuMemAllocAsync_ptsz_params;
839
+
840
+ typedef struct cuMemPoolTrimTo_params_st {
841
+ CUmemoryPool pool;
842
+ size_t minBytesToKeep;
843
+ } cuMemPoolTrimTo_params;
844
+
845
+ typedef struct cuMemPoolSetAttribute_params_st {
846
+ CUmemoryPool pool;
847
+ CUmemPool_attribute attr;
848
+ void *value;
849
+ } cuMemPoolSetAttribute_params;
850
+
851
+ typedef struct cuMemPoolGetAttribute_params_st {
852
+ CUmemoryPool pool;
853
+ CUmemPool_attribute attr;
854
+ void *value;
855
+ } cuMemPoolGetAttribute_params;
856
+
857
+ typedef struct cuMemPoolSetAccess_params_st {
858
+ CUmemoryPool pool;
859
+ const CUmemAccessDesc *map;
860
+ size_t count;
861
+ } cuMemPoolSetAccess_params;
862
+
863
+ typedef struct cuMemPoolGetAccess_params_st {
864
+ CUmemAccess_flags *flags;
865
+ CUmemoryPool memPool;
866
+ CUmemLocation *location;
867
+ } cuMemPoolGetAccess_params;
868
+
869
+ typedef struct cuMemPoolCreate_params_st {
870
+ CUmemoryPool *pool;
871
+ const CUmemPoolProps *poolProps;
872
+ } cuMemPoolCreate_params;
873
+
874
+ typedef struct cuMemPoolDestroy_params_st {
875
+ CUmemoryPool pool;
876
+ } cuMemPoolDestroy_params;
877
+
878
+ typedef struct cuMemAllocFromPoolAsync_ptsz_params_st {
879
+ CUdeviceptr *dptr;
880
+ size_t bytesize;
881
+ CUmemoryPool pool;
882
+ CUstream hStream;
883
+ } cuMemAllocFromPoolAsync_ptsz_params;
884
+
885
+ typedef struct cuMemPoolExportToShareableHandle_params_st {
886
+ void *handle_out;
887
+ CUmemoryPool pool;
888
+ CUmemAllocationHandleType handleType;
889
+ unsigned long long flags;
890
+ } cuMemPoolExportToShareableHandle_params;
891
+
892
+ typedef struct cuMemPoolImportFromShareableHandle_params_st {
893
+ CUmemoryPool *pool_out;
894
+ void *handle;
895
+ CUmemAllocationHandleType handleType;
896
+ unsigned long long flags;
897
+ } cuMemPoolImportFromShareableHandle_params;
898
+
899
+ typedef struct cuMemPoolExportPointer_params_st {
900
+ CUmemPoolPtrExportData *shareData_out;
901
+ CUdeviceptr ptr;
902
+ } cuMemPoolExportPointer_params;
903
+
904
+ typedef struct cuMemPoolImportPointer_params_st {
905
+ CUdeviceptr *ptr_out;
906
+ CUmemoryPool pool;
907
+ CUmemPoolPtrExportData *shareData;
908
+ } cuMemPoolImportPointer_params;
909
+
910
+ typedef struct cuPointerGetAttribute_params_st {
911
+ void *data;
912
+ CUpointer_attribute attribute;
913
+ CUdeviceptr ptr;
914
+ } cuPointerGetAttribute_params;
915
+
916
+ typedef struct cuMemPrefetchAsync_ptsz_params_st {
917
+ CUdeviceptr devPtr;
918
+ size_t count;
919
+ CUdevice dstDevice;
920
+ CUstream hStream;
921
+ } cuMemPrefetchAsync_ptsz_params;
922
+
923
+ typedef struct cuMemAdvise_params_st {
924
+ CUdeviceptr devPtr;
925
+ size_t count;
926
+ CUmem_advise advice;
927
+ CUdevice device;
928
+ } cuMemAdvise_params;
929
+
930
+ typedef struct cuMemRangeGetAttribute_params_st {
931
+ void *data;
932
+ size_t dataSize;
933
+ CUmem_range_attribute attribute;
934
+ CUdeviceptr devPtr;
935
+ size_t count;
936
+ } cuMemRangeGetAttribute_params;
937
+
938
+ typedef struct cuMemRangeGetAttributes_params_st {
939
+ void **data;
940
+ size_t *dataSizes;
941
+ CUmem_range_attribute *attributes;
942
+ size_t numAttributes;
943
+ CUdeviceptr devPtr;
944
+ size_t count;
945
+ } cuMemRangeGetAttributes_params;
946
+
947
+ typedef struct cuPointerSetAttribute_params_st {
948
+ const void *value;
949
+ CUpointer_attribute attribute;
950
+ CUdeviceptr ptr;
951
+ } cuPointerSetAttribute_params;
952
+
953
+ typedef struct cuPointerGetAttributes_params_st {
954
+ unsigned int numAttributes;
955
+ CUpointer_attribute *attributes;
956
+ void **data;
957
+ CUdeviceptr ptr;
958
+ } cuPointerGetAttributes_params;
959
+
960
+ typedef struct cuStreamCreate_params_st {
961
+ CUstream *phStream;
962
+ unsigned int Flags;
963
+ } cuStreamCreate_params;
964
+
965
+ typedef struct cuStreamCreateWithPriority_params_st {
966
+ CUstream *phStream;
967
+ unsigned int flags;
968
+ int priority;
969
+ } cuStreamCreateWithPriority_params;
970
+
971
+ typedef struct cuStreamGetPriority_ptsz_params_st {
972
+ CUstream hStream;
973
+ int *priority;
974
+ } cuStreamGetPriority_ptsz_params;
975
+
976
+ typedef struct cuStreamGetFlags_ptsz_params_st {
977
+ CUstream hStream;
978
+ unsigned int *flags;
979
+ } cuStreamGetFlags_ptsz_params;
980
+
981
+ typedef struct cuStreamGetCtx_ptsz_params_st {
982
+ CUstream hStream;
983
+ CUcontext *pctx;
984
+ } cuStreamGetCtx_ptsz_params;
985
+
986
+ typedef struct cuStreamWaitEvent_ptsz_params_st {
987
+ CUstream hStream;
988
+ CUevent hEvent;
989
+ unsigned int Flags;
990
+ } cuStreamWaitEvent_ptsz_params;
991
+
992
+ typedef struct cuStreamAddCallback_ptsz_params_st {
993
+ CUstream hStream;
994
+ CUstreamCallback callback;
995
+ void *userData;
996
+ unsigned int flags;
997
+ } cuStreamAddCallback_ptsz_params;
998
+
999
+ typedef struct cuStreamBeginCapture_v2_ptsz_params_st {
1000
+ CUstream hStream;
1001
+ CUstreamCaptureMode mode;
1002
+ } cuStreamBeginCapture_v2_ptsz_params;
1003
+
1004
+ typedef struct cuThreadExchangeStreamCaptureMode_params_st {
1005
+ CUstreamCaptureMode *mode;
1006
+ } cuThreadExchangeStreamCaptureMode_params;
1007
+
1008
+ typedef struct cuStreamEndCapture_ptsz_params_st {
1009
+ CUstream hStream;
1010
+ CUgraph *phGraph;
1011
+ } cuStreamEndCapture_ptsz_params;
1012
+
1013
+ typedef struct cuStreamIsCapturing_ptsz_params_st {
1014
+ CUstream hStream;
1015
+ CUstreamCaptureStatus *captureStatus;
1016
+ } cuStreamIsCapturing_ptsz_params;
1017
+
1018
+ typedef struct cuStreamGetCaptureInfo_ptsz_params_st {
1019
+ CUstream hStream;
1020
+ CUstreamCaptureStatus *captureStatus_out;
1021
+ cuuint64_t *id_out;
1022
+ } cuStreamGetCaptureInfo_ptsz_params;
1023
+
1024
+ typedef struct cuStreamGetCaptureInfo_v2_ptsz_params_st {
1025
+ CUstream hStream;
1026
+ CUstreamCaptureStatus *captureStatus_out;
1027
+ cuuint64_t *id_out;
1028
+ CUgraph *graph_out;
1029
+ const CUgraphNode **dependencies_out;
1030
+ size_t *numDependencies_out;
1031
+ } cuStreamGetCaptureInfo_v2_ptsz_params;
1032
+
1033
+ typedef struct cuStreamUpdateCaptureDependencies_ptsz_params_st {
1034
+ CUstream hStream;
1035
+ CUgraphNode *dependencies;
1036
+ size_t numDependencies;
1037
+ unsigned int flags;
1038
+ } cuStreamUpdateCaptureDependencies_ptsz_params;
1039
+
1040
+ typedef struct cuStreamAttachMemAsync_ptsz_params_st {
1041
+ CUstream hStream;
1042
+ CUdeviceptr dptr;
1043
+ size_t length;
1044
+ unsigned int flags;
1045
+ } cuStreamAttachMemAsync_ptsz_params;
1046
+
1047
+ typedef struct cuStreamQuery_ptsz_params_st {
1048
+ CUstream hStream;
1049
+ } cuStreamQuery_ptsz_params;
1050
+
1051
+ typedef struct cuStreamSynchronize_ptsz_params_st {
1052
+ CUstream hStream;
1053
+ } cuStreamSynchronize_ptsz_params;
1054
+
1055
+ typedef struct cuStreamDestroy_v2_params_st {
1056
+ CUstream hStream;
1057
+ } cuStreamDestroy_v2_params;
1058
+
1059
+ typedef struct cuStreamCopyAttributes_ptsz_params_st {
1060
+ CUstream dst;
1061
+ CUstream src;
1062
+ } cuStreamCopyAttributes_ptsz_params;
1063
+
1064
+ typedef struct cuStreamGetAttribute_ptsz_params_st {
1065
+ CUstream hStream;
1066
+ CUstreamAttrID attr;
1067
+ CUstreamAttrValue *value_out;
1068
+ } cuStreamGetAttribute_ptsz_params;
1069
+
1070
+ typedef struct cuStreamSetAttribute_ptsz_params_st {
1071
+ CUstream hStream;
1072
+ CUstreamAttrID attr;
1073
+ const CUstreamAttrValue *value;
1074
+ } cuStreamSetAttribute_ptsz_params;
1075
+
1076
+ typedef struct cuEventCreate_params_st {
1077
+ CUevent *phEvent;
1078
+ unsigned int Flags;
1079
+ } cuEventCreate_params;
1080
+
1081
+ typedef struct cuEventRecord_ptsz_params_st {
1082
+ CUevent hEvent;
1083
+ CUstream hStream;
1084
+ } cuEventRecord_ptsz_params;
1085
+
1086
+ typedef struct cuEventRecordWithFlags_ptsz_params_st {
1087
+ CUevent hEvent;
1088
+ CUstream hStream;
1089
+ unsigned int flags;
1090
+ } cuEventRecordWithFlags_ptsz_params;
1091
+
1092
+ typedef struct cuEventQuery_params_st {
1093
+ CUevent hEvent;
1094
+ } cuEventQuery_params;
1095
+
1096
+ typedef struct cuEventSynchronize_params_st {
1097
+ CUevent hEvent;
1098
+ } cuEventSynchronize_params;
1099
+
1100
+ typedef struct cuEventDestroy_v2_params_st {
1101
+ CUevent hEvent;
1102
+ } cuEventDestroy_v2_params;
1103
+
1104
+ typedef struct cuEventElapsedTime_params_st {
1105
+ float *pMilliseconds;
1106
+ CUevent hStart;
1107
+ CUevent hEnd;
1108
+ } cuEventElapsedTime_params;
1109
+
1110
+ typedef struct cuImportExternalMemory_params_st {
1111
+ CUexternalMemory *extMem_out;
1112
+ const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc;
1113
+ } cuImportExternalMemory_params;
1114
+
1115
+ typedef struct cuExternalMemoryGetMappedBuffer_params_st {
1116
+ CUdeviceptr *devPtr;
1117
+ CUexternalMemory extMem;
1118
+ const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc;
1119
+ } cuExternalMemoryGetMappedBuffer_params;
1120
+
1121
+ typedef struct cuExternalMemoryGetMappedMipmappedArray_params_st {
1122
+ CUmipmappedArray *mipmap;
1123
+ CUexternalMemory extMem;
1124
+ const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc;
1125
+ } cuExternalMemoryGetMappedMipmappedArray_params;
1126
+
1127
+ typedef struct cuDestroyExternalMemory_params_st {
1128
+ CUexternalMemory extMem;
1129
+ } cuDestroyExternalMemory_params;
1130
+
1131
+ typedef struct cuImportExternalSemaphore_params_st {
1132
+ CUexternalSemaphore *extSem_out;
1133
+ const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc;
1134
+ } cuImportExternalSemaphore_params;
1135
+
1136
+ typedef struct cuSignalExternalSemaphoresAsync_ptsz_params_st {
1137
+ const CUexternalSemaphore *extSemArray;
1138
+ const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray;
1139
+ unsigned int numExtSems;
1140
+ CUstream stream;
1141
+ } cuSignalExternalSemaphoresAsync_ptsz_params;
1142
+
1143
+ typedef struct cuWaitExternalSemaphoresAsync_ptsz_params_st {
1144
+ const CUexternalSemaphore *extSemArray;
1145
+ const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray;
1146
+ unsigned int numExtSems;
1147
+ CUstream stream;
1148
+ } cuWaitExternalSemaphoresAsync_ptsz_params;
1149
+
1150
+ typedef struct cuDestroyExternalSemaphore_params_st {
1151
+ CUexternalSemaphore extSem;
1152
+ } cuDestroyExternalSemaphore_params;
1153
+
1154
+ typedef struct cuStreamWaitValue32_ptsz_params_st {
1155
+ CUstream stream;
1156
+ CUdeviceptr addr;
1157
+ cuuint32_t value;
1158
+ unsigned int flags;
1159
+ } cuStreamWaitValue32_ptsz_params;
1160
+
1161
+ typedef struct cuStreamWaitValue64_ptsz_params_st {
1162
+ CUstream stream;
1163
+ CUdeviceptr addr;
1164
+ cuuint64_t value;
1165
+ unsigned int flags;
1166
+ } cuStreamWaitValue64_ptsz_params;
1167
+
1168
+ typedef struct cuStreamWriteValue32_ptsz_params_st {
1169
+ CUstream stream;
1170
+ CUdeviceptr addr;
1171
+ cuuint32_t value;
1172
+ unsigned int flags;
1173
+ } cuStreamWriteValue32_ptsz_params;
1174
+
1175
+ typedef struct cuStreamWriteValue64_ptsz_params_st {
1176
+ CUstream stream;
1177
+ CUdeviceptr addr;
1178
+ cuuint64_t value;
1179
+ unsigned int flags;
1180
+ } cuStreamWriteValue64_ptsz_params;
1181
+
1182
+ typedef struct cuStreamBatchMemOp_ptsz_params_st {
1183
+ CUstream stream;
1184
+ unsigned int count;
1185
+ CUstreamBatchMemOpParams *paramArray;
1186
+ unsigned int flags;
1187
+ } cuStreamBatchMemOp_ptsz_params;
1188
+
1189
+ typedef struct cuFuncGetAttribute_params_st {
1190
+ int *pi;
1191
+ CUfunction_attribute attrib;
1192
+ CUfunction hfunc;
1193
+ } cuFuncGetAttribute_params;
1194
+
1195
+ typedef struct cuFuncSetAttribute_params_st {
1196
+ CUfunction hfunc;
1197
+ CUfunction_attribute attrib;
1198
+ int value;
1199
+ } cuFuncSetAttribute_params;
1200
+
1201
+ typedef struct cuFuncSetCacheConfig_params_st {
1202
+ CUfunction hfunc;
1203
+ CUfunc_cache config;
1204
+ } cuFuncSetCacheConfig_params;
1205
+
1206
+ typedef struct cuFuncSetSharedMemConfig_params_st {
1207
+ CUfunction hfunc;
1208
+ CUsharedconfig config;
1209
+ } cuFuncSetSharedMemConfig_params;
1210
+
1211
+ typedef struct cuFuncGetModule_params_st {
1212
+ CUmodule *hmod;
1213
+ CUfunction hfunc;
1214
+ } cuFuncGetModule_params;
1215
+
1216
+ typedef struct cuLaunchKernel_ptsz_params_st {
1217
+ CUfunction f;
1218
+ unsigned int gridDimX;
1219
+ unsigned int gridDimY;
1220
+ unsigned int gridDimZ;
1221
+ unsigned int blockDimX;
1222
+ unsigned int blockDimY;
1223
+ unsigned int blockDimZ;
1224
+ unsigned int sharedMemBytes;
1225
+ CUstream hStream;
1226
+ void **kernelParams;
1227
+ void **extra;
1228
+ } cuLaunchKernel_ptsz_params;
1229
+
1230
+ typedef struct cuLaunchKernelEx_ptsz_params_st {
1231
+ const CUlaunchConfig *config;
1232
+ CUfunction f;
1233
+ void **kernelParams;
1234
+ void **extra;
1235
+ } cuLaunchKernelEx_ptsz_params;
1236
+
1237
+ typedef struct cuLaunchCooperativeKernel_ptsz_params_st {
1238
+ CUfunction f;
1239
+ unsigned int gridDimX;
1240
+ unsigned int gridDimY;
1241
+ unsigned int gridDimZ;
1242
+ unsigned int blockDimX;
1243
+ unsigned int blockDimY;
1244
+ unsigned int blockDimZ;
1245
+ unsigned int sharedMemBytes;
1246
+ CUstream hStream;
1247
+ void **kernelParams;
1248
+ } cuLaunchCooperativeKernel_ptsz_params;
1249
+
1250
+ typedef struct cuLaunchCooperativeKernelMultiDevice_params_st {
1251
+ CUDA_LAUNCH_PARAMS *launchParamsList;
1252
+ unsigned int numDevices;
1253
+ unsigned int flags;
1254
+ } cuLaunchCooperativeKernelMultiDevice_params;
1255
+
1256
+ typedef struct cuLaunchHostFunc_ptsz_params_st {
1257
+ CUstream hStream;
1258
+ CUhostFn fn;
1259
+ void *userData;
1260
+ } cuLaunchHostFunc_ptsz_params;
1261
+
1262
+ typedef struct cuFuncSetBlockShape_params_st {
1263
+ CUfunction hfunc;
1264
+ int x;
1265
+ int y;
1266
+ int z;
1267
+ } cuFuncSetBlockShape_params;
1268
+
1269
+ typedef struct cuFuncSetSharedSize_params_st {
1270
+ CUfunction hfunc;
1271
+ unsigned int bytes;
1272
+ } cuFuncSetSharedSize_params;
1273
+
1274
+ typedef struct cuParamSetSize_params_st {
1275
+ CUfunction hfunc;
1276
+ unsigned int numbytes;
1277
+ } cuParamSetSize_params;
1278
+
1279
+ typedef struct cuParamSeti_params_st {
1280
+ CUfunction hfunc;
1281
+ int offset;
1282
+ unsigned int value;
1283
+ } cuParamSeti_params;
1284
+
1285
+ typedef struct cuParamSetf_params_st {
1286
+ CUfunction hfunc;
1287
+ int offset;
1288
+ float value;
1289
+ } cuParamSetf_params;
1290
+
1291
+ typedef struct cuParamSetv_params_st {
1292
+ CUfunction hfunc;
1293
+ int offset;
1294
+ void *ptr;
1295
+ unsigned int numbytes;
1296
+ } cuParamSetv_params;
1297
+
1298
+ typedef struct cuLaunch_params_st {
1299
+ CUfunction f;
1300
+ } cuLaunch_params;
1301
+
1302
+ typedef struct cuLaunchGrid_params_st {
1303
+ CUfunction f;
1304
+ int grid_width;
1305
+ int grid_height;
1306
+ } cuLaunchGrid_params;
1307
+
1308
+ typedef struct cuLaunchGridAsync_params_st {
1309
+ CUfunction f;
1310
+ int grid_width;
1311
+ int grid_height;
1312
+ CUstream hStream;
1313
+ } cuLaunchGridAsync_params;
1314
+
1315
+ typedef struct cuParamSetTexRef_params_st {
1316
+ CUfunction hfunc;
1317
+ int texunit;
1318
+ CUtexref hTexRef;
1319
+ } cuParamSetTexRef_params;
1320
+
1321
+ typedef struct cuGraphCreate_params_st {
1322
+ CUgraph *phGraph;
1323
+ unsigned int flags;
1324
+ } cuGraphCreate_params;
1325
+
1326
+ typedef struct cuGraphAddKernelNode_params_st {
1327
+ CUgraphNode *phGraphNode;
1328
+ CUgraph hGraph;
1329
+ const CUgraphNode *dependencies;
1330
+ size_t numDependencies;
1331
+ const CUDA_KERNEL_NODE_PARAMS *nodeParams;
1332
+ } cuGraphAddKernelNode_params;
1333
+
1334
+ typedef struct cuGraphKernelNodeGetParams_params_st {
1335
+ CUgraphNode hNode;
1336
+ CUDA_KERNEL_NODE_PARAMS *nodeParams;
1337
+ } cuGraphKernelNodeGetParams_params;
1338
+
1339
+ typedef struct cuGraphKernelNodeSetParams_params_st {
1340
+ CUgraphNode hNode;
1341
+ const CUDA_KERNEL_NODE_PARAMS *nodeParams;
1342
+ } cuGraphKernelNodeSetParams_params;
1343
+
1344
+ typedef struct cuGraphAddMemcpyNode_params_st {
1345
+ CUgraphNode *phGraphNode;
1346
+ CUgraph hGraph;
1347
+ const CUgraphNode *dependencies;
1348
+ size_t numDependencies;
1349
+ const CUDA_MEMCPY3D *copyParams;
1350
+ CUcontext ctx;
1351
+ } cuGraphAddMemcpyNode_params;
1352
+
1353
+ typedef struct cuGraphMemcpyNodeGetParams_params_st {
1354
+ CUgraphNode hNode;
1355
+ CUDA_MEMCPY3D *nodeParams;
1356
+ } cuGraphMemcpyNodeGetParams_params;
1357
+
1358
+ typedef struct cuGraphMemcpyNodeSetParams_params_st {
1359
+ CUgraphNode hNode;
1360
+ const CUDA_MEMCPY3D *nodeParams;
1361
+ } cuGraphMemcpyNodeSetParams_params;
1362
+
1363
+ typedef struct cuGraphAddMemsetNode_params_st {
1364
+ CUgraphNode *phGraphNode;
1365
+ CUgraph hGraph;
1366
+ const CUgraphNode *dependencies;
1367
+ size_t numDependencies;
1368
+ const CUDA_MEMSET_NODE_PARAMS *memsetParams;
1369
+ CUcontext ctx;
1370
+ } cuGraphAddMemsetNode_params;
1371
+
1372
+ typedef struct cuGraphMemsetNodeGetParams_params_st {
1373
+ CUgraphNode hNode;
1374
+ CUDA_MEMSET_NODE_PARAMS *nodeParams;
1375
+ } cuGraphMemsetNodeGetParams_params;
1376
+
1377
+ typedef struct cuGraphMemsetNodeSetParams_params_st {
1378
+ CUgraphNode hNode;
1379
+ const CUDA_MEMSET_NODE_PARAMS *nodeParams;
1380
+ } cuGraphMemsetNodeSetParams_params;
1381
+
1382
+ typedef struct cuGraphAddHostNode_params_st {
1383
+ CUgraphNode *phGraphNode;
1384
+ CUgraph hGraph;
1385
+ const CUgraphNode *dependencies;
1386
+ size_t numDependencies;
1387
+ const CUDA_HOST_NODE_PARAMS *nodeParams;
1388
+ } cuGraphAddHostNode_params;
1389
+
1390
+ typedef struct cuGraphHostNodeGetParams_params_st {
1391
+ CUgraphNode hNode;
1392
+ CUDA_HOST_NODE_PARAMS *nodeParams;
1393
+ } cuGraphHostNodeGetParams_params;
1394
+
1395
+ typedef struct cuGraphHostNodeSetParams_params_st {
1396
+ CUgraphNode hNode;
1397
+ const CUDA_HOST_NODE_PARAMS *nodeParams;
1398
+ } cuGraphHostNodeSetParams_params;
1399
+
1400
+ typedef struct cuGraphAddChildGraphNode_params_st {
1401
+ CUgraphNode *phGraphNode;
1402
+ CUgraph hGraph;
1403
+ const CUgraphNode *dependencies;
1404
+ size_t numDependencies;
1405
+ CUgraph childGraph;
1406
+ } cuGraphAddChildGraphNode_params;
1407
+
1408
+ typedef struct cuGraphChildGraphNodeGetGraph_params_st {
1409
+ CUgraphNode hNode;
1410
+ CUgraph *phGraph;
1411
+ } cuGraphChildGraphNodeGetGraph_params;
1412
+
1413
+ typedef struct cuGraphAddEmptyNode_params_st {
1414
+ CUgraphNode *phGraphNode;
1415
+ CUgraph hGraph;
1416
+ const CUgraphNode *dependencies;
1417
+ size_t numDependencies;
1418
+ } cuGraphAddEmptyNode_params;
1419
+
1420
+ typedef struct cuGraphAddEventRecordNode_params_st {
1421
+ CUgraphNode *phGraphNode;
1422
+ CUgraph hGraph;
1423
+ const CUgraphNode *dependencies;
1424
+ size_t numDependencies;
1425
+ CUevent event;
1426
+ } cuGraphAddEventRecordNode_params;
1427
+
1428
+ typedef struct cuGraphEventRecordNodeGetEvent_params_st {
1429
+ CUgraphNode hNode;
1430
+ CUevent *event_out;
1431
+ } cuGraphEventRecordNodeGetEvent_params;
1432
+
1433
+ typedef struct cuGraphEventRecordNodeSetEvent_params_st {
1434
+ CUgraphNode hNode;
1435
+ CUevent event;
1436
+ } cuGraphEventRecordNodeSetEvent_params;
1437
+
1438
+ typedef struct cuGraphAddEventWaitNode_params_st {
1439
+ CUgraphNode *phGraphNode;
1440
+ CUgraph hGraph;
1441
+ const CUgraphNode *dependencies;
1442
+ size_t numDependencies;
1443
+ CUevent event;
1444
+ } cuGraphAddEventWaitNode_params;
1445
+
1446
+ typedef struct cuGraphEventWaitNodeGetEvent_params_st {
1447
+ CUgraphNode hNode;
1448
+ CUevent *event_out;
1449
+ } cuGraphEventWaitNodeGetEvent_params;
1450
+
1451
+ typedef struct cuGraphEventWaitNodeSetEvent_params_st {
1452
+ CUgraphNode hNode;
1453
+ CUevent event;
1454
+ } cuGraphEventWaitNodeSetEvent_params;
1455
+
1456
+ typedef struct cuGraphAddExternalSemaphoresSignalNode_params_st {
1457
+ CUgraphNode *phGraphNode;
1458
+ CUgraph hGraph;
1459
+ const CUgraphNode *dependencies;
1460
+ size_t numDependencies;
1461
+ const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams;
1462
+ } cuGraphAddExternalSemaphoresSignalNode_params;
1463
+
1464
+ typedef struct cuGraphExternalSemaphoresSignalNodeGetParams_params_st {
1465
+ CUgraphNode hNode;
1466
+ CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *params_out;
1467
+ } cuGraphExternalSemaphoresSignalNodeGetParams_params;
1468
+
1469
+ typedef struct cuGraphExternalSemaphoresSignalNodeSetParams_params_st {
1470
+ CUgraphNode hNode;
1471
+ const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams;
1472
+ } cuGraphExternalSemaphoresSignalNodeSetParams_params;
1473
+
1474
+ typedef struct cuGraphAddExternalSemaphoresWaitNode_params_st {
1475
+ CUgraphNode *phGraphNode;
1476
+ CUgraph hGraph;
1477
+ const CUgraphNode *dependencies;
1478
+ size_t numDependencies;
1479
+ const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams;
1480
+ } cuGraphAddExternalSemaphoresWaitNode_params;
1481
+
1482
+ typedef struct cuGraphExternalSemaphoresWaitNodeGetParams_params_st {
1483
+ CUgraphNode hNode;
1484
+ CUDA_EXT_SEM_WAIT_NODE_PARAMS *params_out;
1485
+ } cuGraphExternalSemaphoresWaitNodeGetParams_params;
1486
+
1487
+ typedef struct cuGraphExternalSemaphoresWaitNodeSetParams_params_st {
1488
+ CUgraphNode hNode;
1489
+ const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams;
1490
+ } cuGraphExternalSemaphoresWaitNodeSetParams_params;
1491
+
1492
+ typedef struct cuGraphAddMemAllocNode_params_st {
1493
+ CUgraphNode *phGraphNode;
1494
+ CUgraph hGraph;
1495
+ const CUgraphNode *dependencies;
1496
+ size_t numDependencies;
1497
+ CUDA_MEM_ALLOC_NODE_PARAMS *nodeParams;
1498
+ } cuGraphAddMemAllocNode_params;
1499
+
1500
+ typedef struct cuGraphMemAllocNodeGetParams_params_st {
1501
+ CUgraphNode hNode;
1502
+ CUDA_MEM_ALLOC_NODE_PARAMS *params_out;
1503
+ } cuGraphMemAllocNodeGetParams_params;
1504
+
1505
+ typedef struct cuGraphAddMemFreeNode_params_st {
1506
+ CUgraphNode *phGraphNode;
1507
+ CUgraph hGraph;
1508
+ const CUgraphNode *dependencies;
1509
+ size_t numDependencies;
1510
+ CUdeviceptr dptr;
1511
+ } cuGraphAddMemFreeNode_params;
1512
+
1513
+ typedef struct cuGraphMemFreeNodeGetParams_params_st {
1514
+ CUgraphNode hNode;
1515
+ CUdeviceptr *dptr_out;
1516
+ } cuGraphMemFreeNodeGetParams_params;
1517
+
1518
+ typedef struct cuDeviceGraphMemTrim_params_st {
1519
+ CUdevice device;
1520
+ } cuDeviceGraphMemTrim_params;
1521
+
1522
+ typedef struct cuDeviceGetGraphMemAttribute_params_st {
1523
+ CUdevice device;
1524
+ CUgraphMem_attribute attr;
1525
+ void *value;
1526
+ } cuDeviceGetGraphMemAttribute_params;
1527
+
1528
+ typedef struct cuDeviceSetGraphMemAttribute_params_st {
1529
+ CUdevice device;
1530
+ CUgraphMem_attribute attr;
1531
+ void *value;
1532
+ } cuDeviceSetGraphMemAttribute_params;
1533
+
1534
+ typedef struct cuGraphClone_params_st {
1535
+ CUgraph *phGraphClone;
1536
+ CUgraph originalGraph;
1537
+ } cuGraphClone_params;
1538
+
1539
+ typedef struct cuGraphNodeFindInClone_params_st {
1540
+ CUgraphNode *phNode;
1541
+ CUgraphNode hOriginalNode;
1542
+ CUgraph hClonedGraph;
1543
+ } cuGraphNodeFindInClone_params;
1544
+
1545
+ typedef struct cuGraphNodeGetType_params_st {
1546
+ CUgraphNode hNode;
1547
+ CUgraphNodeType *type;
1548
+ } cuGraphNodeGetType_params;
1549
+
1550
+ typedef struct cuGraphGetNodes_params_st {
1551
+ CUgraph hGraph;
1552
+ CUgraphNode *nodes;
1553
+ size_t *numNodes;
1554
+ } cuGraphGetNodes_params;
1555
+
1556
+ typedef struct cuGraphGetRootNodes_params_st {
1557
+ CUgraph hGraph;
1558
+ CUgraphNode *rootNodes;
1559
+ size_t *numRootNodes;
1560
+ } cuGraphGetRootNodes_params;
1561
+
1562
+ typedef struct cuGraphGetEdges_params_st {
1563
+ CUgraph hGraph;
1564
+ CUgraphNode *from;
1565
+ CUgraphNode *to;
1566
+ size_t *numEdges;
1567
+ } cuGraphGetEdges_params;
1568
+
1569
+ typedef struct cuGraphNodeGetDependencies_params_st {
1570
+ CUgraphNode hNode;
1571
+ CUgraphNode *dependencies;
1572
+ size_t *numDependencies;
1573
+ } cuGraphNodeGetDependencies_params;
1574
+
1575
+ typedef struct cuGraphNodeGetDependentNodes_params_st {
1576
+ CUgraphNode hNode;
1577
+ CUgraphNode *dependentNodes;
1578
+ size_t *numDependentNodes;
1579
+ } cuGraphNodeGetDependentNodes_params;
1580
+
1581
+ typedef struct cuGraphAddDependencies_params_st {
1582
+ CUgraph hGraph;
1583
+ const CUgraphNode *from;
1584
+ const CUgraphNode *to;
1585
+ size_t numDependencies;
1586
+ } cuGraphAddDependencies_params;
1587
+
1588
+ typedef struct cuGraphRemoveDependencies_params_st {
1589
+ CUgraph hGraph;
1590
+ const CUgraphNode *from;
1591
+ const CUgraphNode *to;
1592
+ size_t numDependencies;
1593
+ } cuGraphRemoveDependencies_params;
1594
+
1595
+ typedef struct cuGraphDestroyNode_params_st {
1596
+ CUgraphNode hNode;
1597
+ } cuGraphDestroyNode_params;
1598
+
1599
+ typedef struct cuGraphInstantiate_v2_params_st {
1600
+ CUgraphExec *phGraphExec;
1601
+ CUgraph hGraph;
1602
+ CUgraphNode *phErrorNode;
1603
+ char *logBuffer;
1604
+ size_t bufferSize;
1605
+ } cuGraphInstantiate_v2_params;
1606
+
1607
+ typedef struct cuGraphInstantiateWithFlags_params_st {
1608
+ CUgraphExec *phGraphExec;
1609
+ CUgraph hGraph;
1610
+ unsigned long long flags;
1611
+ } cuGraphInstantiateWithFlags_params;
1612
+
1613
+ typedef struct cuGraphExecKernelNodeSetParams_params_st {
1614
+ CUgraphExec hGraphExec;
1615
+ CUgraphNode hNode;
1616
+ const CUDA_KERNEL_NODE_PARAMS *nodeParams;
1617
+ } cuGraphExecKernelNodeSetParams_params;
1618
+
1619
+ typedef struct cuGraphExecMemcpyNodeSetParams_params_st {
1620
+ CUgraphExec hGraphExec;
1621
+ CUgraphNode hNode;
1622
+ const CUDA_MEMCPY3D *copyParams;
1623
+ CUcontext ctx;
1624
+ } cuGraphExecMemcpyNodeSetParams_params;
1625
+
1626
+ typedef struct cuGraphExecMemsetNodeSetParams_params_st {
1627
+ CUgraphExec hGraphExec;
1628
+ CUgraphNode hNode;
1629
+ const CUDA_MEMSET_NODE_PARAMS *memsetParams;
1630
+ CUcontext ctx;
1631
+ } cuGraphExecMemsetNodeSetParams_params;
1632
+
1633
+ typedef struct cuGraphExecHostNodeSetParams_params_st {
1634
+ CUgraphExec hGraphExec;
1635
+ CUgraphNode hNode;
1636
+ const CUDA_HOST_NODE_PARAMS *nodeParams;
1637
+ } cuGraphExecHostNodeSetParams_params;
1638
+
1639
+ typedef struct cuGraphExecChildGraphNodeSetParams_params_st {
1640
+ CUgraphExec hGraphExec;
1641
+ CUgraphNode hNode;
1642
+ CUgraph childGraph;
1643
+ } cuGraphExecChildGraphNodeSetParams_params;
1644
+
1645
+ typedef struct cuGraphExecEventRecordNodeSetEvent_params_st {
1646
+ CUgraphExec hGraphExec;
1647
+ CUgraphNode hNode;
1648
+ CUevent event;
1649
+ } cuGraphExecEventRecordNodeSetEvent_params;
1650
+
1651
+ typedef struct cuGraphExecEventWaitNodeSetEvent_params_st {
1652
+ CUgraphExec hGraphExec;
1653
+ CUgraphNode hNode;
1654
+ CUevent event;
1655
+ } cuGraphExecEventWaitNodeSetEvent_params;
1656
+
1657
+ typedef struct cuGraphExecExternalSemaphoresSignalNodeSetParams_params_st {
1658
+ CUgraphExec hGraphExec;
1659
+ CUgraphNode hNode;
1660
+ const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams;
1661
+ } cuGraphExecExternalSemaphoresSignalNodeSetParams_params;
1662
+
1663
+ typedef struct cuGraphExecExternalSemaphoresWaitNodeSetParams_params_st {
1664
+ CUgraphExec hGraphExec;
1665
+ CUgraphNode hNode;
1666
+ const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams;
1667
+ } cuGraphExecExternalSemaphoresWaitNodeSetParams_params;
1668
+
1669
+ typedef struct cuGraphNodeSetEnabled_params_st {
1670
+ CUgraphExec hGraphExec;
1671
+ CUgraphNode hNode;
1672
+ unsigned int isEnabled;
1673
+ } cuGraphNodeSetEnabled_params;
1674
+
1675
+ typedef struct cuGraphNodeGetEnabled_params_st {
1676
+ CUgraphExec hGraphExec;
1677
+ CUgraphNode hNode;
1678
+ unsigned int *isEnabled;
1679
+ } cuGraphNodeGetEnabled_params;
1680
+
1681
+ typedef struct cuGraphUpload_ptsz_params_st {
1682
+ CUgraphExec hGraphExec;
1683
+ CUstream hStream;
1684
+ } cuGraphUpload_ptsz_params;
1685
+
1686
+ typedef struct cuGraphLaunch_ptsz_params_st {
1687
+ CUgraphExec hGraphExec;
1688
+ CUstream hStream;
1689
+ } cuGraphLaunch_ptsz_params;
1690
+
1691
+ typedef struct cuGraphExecDestroy_params_st {
1692
+ CUgraphExec hGraphExec;
1693
+ } cuGraphExecDestroy_params;
1694
+
1695
+ typedef struct cuGraphDestroy_params_st {
1696
+ CUgraph hGraph;
1697
+ } cuGraphDestroy_params;
1698
+
1699
+ typedef struct cuGraphExecUpdate_params_st {
1700
+ CUgraphExec hGraphExec;
1701
+ CUgraph hGraph;
1702
+ CUgraphNode *hErrorNode_out;
1703
+ CUgraphExecUpdateResult *updateResult_out;
1704
+ } cuGraphExecUpdate_params;
1705
+
1706
+ typedef struct cuGraphKernelNodeCopyAttributes_params_st {
1707
+ CUgraphNode dst;
1708
+ CUgraphNode src;
1709
+ } cuGraphKernelNodeCopyAttributes_params;
1710
+
1711
+ typedef struct cuGraphKernelNodeGetAttribute_params_st {
1712
+ CUgraphNode hNode;
1713
+ CUkernelNodeAttrID attr;
1714
+ CUkernelNodeAttrValue *value_out;
1715
+ } cuGraphKernelNodeGetAttribute_params;
1716
+
1717
+ typedef struct cuGraphKernelNodeSetAttribute_params_st {
1718
+ CUgraphNode hNode;
1719
+ CUkernelNodeAttrID attr;
1720
+ const CUkernelNodeAttrValue *value;
1721
+ } cuGraphKernelNodeSetAttribute_params;
1722
+
1723
+ typedef struct cuGraphDebugDotPrint_params_st {
1724
+ CUgraph hGraph;
1725
+ const char *path;
1726
+ unsigned int flags;
1727
+ } cuGraphDebugDotPrint_params;
1728
+
1729
+ typedef struct cuUserObjectCreate_params_st {
1730
+ CUuserObject *object_out;
1731
+ void *ptr;
1732
+ CUhostFn destroy;
1733
+ unsigned int initialRefcount;
1734
+ unsigned int flags;
1735
+ } cuUserObjectCreate_params;
1736
+
1737
+ typedef struct cuUserObjectRetain_params_st {
1738
+ CUuserObject object;
1739
+ unsigned int count;
1740
+ } cuUserObjectRetain_params;
1741
+
1742
+ typedef struct cuUserObjectRelease_params_st {
1743
+ CUuserObject object;
1744
+ unsigned int count;
1745
+ } cuUserObjectRelease_params;
1746
+
1747
+ typedef struct cuGraphRetainUserObject_params_st {
1748
+ CUgraph graph;
1749
+ CUuserObject object;
1750
+ unsigned int count;
1751
+ unsigned int flags;
1752
+ } cuGraphRetainUserObject_params;
1753
+
1754
+ typedef struct cuGraphReleaseUserObject_params_st {
1755
+ CUgraph graph;
1756
+ CUuserObject object;
1757
+ unsigned int count;
1758
+ } cuGraphReleaseUserObject_params;
1759
+
1760
+ typedef struct cuOccupancyMaxActiveBlocksPerMultiprocessor_params_st {
1761
+ int *numBlocks;
1762
+ CUfunction func;
1763
+ int blockSize;
1764
+ size_t dynamicSMemSize;
1765
+ } cuOccupancyMaxActiveBlocksPerMultiprocessor_params;
1766
+
1767
+ typedef struct cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_params_st {
1768
+ int *numBlocks;
1769
+ CUfunction func;
1770
+ int blockSize;
1771
+ size_t dynamicSMemSize;
1772
+ unsigned int flags;
1773
+ } cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_params;
1774
+
1775
+ typedef struct cuOccupancyMaxPotentialBlockSize_params_st {
1776
+ int *minGridSize;
1777
+ int *blockSize;
1778
+ CUfunction func;
1779
+ CUoccupancyB2DSize blockSizeToDynamicSMemSize;
1780
+ size_t dynamicSMemSize;
1781
+ int blockSizeLimit;
1782
+ } cuOccupancyMaxPotentialBlockSize_params;
1783
+
1784
+ typedef struct cuOccupancyMaxPotentialBlockSizeWithFlags_params_st {
1785
+ int *minGridSize;
1786
+ int *blockSize;
1787
+ CUfunction func;
1788
+ CUoccupancyB2DSize blockSizeToDynamicSMemSize;
1789
+ size_t dynamicSMemSize;
1790
+ int blockSizeLimit;
1791
+ unsigned int flags;
1792
+ } cuOccupancyMaxPotentialBlockSizeWithFlags_params;
1793
+
1794
+ typedef struct cuOccupancyAvailableDynamicSMemPerBlock_params_st {
1795
+ size_t *dynamicSmemSize;
1796
+ CUfunction func;
1797
+ int numBlocks;
1798
+ int blockSize;
1799
+ } cuOccupancyAvailableDynamicSMemPerBlock_params;
1800
+
1801
+ typedef struct cuOccupancyMaxPotentialClusterSize_params_st {
1802
+ int *clusterSize;
1803
+ CUfunction func;
1804
+ const CUlaunchConfig *config;
1805
+ } cuOccupancyMaxPotentialClusterSize_params;
1806
+
1807
+ typedef struct cuOccupancyMaxActiveClusters_params_st {
1808
+ int *numClusters;
1809
+ CUfunction func;
1810
+ const CUlaunchConfig *config;
1811
+ } cuOccupancyMaxActiveClusters_params;
1812
+
1813
+ typedef struct cuTexRefSetArray_params_st {
1814
+ CUtexref hTexRef;
1815
+ CUarray hArray;
1816
+ unsigned int Flags;
1817
+ } cuTexRefSetArray_params;
1818
+
1819
+ typedef struct cuTexRefSetMipmappedArray_params_st {
1820
+ CUtexref hTexRef;
1821
+ CUmipmappedArray hMipmappedArray;
1822
+ unsigned int Flags;
1823
+ } cuTexRefSetMipmappedArray_params;
1824
+
1825
+ typedef struct cuTexRefSetAddress_v2_params_st {
1826
+ size_t *ByteOffset;
1827
+ CUtexref hTexRef;
1828
+ CUdeviceptr dptr;
1829
+ size_t bytes;
1830
+ } cuTexRefSetAddress_v2_params;
1831
+
1832
+ typedef struct cuTexRefSetAddress2D_v3_params_st {
1833
+ CUtexref hTexRef;
1834
+ const CUDA_ARRAY_DESCRIPTOR *desc;
1835
+ CUdeviceptr dptr;
1836
+ size_t Pitch;
1837
+ } cuTexRefSetAddress2D_v3_params;
1838
+
1839
+ typedef struct cuTexRefSetFormat_params_st {
1840
+ CUtexref hTexRef;
1841
+ CUarray_format fmt;
1842
+ int NumPackedComponents;
1843
+ } cuTexRefSetFormat_params;
1844
+
1845
+ typedef struct cuTexRefSetAddressMode_params_st {
1846
+ CUtexref hTexRef;
1847
+ int dim;
1848
+ CUaddress_mode am;
1849
+ } cuTexRefSetAddressMode_params;
1850
+
1851
+ typedef struct cuTexRefSetFilterMode_params_st {
1852
+ CUtexref hTexRef;
1853
+ CUfilter_mode fm;
1854
+ } cuTexRefSetFilterMode_params;
1855
+
1856
+ typedef struct cuTexRefSetMipmapFilterMode_params_st {
1857
+ CUtexref hTexRef;
1858
+ CUfilter_mode fm;
1859
+ } cuTexRefSetMipmapFilterMode_params;
1860
+
1861
+ typedef struct cuTexRefSetMipmapLevelBias_params_st {
1862
+ CUtexref hTexRef;
1863
+ float bias;
1864
+ } cuTexRefSetMipmapLevelBias_params;
1865
+
1866
+ typedef struct cuTexRefSetMipmapLevelClamp_params_st {
1867
+ CUtexref hTexRef;
1868
+ float minMipmapLevelClamp;
1869
+ float maxMipmapLevelClamp;
1870
+ } cuTexRefSetMipmapLevelClamp_params;
1871
+
1872
+ typedef struct cuTexRefSetMaxAnisotropy_params_st {
1873
+ CUtexref hTexRef;
1874
+ unsigned int maxAniso;
1875
+ } cuTexRefSetMaxAnisotropy_params;
1876
+
1877
+ typedef struct cuTexRefSetBorderColor_params_st {
1878
+ CUtexref hTexRef;
1879
+ float *pBorderColor;
1880
+ } cuTexRefSetBorderColor_params;
1881
+
1882
+ typedef struct cuTexRefSetFlags_params_st {
1883
+ CUtexref hTexRef;
1884
+ unsigned int Flags;
1885
+ } cuTexRefSetFlags_params;
1886
+
1887
+ typedef struct cuTexRefGetAddress_v2_params_st {
1888
+ CUdeviceptr *pdptr;
1889
+ CUtexref hTexRef;
1890
+ } cuTexRefGetAddress_v2_params;
1891
+
1892
+ typedef struct cuTexRefGetArray_params_st {
1893
+ CUarray *phArray;
1894
+ CUtexref hTexRef;
1895
+ } cuTexRefGetArray_params;
1896
+
1897
+ typedef struct cuTexRefGetMipmappedArray_params_st {
1898
+ CUmipmappedArray *phMipmappedArray;
1899
+ CUtexref hTexRef;
1900
+ } cuTexRefGetMipmappedArray_params;
1901
+
1902
+ typedef struct cuTexRefGetAddressMode_params_st {
1903
+ CUaddress_mode *pam;
1904
+ CUtexref hTexRef;
1905
+ int dim;
1906
+ } cuTexRefGetAddressMode_params;
1907
+
1908
+ typedef struct cuTexRefGetFilterMode_params_st {
1909
+ CUfilter_mode *pfm;
1910
+ CUtexref hTexRef;
1911
+ } cuTexRefGetFilterMode_params;
1912
+
1913
+ typedef struct cuTexRefGetFormat_params_st {
1914
+ CUarray_format *pFormat;
1915
+ int *pNumChannels;
1916
+ CUtexref hTexRef;
1917
+ } cuTexRefGetFormat_params;
1918
+
1919
+ typedef struct cuTexRefGetMipmapFilterMode_params_st {
1920
+ CUfilter_mode *pfm;
1921
+ CUtexref hTexRef;
1922
+ } cuTexRefGetMipmapFilterMode_params;
1923
+
1924
+ typedef struct cuTexRefGetMipmapLevelBias_params_st {
1925
+ float *pbias;
1926
+ CUtexref hTexRef;
1927
+ } cuTexRefGetMipmapLevelBias_params;
1928
+
1929
+ typedef struct cuTexRefGetMipmapLevelClamp_params_st {
1930
+ float *pminMipmapLevelClamp;
1931
+ float *pmaxMipmapLevelClamp;
1932
+ CUtexref hTexRef;
1933
+ } cuTexRefGetMipmapLevelClamp_params;
1934
+
1935
+ typedef struct cuTexRefGetMaxAnisotropy_params_st {
1936
+ int *pmaxAniso;
1937
+ CUtexref hTexRef;
1938
+ } cuTexRefGetMaxAnisotropy_params;
1939
+
1940
+ typedef struct cuTexRefGetBorderColor_params_st {
1941
+ float *pBorderColor;
1942
+ CUtexref hTexRef;
1943
+ } cuTexRefGetBorderColor_params;
1944
+
1945
+ typedef struct cuTexRefGetFlags_params_st {
1946
+ unsigned int *pFlags;
1947
+ CUtexref hTexRef;
1948
+ } cuTexRefGetFlags_params;
1949
+
1950
+ typedef struct cuTexRefCreate_params_st {
1951
+ CUtexref *pTexRef;
1952
+ } cuTexRefCreate_params;
1953
+
1954
+ typedef struct cuTexRefDestroy_params_st {
1955
+ CUtexref hTexRef;
1956
+ } cuTexRefDestroy_params;
1957
+
1958
+ typedef struct cuSurfRefSetArray_params_st {
1959
+ CUsurfref hSurfRef;
1960
+ CUarray hArray;
1961
+ unsigned int Flags;
1962
+ } cuSurfRefSetArray_params;
1963
+
1964
+ typedef struct cuSurfRefGetArray_params_st {
1965
+ CUarray *phArray;
1966
+ CUsurfref hSurfRef;
1967
+ } cuSurfRefGetArray_params;
1968
+
1969
+ typedef struct cuTexObjectCreate_params_st {
1970
+ CUtexObject *pTexObject;
1971
+ const CUDA_RESOURCE_DESC *pResDesc;
1972
+ const CUDA_TEXTURE_DESC *pTexDesc;
1973
+ const CUDA_RESOURCE_VIEW_DESC *pResViewDesc;
1974
+ } cuTexObjectCreate_params;
1975
+
1976
+ typedef struct cuTexObjectDestroy_params_st {
1977
+ CUtexObject texObject;
1978
+ } cuTexObjectDestroy_params;
1979
+
1980
+ typedef struct cuTexObjectGetResourceDesc_params_st {
1981
+ CUDA_RESOURCE_DESC *pResDesc;
1982
+ CUtexObject texObject;
1983
+ } cuTexObjectGetResourceDesc_params;
1984
+
1985
+ typedef struct cuTexObjectGetTextureDesc_params_st {
1986
+ CUDA_TEXTURE_DESC *pTexDesc;
1987
+ CUtexObject texObject;
1988
+ } cuTexObjectGetTextureDesc_params;
1989
+
1990
+ typedef struct cuTexObjectGetResourceViewDesc_params_st {
1991
+ CUDA_RESOURCE_VIEW_DESC *pResViewDesc;
1992
+ CUtexObject texObject;
1993
+ } cuTexObjectGetResourceViewDesc_params;
1994
+
1995
+ typedef struct cuSurfObjectCreate_params_st {
1996
+ CUsurfObject *pSurfObject;
1997
+ const CUDA_RESOURCE_DESC *pResDesc;
1998
+ } cuSurfObjectCreate_params;
1999
+
2000
+ typedef struct cuSurfObjectDestroy_params_st {
2001
+ CUsurfObject surfObject;
2002
+ } cuSurfObjectDestroy_params;
2003
+
2004
+ typedef struct cuSurfObjectGetResourceDesc_params_st {
2005
+ CUDA_RESOURCE_DESC *pResDesc;
2006
+ CUsurfObject surfObject;
2007
+ } cuSurfObjectGetResourceDesc_params;
2008
+
2009
+ typedef struct cuDeviceCanAccessPeer_params_st {
2010
+ int *canAccessPeer;
2011
+ CUdevice dev;
2012
+ CUdevice peerDev;
2013
+ } cuDeviceCanAccessPeer_params;
2014
+
2015
+ typedef struct cuCtxEnablePeerAccess_params_st {
2016
+ CUcontext peerContext;
2017
+ unsigned int Flags;
2018
+ } cuCtxEnablePeerAccess_params;
2019
+
2020
+ typedef struct cuCtxDisablePeerAccess_params_st {
2021
+ CUcontext peerContext;
2022
+ } cuCtxDisablePeerAccess_params;
2023
+
2024
+ typedef struct cuDeviceGetP2PAttribute_params_st {
2025
+ int *value;
2026
+ CUdevice_P2PAttribute attrib;
2027
+ CUdevice srcDevice;
2028
+ CUdevice dstDevice;
2029
+ } cuDeviceGetP2PAttribute_params;
2030
+
2031
+ typedef struct cuGraphicsUnregisterResource_params_st {
2032
+ CUgraphicsResource resource;
2033
+ } cuGraphicsUnregisterResource_params;
2034
+
2035
+ typedef struct cuGraphicsSubResourceGetMappedArray_params_st {
2036
+ CUarray *pArray;
2037
+ CUgraphicsResource resource;
2038
+ unsigned int arrayIndex;
2039
+ unsigned int mipLevel;
2040
+ } cuGraphicsSubResourceGetMappedArray_params;
2041
+
2042
+ typedef struct cuGraphicsResourceGetMappedMipmappedArray_params_st {
2043
+ CUmipmappedArray *pMipmappedArray;
2044
+ CUgraphicsResource resource;
2045
+ } cuGraphicsResourceGetMappedMipmappedArray_params;
2046
+
2047
+ typedef struct cuGraphicsResourceGetMappedPointer_v2_params_st {
2048
+ CUdeviceptr *pDevPtr;
2049
+ size_t *pSize;
2050
+ CUgraphicsResource resource;
2051
+ } cuGraphicsResourceGetMappedPointer_v2_params;
2052
+
2053
+ typedef struct cuGraphicsResourceSetMapFlags_v2_params_st {
2054
+ CUgraphicsResource resource;
2055
+ unsigned int flags;
2056
+ } cuGraphicsResourceSetMapFlags_v2_params;
2057
+
2058
+ typedef struct cuGraphicsMapResources_ptsz_params_st {
2059
+ unsigned int count;
2060
+ CUgraphicsResource *resources;
2061
+ CUstream hStream;
2062
+ } cuGraphicsMapResources_ptsz_params;
2063
+
2064
+ typedef struct cuGraphicsUnmapResources_ptsz_params_st {
2065
+ unsigned int count;
2066
+ CUgraphicsResource *resources;
2067
+ CUstream hStream;
2068
+ } cuGraphicsUnmapResources_ptsz_params;
2069
+
2070
+ typedef struct cuGetProcAddress_params_st {
2071
+ const char *symbol;
2072
+ void **pfn;
2073
+ int cudaVersion;
2074
+ cuuint64_t flags;
2075
+ } cuGetProcAddress_params;
2076
+
2077
+ typedef struct cuModuleGetLoadingMode_params_st {
2078
+ CUmoduleLoadingMode *mode;
2079
+ } cuModuleGetLoadingMode_params;
2080
+
2081
+ typedef struct cuMemGetHandleForAddressRange_params_st {
2082
+ void *handle;
2083
+ CUdeviceptr dptr;
2084
+ size_t size;
2085
+ CUmemRangeHandleType handleType;
2086
+ unsigned long long flags;
2087
+ } cuMemGetHandleForAddressRange_params;
2088
+
2089
+ typedef struct cuGetExportTable_params_st {
2090
+ const void **ppExportTable;
2091
+ const CUuuid *pExportTableId;
2092
+ } cuGetExportTable_params;
2093
+
2094
+ typedef struct cuMemHostRegister_params_st {
2095
+ void *p;
2096
+ size_t bytesize;
2097
+ unsigned int Flags;
2098
+ } cuMemHostRegister_params;
2099
+
2100
+ typedef struct cuGraphicsResourceSetMapFlags_params_st {
2101
+ CUgraphicsResource resource;
2102
+ unsigned int flags;
2103
+ } cuGraphicsResourceSetMapFlags_params;
2104
+
2105
+ typedef struct cuLinkCreate_params_st {
2106
+ unsigned int numOptions;
2107
+ CUjit_option *options;
2108
+ void **optionValues;
2109
+ CUlinkState *stateOut;
2110
+ } cuLinkCreate_params;
2111
+
2112
+ typedef struct cuLinkAddData_params_st {
2113
+ CUlinkState state;
2114
+ CUjitInputType type;
2115
+ void *data;
2116
+ size_t size;
2117
+ const char *name;
2118
+ unsigned int numOptions;
2119
+ CUjit_option *options;
2120
+ void **optionValues;
2121
+ } cuLinkAddData_params;
2122
+
2123
+ typedef struct cuLinkAddFile_params_st {
2124
+ CUlinkState state;
2125
+ CUjitInputType type;
2126
+ const char *path;
2127
+ unsigned int numOptions;
2128
+ CUjit_option *options;
2129
+ void **optionValues;
2130
+ } cuLinkAddFile_params;
2131
+
2132
+ typedef struct cuTexRefSetAddress2D_v2_params_st {
2133
+ CUtexref hTexRef;
2134
+ const CUDA_ARRAY_DESCRIPTOR *desc;
2135
+ CUdeviceptr dptr;
2136
+ size_t Pitch;
2137
+ } cuTexRefSetAddress2D_v2_params;
2138
+
2139
+ typedef struct cuDeviceTotalMem_params_st {
2140
+ unsigned int *bytes;
2141
+ CUdevice dev;
2142
+ } cuDeviceTotalMem_params;
2143
+
2144
+ typedef struct cuCtxCreate_params_st {
2145
+ CUcontext *pctx;
2146
+ unsigned int flags;
2147
+ CUdevice dev;
2148
+ } cuCtxCreate_params;
2149
+
2150
+ typedef struct cuModuleGetGlobal_params_st {
2151
+ CUdeviceptr_v1 *dptr;
2152
+ unsigned int *bytes;
2153
+ CUmodule hmod;
2154
+ const char *name;
2155
+ } cuModuleGetGlobal_params;
2156
+
2157
+ typedef struct cuMemGetInfo_params_st {
2158
+ unsigned int *free;
2159
+ unsigned int *total;
2160
+ } cuMemGetInfo_params;
2161
+
2162
+ typedef struct cuMemAlloc_params_st {
2163
+ CUdeviceptr_v1 *dptr;
2164
+ unsigned int bytesize;
2165
+ } cuMemAlloc_params;
2166
+
2167
+ typedef struct cuMemAllocPitch_params_st {
2168
+ CUdeviceptr_v1 *dptr;
2169
+ unsigned int *pPitch;
2170
+ unsigned int WidthInBytes;
2171
+ unsigned int Height;
2172
+ unsigned int ElementSizeBytes;
2173
+ } cuMemAllocPitch_params;
2174
+
2175
+ typedef struct cuMemFree_params_st {
2176
+ CUdeviceptr_v1 dptr;
2177
+ } cuMemFree_params;
2178
+
2179
+ typedef struct cuMemGetAddressRange_params_st {
2180
+ CUdeviceptr_v1 *pbase;
2181
+ unsigned int *psize;
2182
+ CUdeviceptr_v1 dptr;
2183
+ } cuMemGetAddressRange_params;
2184
+
2185
+ typedef struct cuMemAllocHost_params_st {
2186
+ void **pp;
2187
+ unsigned int bytesize;
2188
+ } cuMemAllocHost_params;
2189
+
2190
+ typedef struct cuMemHostGetDevicePointer_params_st {
2191
+ CUdeviceptr_v1 *pdptr;
2192
+ void *p;
2193
+ unsigned int Flags;
2194
+ } cuMemHostGetDevicePointer_params;
2195
+
2196
+ typedef struct cuMemcpyHtoD_params_st {
2197
+ CUdeviceptr_v1 dstDevice;
2198
+ const void *srcHost;
2199
+ unsigned int ByteCount;
2200
+ } cuMemcpyHtoD_params;
2201
+
2202
+ typedef struct cuMemcpyDtoH_params_st {
2203
+ void *dstHost;
2204
+ CUdeviceptr_v1 srcDevice;
2205
+ unsigned int ByteCount;
2206
+ } cuMemcpyDtoH_params;
2207
+
2208
+ typedef struct cuMemcpyDtoD_params_st {
2209
+ CUdeviceptr_v1 dstDevice;
2210
+ CUdeviceptr_v1 srcDevice;
2211
+ unsigned int ByteCount;
2212
+ } cuMemcpyDtoD_params;
2213
+
2214
+ typedef struct cuMemcpyDtoA_params_st {
2215
+ CUarray dstArray;
2216
+ unsigned int dstOffset;
2217
+ CUdeviceptr_v1 srcDevice;
2218
+ unsigned int ByteCount;
2219
+ } cuMemcpyDtoA_params;
2220
+
2221
+ typedef struct cuMemcpyAtoD_params_st {
2222
+ CUdeviceptr_v1 dstDevice;
2223
+ CUarray srcArray;
2224
+ unsigned int srcOffset;
2225
+ unsigned int ByteCount;
2226
+ } cuMemcpyAtoD_params;
2227
+
2228
+ typedef struct cuMemcpyHtoA_params_st {
2229
+ CUarray dstArray;
2230
+ unsigned int dstOffset;
2231
+ const void *srcHost;
2232
+ unsigned int ByteCount;
2233
+ } cuMemcpyHtoA_params;
2234
+
2235
+ typedef struct cuMemcpyAtoH_params_st {
2236
+ void *dstHost;
2237
+ CUarray srcArray;
2238
+ unsigned int srcOffset;
2239
+ unsigned int ByteCount;
2240
+ } cuMemcpyAtoH_params;
2241
+
2242
+ typedef struct cuMemcpyAtoA_params_st {
2243
+ CUarray dstArray;
2244
+ unsigned int dstOffset;
2245
+ CUarray srcArray;
2246
+ unsigned int srcOffset;
2247
+ unsigned int ByteCount;
2248
+ } cuMemcpyAtoA_params;
2249
+
2250
+ typedef struct cuMemcpyHtoAAsync_params_st {
2251
+ CUarray dstArray;
2252
+ unsigned int dstOffset;
2253
+ const void *srcHost;
2254
+ unsigned int ByteCount;
2255
+ CUstream hStream;
2256
+ } cuMemcpyHtoAAsync_params;
2257
+
2258
+ typedef struct cuMemcpyAtoHAsync_params_st {
2259
+ void *dstHost;
2260
+ CUarray srcArray;
2261
+ unsigned int srcOffset;
2262
+ unsigned int ByteCount;
2263
+ CUstream hStream;
2264
+ } cuMemcpyAtoHAsync_params;
2265
+
2266
+ typedef struct cuMemcpy2D_params_st {
2267
+ const CUDA_MEMCPY2D_v1 *pCopy;
2268
+ } cuMemcpy2D_params;
2269
+
2270
+ typedef struct cuMemcpy2DUnaligned_params_st {
2271
+ const CUDA_MEMCPY2D_v1 *pCopy;
2272
+ } cuMemcpy2DUnaligned_params;
2273
+
2274
+ typedef struct cuMemcpy3D_params_st {
2275
+ const CUDA_MEMCPY3D_v1 *pCopy;
2276
+ } cuMemcpy3D_params;
2277
+
2278
+ typedef struct cuMemcpyHtoDAsync_params_st {
2279
+ CUdeviceptr_v1 dstDevice;
2280
+ const void *srcHost;
2281
+ unsigned int ByteCount;
2282
+ CUstream hStream;
2283
+ } cuMemcpyHtoDAsync_params;
2284
+
2285
+ typedef struct cuMemcpyDtoHAsync_params_st {
2286
+ void *dstHost;
2287
+ CUdeviceptr_v1 srcDevice;
2288
+ unsigned int ByteCount;
2289
+ CUstream hStream;
2290
+ } cuMemcpyDtoHAsync_params;
2291
+
2292
+ typedef struct cuMemcpyDtoDAsync_params_st {
2293
+ CUdeviceptr_v1 dstDevice;
2294
+ CUdeviceptr_v1 srcDevice;
2295
+ unsigned int ByteCount;
2296
+ CUstream hStream;
2297
+ } cuMemcpyDtoDAsync_params;
2298
+
2299
+ typedef struct cuMemcpy2DAsync_params_st {
2300
+ const CUDA_MEMCPY2D_v1 *pCopy;
2301
+ CUstream hStream;
2302
+ } cuMemcpy2DAsync_params;
2303
+
2304
+ typedef struct cuMemcpy3DAsync_params_st {
2305
+ const CUDA_MEMCPY3D_v1 *pCopy;
2306
+ CUstream hStream;
2307
+ } cuMemcpy3DAsync_params;
2308
+
2309
+ typedef struct cuMemsetD8_params_st {
2310
+ CUdeviceptr_v1 dstDevice;
2311
+ unsigned char uc;
2312
+ unsigned int N;
2313
+ } cuMemsetD8_params;
2314
+
2315
+ typedef struct cuMemsetD16_params_st {
2316
+ CUdeviceptr_v1 dstDevice;
2317
+ unsigned short us;
2318
+ unsigned int N;
2319
+ } cuMemsetD16_params;
2320
+
2321
+ typedef struct cuMemsetD32_params_st {
2322
+ CUdeviceptr_v1 dstDevice;
2323
+ unsigned int ui;
2324
+ unsigned int N;
2325
+ } cuMemsetD32_params;
2326
+
2327
+ typedef struct cuMemsetD2D8_params_st {
2328
+ CUdeviceptr_v1 dstDevice;
2329
+ unsigned int dstPitch;
2330
+ unsigned char uc;
2331
+ unsigned int Width;
2332
+ unsigned int Height;
2333
+ } cuMemsetD2D8_params;
2334
+
2335
+ typedef struct cuMemsetD2D16_params_st {
2336
+ CUdeviceptr_v1 dstDevice;
2337
+ unsigned int dstPitch;
2338
+ unsigned short us;
2339
+ unsigned int Width;
2340
+ unsigned int Height;
2341
+ } cuMemsetD2D16_params;
2342
+
2343
+ typedef struct cuMemsetD2D32_params_st {
2344
+ CUdeviceptr_v1 dstDevice;
2345
+ unsigned int dstPitch;
2346
+ unsigned int ui;
2347
+ unsigned int Width;
2348
+ unsigned int Height;
2349
+ } cuMemsetD2D32_params;
2350
+
2351
+ typedef struct cuArrayCreate_params_st {
2352
+ CUarray *pHandle;
2353
+ const CUDA_ARRAY_DESCRIPTOR_v1 *pAllocateArray;
2354
+ } cuArrayCreate_params;
2355
+
2356
+ typedef struct cuArrayGetDescriptor_params_st {
2357
+ CUDA_ARRAY_DESCRIPTOR_v1 *pArrayDescriptor;
2358
+ CUarray hArray;
2359
+ } cuArrayGetDescriptor_params;
2360
+
2361
+ typedef struct cuArray3DCreate_params_st {
2362
+ CUarray *pHandle;
2363
+ const CUDA_ARRAY3D_DESCRIPTOR_v1 *pAllocateArray;
2364
+ } cuArray3DCreate_params;
2365
+
2366
+ typedef struct cuArray3DGetDescriptor_params_st {
2367
+ CUDA_ARRAY3D_DESCRIPTOR_v1 *pArrayDescriptor;
2368
+ CUarray hArray;
2369
+ } cuArray3DGetDescriptor_params;
2370
+
2371
+ typedef struct cuTexRefSetAddress_params_st {
2372
+ unsigned int *ByteOffset;
2373
+ CUtexref hTexRef;
2374
+ CUdeviceptr_v1 dptr;
2375
+ unsigned int bytes;
2376
+ } cuTexRefSetAddress_params;
2377
+
2378
+ typedef struct cuTexRefSetAddress2D_params_st {
2379
+ CUtexref hTexRef;
2380
+ const CUDA_ARRAY_DESCRIPTOR_v1 *desc;
2381
+ CUdeviceptr_v1 dptr;
2382
+ unsigned int Pitch;
2383
+ } cuTexRefSetAddress2D_params;
2384
+
2385
+ typedef struct cuTexRefGetAddress_params_st {
2386
+ CUdeviceptr_v1 *pdptr;
2387
+ CUtexref hTexRef;
2388
+ } cuTexRefGetAddress_params;
2389
+
2390
+ typedef struct cuGraphicsResourceGetMappedPointer_params_st {
2391
+ CUdeviceptr_v1 *pDevPtr;
2392
+ unsigned int *pSize;
2393
+ CUgraphicsResource resource;
2394
+ } cuGraphicsResourceGetMappedPointer_params;
2395
+
2396
+ typedef struct cuCtxDestroy_params_st {
2397
+ CUcontext ctx;
2398
+ } cuCtxDestroy_params;
2399
+
2400
+ typedef struct cuCtxPopCurrent_params_st {
2401
+ CUcontext *pctx;
2402
+ } cuCtxPopCurrent_params;
2403
+
2404
+ typedef struct cuCtxPushCurrent_params_st {
2405
+ CUcontext ctx;
2406
+ } cuCtxPushCurrent_params;
2407
+
2408
+ typedef struct cuStreamDestroy_params_st {
2409
+ CUstream hStream;
2410
+ } cuStreamDestroy_params;
2411
+
2412
+ typedef struct cuEventDestroy_params_st {
2413
+ CUevent hEvent;
2414
+ } cuEventDestroy_params;
2415
+
2416
+ typedef struct cuDevicePrimaryCtxRelease_params_st {
2417
+ CUdevice dev;
2418
+ } cuDevicePrimaryCtxRelease_params;
2419
+
2420
+ typedef struct cuDevicePrimaryCtxReset_params_st {
2421
+ CUdevice dev;
2422
+ } cuDevicePrimaryCtxReset_params;
2423
+
2424
+ typedef struct cuDevicePrimaryCtxSetFlags_params_st {
2425
+ CUdevice dev;
2426
+ unsigned int flags;
2427
+ } cuDevicePrimaryCtxSetFlags_params;
2428
+
2429
+ typedef struct cuMemcpyHtoD_v2_params_st {
2430
+ CUdeviceptr dstDevice;
2431
+ const void *srcHost;
2432
+ size_t ByteCount;
2433
+ } cuMemcpyHtoD_v2_params;
2434
+
2435
+ typedef struct cuMemcpyDtoH_v2_params_st {
2436
+ void *dstHost;
2437
+ CUdeviceptr srcDevice;
2438
+ size_t ByteCount;
2439
+ } cuMemcpyDtoH_v2_params;
2440
+
2441
+ typedef struct cuMemcpyDtoD_v2_params_st {
2442
+ CUdeviceptr dstDevice;
2443
+ CUdeviceptr srcDevice;
2444
+ size_t ByteCount;
2445
+ } cuMemcpyDtoD_v2_params;
2446
+
2447
+ typedef struct cuMemcpyDtoA_v2_params_st {
2448
+ CUarray dstArray;
2449
+ size_t dstOffset;
2450
+ CUdeviceptr srcDevice;
2451
+ size_t ByteCount;
2452
+ } cuMemcpyDtoA_v2_params;
2453
+
2454
+ typedef struct cuMemcpyAtoD_v2_params_st {
2455
+ CUdeviceptr dstDevice;
2456
+ CUarray srcArray;
2457
+ size_t srcOffset;
2458
+ size_t ByteCount;
2459
+ } cuMemcpyAtoD_v2_params;
2460
+
2461
+ typedef struct cuMemcpyHtoA_v2_params_st {
2462
+ CUarray dstArray;
2463
+ size_t dstOffset;
2464
+ const void *srcHost;
2465
+ size_t ByteCount;
2466
+ } cuMemcpyHtoA_v2_params;
2467
+
2468
+ typedef struct cuMemcpyAtoH_v2_params_st {
2469
+ void *dstHost;
2470
+ CUarray srcArray;
2471
+ size_t srcOffset;
2472
+ size_t ByteCount;
2473
+ } cuMemcpyAtoH_v2_params;
2474
+
2475
+ typedef struct cuMemcpyAtoA_v2_params_st {
2476
+ CUarray dstArray;
2477
+ size_t dstOffset;
2478
+ CUarray srcArray;
2479
+ size_t srcOffset;
2480
+ size_t ByteCount;
2481
+ } cuMemcpyAtoA_v2_params;
2482
+
2483
+ typedef struct cuMemcpyHtoAAsync_v2_params_st {
2484
+ CUarray dstArray;
2485
+ size_t dstOffset;
2486
+ const void *srcHost;
2487
+ size_t ByteCount;
2488
+ CUstream hStream;
2489
+ } cuMemcpyHtoAAsync_v2_params;
2490
+
2491
+ typedef struct cuMemcpyAtoHAsync_v2_params_st {
2492
+ void *dstHost;
2493
+ CUarray srcArray;
2494
+ size_t srcOffset;
2495
+ size_t ByteCount;
2496
+ CUstream hStream;
2497
+ } cuMemcpyAtoHAsync_v2_params;
2498
+
2499
+ typedef struct cuMemcpy2D_v2_params_st {
2500
+ const CUDA_MEMCPY2D *pCopy;
2501
+ } cuMemcpy2D_v2_params;
2502
+
2503
+ typedef struct cuMemcpy2DUnaligned_v2_params_st {
2504
+ const CUDA_MEMCPY2D *pCopy;
2505
+ } cuMemcpy2DUnaligned_v2_params;
2506
+
2507
+ typedef struct cuMemcpy3D_v2_params_st {
2508
+ const CUDA_MEMCPY3D *pCopy;
2509
+ } cuMemcpy3D_v2_params;
2510
+
2511
+ typedef struct cuMemcpyHtoDAsync_v2_params_st {
2512
+ CUdeviceptr dstDevice;
2513
+ const void *srcHost;
2514
+ size_t ByteCount;
2515
+ CUstream hStream;
2516
+ } cuMemcpyHtoDAsync_v2_params;
2517
+
2518
+ typedef struct cuMemcpyDtoHAsync_v2_params_st {
2519
+ void *dstHost;
2520
+ CUdeviceptr srcDevice;
2521
+ size_t ByteCount;
2522
+ CUstream hStream;
2523
+ } cuMemcpyDtoHAsync_v2_params;
2524
+
2525
+ typedef struct cuMemcpyDtoDAsync_v2_params_st {
2526
+ CUdeviceptr dstDevice;
2527
+ CUdeviceptr srcDevice;
2528
+ size_t ByteCount;
2529
+ CUstream hStream;
2530
+ } cuMemcpyDtoDAsync_v2_params;
2531
+
2532
+ typedef struct cuMemcpy2DAsync_v2_params_st {
2533
+ const CUDA_MEMCPY2D *pCopy;
2534
+ CUstream hStream;
2535
+ } cuMemcpy2DAsync_v2_params;
2536
+
2537
+ typedef struct cuMemcpy3DAsync_v2_params_st {
2538
+ const CUDA_MEMCPY3D *pCopy;
2539
+ CUstream hStream;
2540
+ } cuMemcpy3DAsync_v2_params;
2541
+
2542
+ typedef struct cuMemsetD8_v2_params_st {
2543
+ CUdeviceptr dstDevice;
2544
+ unsigned char uc;
2545
+ size_t N;
2546
+ } cuMemsetD8_v2_params;
2547
+
2548
+ typedef struct cuMemsetD16_v2_params_st {
2549
+ CUdeviceptr dstDevice;
2550
+ unsigned short us;
2551
+ size_t N;
2552
+ } cuMemsetD16_v2_params;
2553
+
2554
+ typedef struct cuMemsetD32_v2_params_st {
2555
+ CUdeviceptr dstDevice;
2556
+ unsigned int ui;
2557
+ size_t N;
2558
+ } cuMemsetD32_v2_params;
2559
+
2560
+ typedef struct cuMemsetD2D8_v2_params_st {
2561
+ CUdeviceptr dstDevice;
2562
+ size_t dstPitch;
2563
+ unsigned char uc;
2564
+ size_t Width;
2565
+ size_t Height;
2566
+ } cuMemsetD2D8_v2_params;
2567
+
2568
+ typedef struct cuMemsetD2D16_v2_params_st {
2569
+ CUdeviceptr dstDevice;
2570
+ size_t dstPitch;
2571
+ unsigned short us;
2572
+ size_t Width;
2573
+ size_t Height;
2574
+ } cuMemsetD2D16_v2_params;
2575
+
2576
+ typedef struct cuMemsetD2D32_v2_params_st {
2577
+ CUdeviceptr dstDevice;
2578
+ size_t dstPitch;
2579
+ unsigned int ui;
2580
+ size_t Width;
2581
+ size_t Height;
2582
+ } cuMemsetD2D32_v2_params;
2583
+
2584
+ typedef struct cuMemcpy_params_st {
2585
+ CUdeviceptr dst;
2586
+ CUdeviceptr src;
2587
+ size_t ByteCount;
2588
+ } cuMemcpy_params;
2589
+
2590
+ typedef struct cuMemcpyAsync_params_st {
2591
+ CUdeviceptr dst;
2592
+ CUdeviceptr src;
2593
+ size_t ByteCount;
2594
+ CUstream hStream;
2595
+ } cuMemcpyAsync_params;
2596
+
2597
+ typedef struct cuMemcpyPeer_params_st {
2598
+ CUdeviceptr dstDevice;
2599
+ CUcontext dstContext;
2600
+ CUdeviceptr srcDevice;
2601
+ CUcontext srcContext;
2602
+ size_t ByteCount;
2603
+ } cuMemcpyPeer_params;
2604
+
2605
+ typedef struct cuMemcpyPeerAsync_params_st {
2606
+ CUdeviceptr dstDevice;
2607
+ CUcontext dstContext;
2608
+ CUdeviceptr srcDevice;
2609
+ CUcontext srcContext;
2610
+ size_t ByteCount;
2611
+ CUstream hStream;
2612
+ } cuMemcpyPeerAsync_params;
2613
+
2614
+ typedef struct cuMemcpy3DPeer_params_st {
2615
+ const CUDA_MEMCPY3D_PEER *pCopy;
2616
+ } cuMemcpy3DPeer_params;
2617
+
2618
+ typedef struct cuMemcpy3DPeerAsync_params_st {
2619
+ const CUDA_MEMCPY3D_PEER *pCopy;
2620
+ CUstream hStream;
2621
+ } cuMemcpy3DPeerAsync_params;
2622
+
2623
+ typedef struct cuMemsetD8Async_params_st {
2624
+ CUdeviceptr dstDevice;
2625
+ unsigned char uc;
2626
+ size_t N;
2627
+ CUstream hStream;
2628
+ } cuMemsetD8Async_params;
2629
+
2630
+ typedef struct cuMemsetD16Async_params_st {
2631
+ CUdeviceptr dstDevice;
2632
+ unsigned short us;
2633
+ size_t N;
2634
+ CUstream hStream;
2635
+ } cuMemsetD16Async_params;
2636
+
2637
+ typedef struct cuMemsetD32Async_params_st {
2638
+ CUdeviceptr dstDevice;
2639
+ unsigned int ui;
2640
+ size_t N;
2641
+ CUstream hStream;
2642
+ } cuMemsetD32Async_params;
2643
+
2644
+ typedef struct cuMemsetD2D8Async_params_st {
2645
+ CUdeviceptr dstDevice;
2646
+ size_t dstPitch;
2647
+ unsigned char uc;
2648
+ size_t Width;
2649
+ size_t Height;
2650
+ CUstream hStream;
2651
+ } cuMemsetD2D8Async_params;
2652
+
2653
+ typedef struct cuMemsetD2D16Async_params_st {
2654
+ CUdeviceptr dstDevice;
2655
+ size_t dstPitch;
2656
+ unsigned short us;
2657
+ size_t Width;
2658
+ size_t Height;
2659
+ CUstream hStream;
2660
+ } cuMemsetD2D16Async_params;
2661
+
2662
+ typedef struct cuMemsetD2D32Async_params_st {
2663
+ CUdeviceptr dstDevice;
2664
+ size_t dstPitch;
2665
+ unsigned int ui;
2666
+ size_t Width;
2667
+ size_t Height;
2668
+ CUstream hStream;
2669
+ } cuMemsetD2D32Async_params;
2670
+
2671
+ typedef struct cuStreamGetPriority_params_st {
2672
+ CUstream hStream;
2673
+ int *priority;
2674
+ } cuStreamGetPriority_params;
2675
+
2676
+ typedef struct cuStreamGetFlags_params_st {
2677
+ CUstream hStream;
2678
+ unsigned int *flags;
2679
+ } cuStreamGetFlags_params;
2680
+
2681
+ typedef struct cuStreamGetCtx_params_st {
2682
+ CUstream hStream;
2683
+ CUcontext *pctx;
2684
+ } cuStreamGetCtx_params;
2685
+
2686
+ typedef struct cuStreamWaitEvent_params_st {
2687
+ CUstream hStream;
2688
+ CUevent hEvent;
2689
+ unsigned int Flags;
2690
+ } cuStreamWaitEvent_params;
2691
+
2692
+ typedef struct cuStreamAddCallback_params_st {
2693
+ CUstream hStream;
2694
+ CUstreamCallback callback;
2695
+ void *userData;
2696
+ unsigned int flags;
2697
+ } cuStreamAddCallback_params;
2698
+
2699
+ typedef struct cuStreamAttachMemAsync_params_st {
2700
+ CUstream hStream;
2701
+ CUdeviceptr dptr;
2702
+ size_t length;
2703
+ unsigned int flags;
2704
+ } cuStreamAttachMemAsync_params;
2705
+
2706
+ typedef struct cuStreamQuery_params_st {
2707
+ CUstream hStream;
2708
+ } cuStreamQuery_params;
2709
+
2710
+ typedef struct cuStreamSynchronize_params_st {
2711
+ CUstream hStream;
2712
+ } cuStreamSynchronize_params;
2713
+
2714
+ typedef struct cuEventRecord_params_st {
2715
+ CUevent hEvent;
2716
+ CUstream hStream;
2717
+ } cuEventRecord_params;
2718
+
2719
+ typedef struct cuEventRecordWithFlags_params_st {
2720
+ CUevent hEvent;
2721
+ CUstream hStream;
2722
+ unsigned int flags;
2723
+ } cuEventRecordWithFlags_params;
2724
+
2725
+ typedef struct cuLaunchKernel_params_st {
2726
+ CUfunction f;
2727
+ unsigned int gridDimX;
2728
+ unsigned int gridDimY;
2729
+ unsigned int gridDimZ;
2730
+ unsigned int blockDimX;
2731
+ unsigned int blockDimY;
2732
+ unsigned int blockDimZ;
2733
+ unsigned int sharedMemBytes;
2734
+ CUstream hStream;
2735
+ void **kernelParams;
2736
+ void **extra;
2737
+ } cuLaunchKernel_params;
2738
+
2739
+ typedef struct cuLaunchKernelEx_params_st {
2740
+ const CUlaunchConfig *config;
2741
+ CUfunction f;
2742
+ void **kernelParams;
2743
+ void **extra;
2744
+ } cuLaunchKernelEx_params;
2745
+
2746
+ typedef struct cuLaunchHostFunc_params_st {
2747
+ CUstream hStream;
2748
+ CUhostFn fn;
2749
+ void *userData;
2750
+ } cuLaunchHostFunc_params;
2751
+
2752
+ typedef struct cuGraphicsMapResources_params_st {
2753
+ unsigned int count;
2754
+ CUgraphicsResource *resources;
2755
+ CUstream hStream;
2756
+ } cuGraphicsMapResources_params;
2757
+
2758
+ typedef struct cuGraphicsUnmapResources_params_st {
2759
+ unsigned int count;
2760
+ CUgraphicsResource *resources;
2761
+ CUstream hStream;
2762
+ } cuGraphicsUnmapResources_params;
2763
+
2764
+ typedef struct cuStreamWriteValue32_params_st {
2765
+ CUstream stream;
2766
+ CUdeviceptr addr;
2767
+ cuuint32_t value;
2768
+ unsigned int flags;
2769
+ } cuStreamWriteValue32_params;
2770
+
2771
+ typedef struct cuStreamWaitValue32_params_st {
2772
+ CUstream stream;
2773
+ CUdeviceptr addr;
2774
+ cuuint32_t value;
2775
+ unsigned int flags;
2776
+ } cuStreamWaitValue32_params;
2777
+
2778
+ typedef struct cuStreamWriteValue64_params_st {
2779
+ CUstream stream;
2780
+ CUdeviceptr addr;
2781
+ cuuint64_t value;
2782
+ unsigned int flags;
2783
+ } cuStreamWriteValue64_params;
2784
+
2785
+ typedef struct cuStreamWaitValue64_params_st {
2786
+ CUstream stream;
2787
+ CUdeviceptr addr;
2788
+ cuuint64_t value;
2789
+ unsigned int flags;
2790
+ } cuStreamWaitValue64_params;
2791
+
2792
+ typedef struct cuStreamBatchMemOp_params_st {
2793
+ CUstream stream;
2794
+ unsigned int count;
2795
+ CUstreamBatchMemOpParams *paramArray;
2796
+ unsigned int flags;
2797
+ } cuStreamBatchMemOp_params;
2798
+
2799
+ typedef struct cuMemPrefetchAsync_params_st {
2800
+ CUdeviceptr devPtr;
2801
+ size_t count;
2802
+ CUdevice dstDevice;
2803
+ CUstream hStream;
2804
+ } cuMemPrefetchAsync_params;
2805
+
2806
+ typedef struct cuLaunchCooperativeKernel_params_st {
2807
+ CUfunction f;
2808
+ unsigned int gridDimX;
2809
+ unsigned int gridDimY;
2810
+ unsigned int gridDimZ;
2811
+ unsigned int blockDimX;
2812
+ unsigned int blockDimY;
2813
+ unsigned int blockDimZ;
2814
+ unsigned int sharedMemBytes;
2815
+ CUstream hStream;
2816
+ void **kernelParams;
2817
+ } cuLaunchCooperativeKernel_params;
2818
+
2819
+ typedef struct cuSignalExternalSemaphoresAsync_params_st {
2820
+ const CUexternalSemaphore *extSemArray;
2821
+ const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray;
2822
+ unsigned int numExtSems;
2823
+ CUstream stream;
2824
+ } cuSignalExternalSemaphoresAsync_params;
2825
+
2826
+ typedef struct cuWaitExternalSemaphoresAsync_params_st {
2827
+ const CUexternalSemaphore *extSemArray;
2828
+ const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray;
2829
+ unsigned int numExtSems;
2830
+ CUstream stream;
2831
+ } cuWaitExternalSemaphoresAsync_params;
2832
+
2833
+ typedef struct cuStreamBeginCapture_params_st {
2834
+ CUstream hStream;
2835
+ } cuStreamBeginCapture_params;
2836
+
2837
+ typedef struct cuStreamBeginCapture_ptsz_params_st {
2838
+ CUstream hStream;
2839
+ } cuStreamBeginCapture_ptsz_params;
2840
+
2841
+ typedef struct cuStreamBeginCapture_v2_params_st {
2842
+ CUstream hStream;
2843
+ CUstreamCaptureMode mode;
2844
+ } cuStreamBeginCapture_v2_params;
2845
+
2846
+ typedef struct cuStreamEndCapture_params_st {
2847
+ CUstream hStream;
2848
+ CUgraph *phGraph;
2849
+ } cuStreamEndCapture_params;
2850
+
2851
+ typedef struct cuStreamIsCapturing_params_st {
2852
+ CUstream hStream;
2853
+ CUstreamCaptureStatus *captureStatus;
2854
+ } cuStreamIsCapturing_params;
2855
+
2856
+ typedef struct cuStreamGetCaptureInfo_params_st {
2857
+ CUstream hStream;
2858
+ CUstreamCaptureStatus *captureStatus_out;
2859
+ cuuint64_t *id_out;
2860
+ } cuStreamGetCaptureInfo_params;
2861
+
2862
+ typedef struct cuStreamGetCaptureInfo_v2_params_st {
2863
+ CUstream hStream;
2864
+ CUstreamCaptureStatus *captureStatus_out;
2865
+ cuuint64_t *id_out;
2866
+ CUgraph *graph_out;
2867
+ const CUgraphNode **dependencies_out;
2868
+ size_t *numDependencies_out;
2869
+ } cuStreamGetCaptureInfo_v2_params;
2870
+
2871
+ typedef struct cuGraphUpload_params_st {
2872
+ CUgraphExec hGraph;
2873
+ CUstream hStream;
2874
+ } cuGraphUpload_params;
2875
+
2876
+ typedef struct cuGraphLaunch_params_st {
2877
+ CUgraphExec hGraph;
2878
+ CUstream hStream;
2879
+ } cuGraphLaunch_params;
2880
+
2881
+ typedef struct cuStreamCopyAttributes_params_st {
2882
+ CUstream dstStream;
2883
+ CUstream srcStream;
2884
+ } cuStreamCopyAttributes_params;
2885
+
2886
+ typedef struct cuStreamGetAttribute_params_st {
2887
+ CUstream hStream;
2888
+ CUstreamAttrID attr;
2889
+ CUstreamAttrValue *value;
2890
+ } cuStreamGetAttribute_params;
2891
+
2892
+ typedef struct cuStreamSetAttribute_params_st {
2893
+ CUstream hStream;
2894
+ CUstreamAttrID attr;
2895
+ const CUstreamAttrValue *param;
2896
+ } cuStreamSetAttribute_params;
2897
+
2898
+ typedef struct cuIpcOpenMemHandle_params_st {
2899
+ CUdeviceptr *pdptr;
2900
+ CUipcMemHandle handle;
2901
+ unsigned int Flags;
2902
+ } cuIpcOpenMemHandle_params;
2903
+
2904
+ typedef struct cuGraphInstantiate_params_st {
2905
+ CUgraphExec *phGraphExec;
2906
+ CUgraph hGraph;
2907
+ CUgraphNode *phErrorNode;
2908
+ char *logBuffer;
2909
+ size_t bufferSize;
2910
+ } cuGraphInstantiate_params;
2911
+
2912
+ typedef struct cuMemMapArrayAsync_params_st {
2913
+ CUarrayMapInfo *mapInfoList;
2914
+ unsigned int count;
2915
+ CUstream hStream;
2916
+ } cuMemMapArrayAsync_params;
2917
+
2918
+ typedef struct cuMemFreeAsync_params_st {
2919
+ CUdeviceptr dptr;
2920
+ CUstream hStream;
2921
+ } cuMemFreeAsync_params;
2922
+
2923
+ typedef struct cuMemAllocAsync_params_st {
2924
+ CUdeviceptr *dptr;
2925
+ size_t bytesize;
2926
+ CUstream hStream;
2927
+ } cuMemAllocAsync_params;
2928
+
2929
+ typedef struct cuMemAllocFromPoolAsync_params_st {
2930
+ CUdeviceptr *dptr;
2931
+ size_t bytesize;
2932
+ CUmemoryPool pool;
2933
+ CUstream hStream;
2934
+ } cuMemAllocFromPoolAsync_params;
2935
+
2936
+ typedef struct cuStreamUpdateCaptureDependencies_params_st {
2937
+ CUstream hStream;
2938
+ CUgraphNode *dependencies;
2939
+ size_t numDependencies;
2940
+ unsigned int flags;
2941
+ } cuStreamUpdateCaptureDependencies_params;
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_runtime_api_meta.h ADDED
@@ -0,0 +1,2139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // This file is generated. Any changes you make will be lost during the next clean build.
2
+
3
+ // CUDA public interface, for type definitions and api function prototypes
4
+ #include "cuda_runtime_api.h"
5
+
6
+ // *************************************************************************
7
+ // Definitions of structs to hold parameters for each function
8
+ // *************************************************************************
9
+
10
+ // Currently used parameter trace structures
11
+ typedef struct cudaDeviceSetLimit_v3020_params_st {
12
+ enum cudaLimit limit;
13
+ size_t value;
14
+ } cudaDeviceSetLimit_v3020_params;
15
+
16
+ typedef struct cudaDeviceGetLimit_v3020_params_st {
17
+ size_t *pValue;
18
+ enum cudaLimit limit;
19
+ } cudaDeviceGetLimit_v3020_params;
20
+
21
+ typedef struct cudaDeviceGetTexture1DLinearMaxWidth_v11010_params_st {
22
+ size_t *maxWidthInElements;
23
+ const struct cudaChannelFormatDesc *fmtDesc;
24
+ int device;
25
+ } cudaDeviceGetTexture1DLinearMaxWidth_v11010_params;
26
+
27
+ typedef struct cudaDeviceGetCacheConfig_v3020_params_st {
28
+ enum cudaFuncCache *pCacheConfig;
29
+ } cudaDeviceGetCacheConfig_v3020_params;
30
+
31
+ typedef struct cudaDeviceGetStreamPriorityRange_v5050_params_st {
32
+ int *leastPriority;
33
+ int *greatestPriority;
34
+ } cudaDeviceGetStreamPriorityRange_v5050_params;
35
+
36
+ typedef struct cudaDeviceSetCacheConfig_v3020_params_st {
37
+ enum cudaFuncCache cacheConfig;
38
+ } cudaDeviceSetCacheConfig_v3020_params;
39
+
40
+ typedef struct cudaDeviceGetSharedMemConfig_v4020_params_st {
41
+ enum cudaSharedMemConfig *pConfig;
42
+ } cudaDeviceGetSharedMemConfig_v4020_params;
43
+
44
+ typedef struct cudaDeviceSetSharedMemConfig_v4020_params_st {
45
+ enum cudaSharedMemConfig config;
46
+ } cudaDeviceSetSharedMemConfig_v4020_params;
47
+
48
+ typedef struct cudaDeviceGetByPCIBusId_v4010_params_st {
49
+ int *device;
50
+ const char *pciBusId;
51
+ } cudaDeviceGetByPCIBusId_v4010_params;
52
+
53
+ typedef struct cudaDeviceGetPCIBusId_v4010_params_st {
54
+ char *pciBusId;
55
+ int len;
56
+ int device;
57
+ } cudaDeviceGetPCIBusId_v4010_params;
58
+
59
+ typedef struct cudaIpcGetEventHandle_v4010_params_st {
60
+ cudaIpcEventHandle_t *handle;
61
+ cudaEvent_t event;
62
+ } cudaIpcGetEventHandle_v4010_params;
63
+
64
+ typedef struct cudaIpcOpenEventHandle_v4010_params_st {
65
+ cudaEvent_t *event;
66
+ cudaIpcEventHandle_t handle;
67
+ } cudaIpcOpenEventHandle_v4010_params;
68
+
69
+ typedef struct cudaIpcGetMemHandle_v4010_params_st {
70
+ cudaIpcMemHandle_t *handle;
71
+ void *devPtr;
72
+ } cudaIpcGetMemHandle_v4010_params;
73
+
74
+ typedef struct cudaIpcOpenMemHandle_v4010_params_st {
75
+ void **devPtr;
76
+ cudaIpcMemHandle_t handle;
77
+ unsigned int flags;
78
+ } cudaIpcOpenMemHandle_v4010_params;
79
+
80
+ typedef struct cudaIpcCloseMemHandle_v4010_params_st {
81
+ void *devPtr;
82
+ } cudaIpcCloseMemHandle_v4010_params;
83
+
84
+ typedef struct cudaDeviceFlushGPUDirectRDMAWrites_v11030_params_st {
85
+ enum cudaFlushGPUDirectRDMAWritesTarget target;
86
+ enum cudaFlushGPUDirectRDMAWritesScope scope;
87
+ } cudaDeviceFlushGPUDirectRDMAWrites_v11030_params;
88
+
89
+ typedef struct cudaGetErrorName_v6050_params_st {
90
+ cudaError_t error;
91
+ } cudaGetErrorName_v6050_params;
92
+
93
+ typedef struct cudaGetErrorString_v3020_params_st {
94
+ cudaError_t error;
95
+ } cudaGetErrorString_v3020_params;
96
+
97
+ typedef struct cudaGetDeviceCount_v3020_params_st {
98
+ int *count;
99
+ } cudaGetDeviceCount_v3020_params;
100
+
101
+ typedef struct cudaGetDeviceProperties_v3020_params_st {
102
+ struct cudaDeviceProp *prop;
103
+ int device;
104
+ } cudaGetDeviceProperties_v3020_params;
105
+
106
+ typedef struct cudaDeviceGetAttribute_v5000_params_st {
107
+ int *value;
108
+ enum cudaDeviceAttr attr;
109
+ int device;
110
+ } cudaDeviceGetAttribute_v5000_params;
111
+
112
+ typedef struct cudaDeviceGetDefaultMemPool_v11020_params_st {
113
+ cudaMemPool_t *memPool;
114
+ int device;
115
+ } cudaDeviceGetDefaultMemPool_v11020_params;
116
+
117
+ typedef struct cudaDeviceSetMemPool_v11020_params_st {
118
+ int device;
119
+ cudaMemPool_t memPool;
120
+ } cudaDeviceSetMemPool_v11020_params;
121
+
122
+ typedef struct cudaDeviceGetMemPool_v11020_params_st {
123
+ cudaMemPool_t *memPool;
124
+ int device;
125
+ } cudaDeviceGetMemPool_v11020_params;
126
+
127
+ typedef struct cudaDeviceGetNvSciSyncAttributes_v10020_params_st {
128
+ void *nvSciSyncAttrList;
129
+ int device;
130
+ int flags;
131
+ } cudaDeviceGetNvSciSyncAttributes_v10020_params;
132
+
133
+ typedef struct cudaDeviceGetP2PAttribute_v8000_params_st {
134
+ int *value;
135
+ enum cudaDeviceP2PAttr attr;
136
+ int srcDevice;
137
+ int dstDevice;
138
+ } cudaDeviceGetP2PAttribute_v8000_params;
139
+
140
+ typedef struct cudaChooseDevice_v3020_params_st {
141
+ int *device;
142
+ const struct cudaDeviceProp *prop;
143
+ } cudaChooseDevice_v3020_params;
144
+
145
+ typedef struct cudaSetDevice_v3020_params_st {
146
+ int device;
147
+ } cudaSetDevice_v3020_params;
148
+
149
+ typedef struct cudaGetDevice_v3020_params_st {
150
+ int *device;
151
+ } cudaGetDevice_v3020_params;
152
+
153
+ typedef struct cudaSetValidDevices_v3020_params_st {
154
+ int *device_arr;
155
+ int len;
156
+ } cudaSetValidDevices_v3020_params;
157
+
158
+ typedef struct cudaSetDeviceFlags_v3020_params_st {
159
+ unsigned int flags;
160
+ } cudaSetDeviceFlags_v3020_params;
161
+
162
+ typedef struct cudaGetDeviceFlags_v7000_params_st {
163
+ unsigned int *flags;
164
+ } cudaGetDeviceFlags_v7000_params;
165
+
166
+ typedef struct cudaStreamCreate_v3020_params_st {
167
+ cudaStream_t *pStream;
168
+ } cudaStreamCreate_v3020_params;
169
+
170
+ typedef struct cudaStreamCreateWithFlags_v5000_params_st {
171
+ cudaStream_t *pStream;
172
+ unsigned int flags;
173
+ } cudaStreamCreateWithFlags_v5000_params;
174
+
175
+ typedef struct cudaStreamCreateWithPriority_v5050_params_st {
176
+ cudaStream_t *pStream;
177
+ unsigned int flags;
178
+ int priority;
179
+ } cudaStreamCreateWithPriority_v5050_params;
180
+
181
+ typedef struct cudaStreamGetPriority_ptsz_v7000_params_st {
182
+ cudaStream_t hStream;
183
+ int *priority;
184
+ } cudaStreamGetPriority_ptsz_v7000_params;
185
+
186
+ typedef struct cudaStreamGetFlags_ptsz_v7000_params_st {
187
+ cudaStream_t hStream;
188
+ unsigned int *flags;
189
+ } cudaStreamGetFlags_ptsz_v7000_params;
190
+
191
+ typedef struct cudaStreamCopyAttributes_ptsz_v11000_params_st {
192
+ cudaStream_t dst;
193
+ cudaStream_t src;
194
+ } cudaStreamCopyAttributes_ptsz_v11000_params;
195
+
196
+ typedef struct cudaStreamGetAttribute_ptsz_v11000_params_st {
197
+ cudaStream_t hStream;
198
+ cudaStreamAttrID attr;
199
+ cudaStreamAttrValue *value_out;
200
+ } cudaStreamGetAttribute_ptsz_v11000_params;
201
+
202
+ typedef struct cudaStreamSetAttribute_ptsz_v11000_params_st {
203
+ cudaStream_t hStream;
204
+ cudaStreamAttrID attr;
205
+ const cudaStreamAttrValue *value;
206
+ } cudaStreamSetAttribute_ptsz_v11000_params;
207
+
208
+ typedef struct cudaStreamDestroy_v5050_params_st {
209
+ cudaStream_t stream;
210
+ } cudaStreamDestroy_v5050_params;
211
+
212
+ typedef struct cudaStreamWaitEvent_ptsz_v7000_params_st {
213
+ cudaStream_t stream;
214
+ cudaEvent_t event;
215
+ unsigned int flags;
216
+ } cudaStreamWaitEvent_ptsz_v7000_params;
217
+
218
+ typedef struct cudaStreamAddCallback_ptsz_v7000_params_st {
219
+ cudaStream_t stream;
220
+ cudaStreamCallback_t callback;
221
+ void *userData;
222
+ unsigned int flags;
223
+ } cudaStreamAddCallback_ptsz_v7000_params;
224
+
225
+ typedef struct cudaStreamSynchronize_ptsz_v7000_params_st {
226
+ cudaStream_t stream;
227
+ } cudaStreamSynchronize_ptsz_v7000_params;
228
+
229
+ typedef struct cudaStreamQuery_ptsz_v7000_params_st {
230
+ cudaStream_t stream;
231
+ } cudaStreamQuery_ptsz_v7000_params;
232
+
233
+ typedef struct cudaStreamAttachMemAsync_ptsz_v7000_params_st {
234
+ cudaStream_t stream;
235
+ void *devPtr;
236
+ size_t length;
237
+ unsigned int flags;
238
+ } cudaStreamAttachMemAsync_ptsz_v7000_params;
239
+
240
+ typedef struct cudaStreamBeginCapture_ptsz_v10000_params_st {
241
+ cudaStream_t stream;
242
+ enum cudaStreamCaptureMode mode;
243
+ } cudaStreamBeginCapture_ptsz_v10000_params;
244
+
245
+ typedef struct cudaThreadExchangeStreamCaptureMode_v10010_params_st {
246
+ enum cudaStreamCaptureMode *mode;
247
+ } cudaThreadExchangeStreamCaptureMode_v10010_params;
248
+
249
+ typedef struct cudaStreamEndCapture_ptsz_v10000_params_st {
250
+ cudaStream_t stream;
251
+ cudaGraph_t *pGraph;
252
+ } cudaStreamEndCapture_ptsz_v10000_params;
253
+
254
+ typedef struct cudaStreamIsCapturing_ptsz_v10000_params_st {
255
+ cudaStream_t stream;
256
+ enum cudaStreamCaptureStatus *pCaptureStatus;
257
+ } cudaStreamIsCapturing_ptsz_v10000_params;
258
+
259
+ typedef struct cudaStreamGetCaptureInfo_ptsz_v10010_params_st {
260
+ cudaStream_t stream;
261
+ enum cudaStreamCaptureStatus *pCaptureStatus;
262
+ unsigned long long *pId;
263
+ } cudaStreamGetCaptureInfo_ptsz_v10010_params;
264
+
265
+ typedef struct cudaStreamGetCaptureInfo_v2_ptsz_v11030_params_st {
266
+ cudaStream_t stream;
267
+ enum cudaStreamCaptureStatus *captureStatus_out;
268
+ unsigned long long *id_out;
269
+ cudaGraph_t *graph_out;
270
+ const cudaGraphNode_t **dependencies_out;
271
+ size_t *numDependencies_out;
272
+ } cudaStreamGetCaptureInfo_v2_ptsz_v11030_params;
273
+
274
+ typedef struct cudaStreamUpdateCaptureDependencies_v11030_params_st {
275
+ cudaStream_t stream;
276
+ cudaGraphNode_t *dependencies;
277
+ size_t numDependencies;
278
+ unsigned int flags;
279
+ } cudaStreamUpdateCaptureDependencies_v11030_params;
280
+
281
+ typedef struct cudaEventCreate_v3020_params_st {
282
+ cudaEvent_t *event;
283
+ } cudaEventCreate_v3020_params;
284
+
285
+ typedef struct cudaEventCreateWithFlags_v3020_params_st {
286
+ cudaEvent_t *event;
287
+ unsigned int flags;
288
+ } cudaEventCreateWithFlags_v3020_params;
289
+
290
+ typedef struct cudaEventRecord_ptsz_v7000_params_st {
291
+ cudaEvent_t event;
292
+ cudaStream_t stream;
293
+ } cudaEventRecord_ptsz_v7000_params;
294
+
295
+ typedef struct cudaEventRecordWithFlags_ptsz_v11010_params_st {
296
+ cudaEvent_t event;
297
+ cudaStream_t stream;
298
+ unsigned int flags;
299
+ } cudaEventRecordWithFlags_ptsz_v11010_params;
300
+
301
+ typedef struct cudaEventQuery_v3020_params_st {
302
+ cudaEvent_t event;
303
+ } cudaEventQuery_v3020_params;
304
+
305
+ typedef struct cudaEventSynchronize_v3020_params_st {
306
+ cudaEvent_t event;
307
+ } cudaEventSynchronize_v3020_params;
308
+
309
+ typedef struct cudaEventDestroy_v3020_params_st {
310
+ cudaEvent_t event;
311
+ } cudaEventDestroy_v3020_params;
312
+
313
+ typedef struct cudaEventElapsedTime_v3020_params_st {
314
+ float *ms;
315
+ cudaEvent_t start;
316
+ cudaEvent_t end;
317
+ } cudaEventElapsedTime_v3020_params;
318
+
319
+ typedef struct cudaImportExternalMemory_v10000_params_st {
320
+ cudaExternalMemory_t *extMem_out;
321
+ const struct cudaExternalMemoryHandleDesc *memHandleDesc;
322
+ } cudaImportExternalMemory_v10000_params;
323
+
324
+ typedef struct cudaExternalMemoryGetMappedBuffer_v10000_params_st {
325
+ void **devPtr;
326
+ cudaExternalMemory_t extMem;
327
+ const struct cudaExternalMemoryBufferDesc *bufferDesc;
328
+ } cudaExternalMemoryGetMappedBuffer_v10000_params;
329
+
330
+ typedef struct cudaExternalMemoryGetMappedMipmappedArray_v10000_params_st {
331
+ cudaMipmappedArray_t *mipmap;
332
+ cudaExternalMemory_t extMem;
333
+ const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc;
334
+ } cudaExternalMemoryGetMappedMipmappedArray_v10000_params;
335
+
336
+ typedef struct cudaDestroyExternalMemory_v10000_params_st {
337
+ cudaExternalMemory_t extMem;
338
+ } cudaDestroyExternalMemory_v10000_params;
339
+
340
+ typedef struct cudaImportExternalSemaphore_v10000_params_st {
341
+ cudaExternalSemaphore_t *extSem_out;
342
+ const struct cudaExternalSemaphoreHandleDesc *semHandleDesc;
343
+ } cudaImportExternalSemaphore_v10000_params;
344
+
345
+ typedef struct cudaSignalExternalSemaphoresAsync_v2_ptsz_v11020_params_st {
346
+ const cudaExternalSemaphore_t *extSemArray;
347
+ const struct cudaExternalSemaphoreSignalParams *paramsArray;
348
+ unsigned int numExtSems;
349
+ cudaStream_t stream;
350
+ } cudaSignalExternalSemaphoresAsync_v2_ptsz_v11020_params;
351
+
352
+ typedef struct cudaWaitExternalSemaphoresAsync_v2_ptsz_v11020_params_st {
353
+ const cudaExternalSemaphore_t *extSemArray;
354
+ const struct cudaExternalSemaphoreWaitParams *paramsArray;
355
+ unsigned int numExtSems;
356
+ cudaStream_t stream;
357
+ } cudaWaitExternalSemaphoresAsync_v2_ptsz_v11020_params;
358
+
359
+ typedef struct cudaDestroyExternalSemaphore_v10000_params_st {
360
+ cudaExternalSemaphore_t extSem;
361
+ } cudaDestroyExternalSemaphore_v10000_params;
362
+
363
+ typedef struct cudaLaunchKernel_ptsz_v7000_params_st {
364
+ const void *func;
365
+ dim3 gridDim;
366
+ dim3 blockDim;
367
+ void **args;
368
+ size_t sharedMem;
369
+ cudaStream_t stream;
370
+ } cudaLaunchKernel_ptsz_v7000_params;
371
+
372
+ typedef struct cudaLaunchKernelExC_ptsz_v11060_params_st {
373
+ const cudaLaunchConfig_t *config;
374
+ const void *func;
375
+ void **args;
376
+ } cudaLaunchKernelExC_ptsz_v11060_params;
377
+
378
+ typedef struct cudaLaunchCooperativeKernel_ptsz_v9000_params_st {
379
+ const void *func;
380
+ dim3 gridDim;
381
+ dim3 blockDim;
382
+ void **args;
383
+ size_t sharedMem;
384
+ cudaStream_t stream;
385
+ } cudaLaunchCooperativeKernel_ptsz_v9000_params;
386
+
387
+ typedef struct cudaLaunchCooperativeKernelMultiDevice_v9000_params_st {
388
+ struct cudaLaunchParams *launchParamsList;
389
+ unsigned int numDevices;
390
+ unsigned int flags;
391
+ } cudaLaunchCooperativeKernelMultiDevice_v9000_params;
392
+
393
+ typedef struct cudaFuncSetCacheConfig_v3020_params_st {
394
+ const void *func;
395
+ enum cudaFuncCache cacheConfig;
396
+ } cudaFuncSetCacheConfig_v3020_params;
397
+
398
+ typedef struct cudaFuncSetSharedMemConfig_v4020_params_st {
399
+ const void *func;
400
+ enum cudaSharedMemConfig config;
401
+ } cudaFuncSetSharedMemConfig_v4020_params;
402
+
403
+ typedef struct cudaFuncGetAttributes_v3020_params_st {
404
+ struct cudaFuncAttributes *attr;
405
+ const void *func;
406
+ } cudaFuncGetAttributes_v3020_params;
407
+
408
+ typedef struct cudaFuncSetAttribute_v9000_params_st {
409
+ const void *func;
410
+ enum cudaFuncAttribute attr;
411
+ int value;
412
+ } cudaFuncSetAttribute_v9000_params;
413
+
414
+ typedef struct cudaLaunchHostFunc_ptsz_v10000_params_st {
415
+ cudaStream_t stream;
416
+ cudaHostFn_t fn;
417
+ void *userData;
418
+ } cudaLaunchHostFunc_ptsz_v10000_params;
419
+
420
+ typedef struct cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050_params_st {
421
+ int *numBlocks;
422
+ const void *func;
423
+ int blockSize;
424
+ size_t dynamicSMemSize;
425
+ } cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050_params;
426
+
427
+ typedef struct cudaOccupancyAvailableDynamicSMemPerBlock_v10200_params_st {
428
+ size_t *dynamicSmemSize;
429
+ const void *func;
430
+ int numBlocks;
431
+ int blockSize;
432
+ } cudaOccupancyAvailableDynamicSMemPerBlock_v10200_params;
433
+
434
+ typedef struct cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_v7000_params_st {
435
+ int *numBlocks;
436
+ const void *func;
437
+ int blockSize;
438
+ size_t dynamicSMemSize;
439
+ unsigned int flags;
440
+ } cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_v7000_params;
441
+
442
+ typedef struct cudaOccupancyMaxPotentialClusterSize_v11070_params_st {
443
+ int *clusterSize;
444
+ const void *func;
445
+ const cudaLaunchConfig_t *launchConfig;
446
+ } cudaOccupancyMaxPotentialClusterSize_v11070_params;
447
+
448
+ typedef struct cudaOccupancyMaxActiveClusters_v11070_params_st {
449
+ int *numClusters;
450
+ const void *func;
451
+ const cudaLaunchConfig_t *launchConfig;
452
+ } cudaOccupancyMaxActiveClusters_v11070_params;
453
+
454
+ typedef struct cudaMallocManaged_v6000_params_st {
455
+ void **devPtr;
456
+ size_t size;
457
+ unsigned int flags;
458
+ } cudaMallocManaged_v6000_params;
459
+
460
+ typedef struct cudaMalloc_v3020_params_st {
461
+ void **devPtr;
462
+ size_t size;
463
+ } cudaMalloc_v3020_params;
464
+
465
+ typedef struct cudaMallocHost_v3020_params_st {
466
+ void **ptr;
467
+ size_t size;
468
+ } cudaMallocHost_v3020_params;
469
+
470
+ typedef struct cudaMallocPitch_v3020_params_st {
471
+ void **devPtr;
472
+ size_t *pitch;
473
+ size_t width;
474
+ size_t height;
475
+ } cudaMallocPitch_v3020_params;
476
+
477
+ typedef struct cudaMallocArray_v3020_params_st {
478
+ cudaArray_t *array;
479
+ const struct cudaChannelFormatDesc *desc;
480
+ size_t width;
481
+ size_t height;
482
+ unsigned int flags;
483
+ } cudaMallocArray_v3020_params;
484
+
485
+ typedef struct cudaFree_v3020_params_st {
486
+ void *devPtr;
487
+ } cudaFree_v3020_params;
488
+
489
+ typedef struct cudaFreeHost_v3020_params_st {
490
+ void *ptr;
491
+ } cudaFreeHost_v3020_params;
492
+
493
+ typedef struct cudaFreeArray_v3020_params_st {
494
+ cudaArray_t array;
495
+ } cudaFreeArray_v3020_params;
496
+
497
+ typedef struct cudaFreeMipmappedArray_v5000_params_st {
498
+ cudaMipmappedArray_t mipmappedArray;
499
+ } cudaFreeMipmappedArray_v5000_params;
500
+
501
+ typedef struct cudaHostAlloc_v3020_params_st {
502
+ void **pHost;
503
+ size_t size;
504
+ unsigned int flags;
505
+ } cudaHostAlloc_v3020_params;
506
+
507
+ typedef struct cudaHostRegister_v4000_params_st {
508
+ void *ptr;
509
+ size_t size;
510
+ unsigned int flags;
511
+ } cudaHostRegister_v4000_params;
512
+
513
+ typedef struct cudaHostUnregister_v4000_params_st {
514
+ void *ptr;
515
+ } cudaHostUnregister_v4000_params;
516
+
517
+ typedef struct cudaHostGetDevicePointer_v3020_params_st {
518
+ void **pDevice;
519
+ void *pHost;
520
+ unsigned int flags;
521
+ } cudaHostGetDevicePointer_v3020_params;
522
+
523
+ typedef struct cudaHostGetFlags_v3020_params_st {
524
+ unsigned int *pFlags;
525
+ void *pHost;
526
+ } cudaHostGetFlags_v3020_params;
527
+
528
+ typedef struct cudaMalloc3D_v3020_params_st {
529
+ struct cudaPitchedPtr *pitchedDevPtr;
530
+ struct cudaExtent extent;
531
+ } cudaMalloc3D_v3020_params;
532
+
533
+ typedef struct cudaMalloc3DArray_v3020_params_st {
534
+ cudaArray_t *array;
535
+ const struct cudaChannelFormatDesc *desc;
536
+ struct cudaExtent extent;
537
+ unsigned int flags;
538
+ } cudaMalloc3DArray_v3020_params;
539
+
540
+ typedef struct cudaMallocMipmappedArray_v5000_params_st {
541
+ cudaMipmappedArray_t *mipmappedArray;
542
+ const struct cudaChannelFormatDesc *desc;
543
+ struct cudaExtent extent;
544
+ unsigned int numLevels;
545
+ unsigned int flags;
546
+ } cudaMallocMipmappedArray_v5000_params;
547
+
548
+ typedef struct cudaGetMipmappedArrayLevel_v5000_params_st {
549
+ cudaArray_t *levelArray;
550
+ cudaMipmappedArray_const_t mipmappedArray;
551
+ unsigned int level;
552
+ } cudaGetMipmappedArrayLevel_v5000_params;
553
+
554
+ typedef struct cudaMemcpy3D_ptds_v7000_params_st {
555
+ const struct cudaMemcpy3DParms *p;
556
+ } cudaMemcpy3D_ptds_v7000_params;
557
+
558
+ typedef struct cudaMemcpy3DPeer_ptds_v7000_params_st {
559
+ const struct cudaMemcpy3DPeerParms *p;
560
+ } cudaMemcpy3DPeer_ptds_v7000_params;
561
+
562
+ typedef struct cudaMemcpy3DAsync_ptsz_v7000_params_st {
563
+ const struct cudaMemcpy3DParms *p;
564
+ cudaStream_t stream;
565
+ } cudaMemcpy3DAsync_ptsz_v7000_params;
566
+
567
+ typedef struct cudaMemcpy3DPeerAsync_ptsz_v7000_params_st {
568
+ const struct cudaMemcpy3DPeerParms *p;
569
+ cudaStream_t stream;
570
+ } cudaMemcpy3DPeerAsync_ptsz_v7000_params;
571
+
572
+ typedef struct cudaMemGetInfo_v3020_params_st {
573
+ size_t *free;
574
+ size_t *total;
575
+ } cudaMemGetInfo_v3020_params;
576
+
577
+ typedef struct cudaArrayGetInfo_v4010_params_st {
578
+ struct cudaChannelFormatDesc *desc;
579
+ struct cudaExtent *extent;
580
+ unsigned int *flags;
581
+ cudaArray_t array;
582
+ } cudaArrayGetInfo_v4010_params;
583
+
584
+ typedef struct cudaArrayGetPlane_v11020_params_st {
585
+ cudaArray_t *pPlaneArray;
586
+ cudaArray_t hArray;
587
+ unsigned int planeIdx;
588
+ } cudaArrayGetPlane_v11020_params;
589
+
590
+ typedef struct cudaArrayGetMemoryRequirements_v11060_params_st {
591
+ struct cudaArrayMemoryRequirements *memoryRequirements;
592
+ cudaArray_t array;
593
+ int device;
594
+ } cudaArrayGetMemoryRequirements_v11060_params;
595
+
596
+ typedef struct cudaMipmappedArrayGetMemoryRequirements_v11060_params_st {
597
+ struct cudaArrayMemoryRequirements *memoryRequirements;
598
+ cudaMipmappedArray_t mipmap;
599
+ int device;
600
+ } cudaMipmappedArrayGetMemoryRequirements_v11060_params;
601
+
602
+ typedef struct cudaArrayGetSparseProperties_v11010_params_st {
603
+ struct cudaArraySparseProperties *sparseProperties;
604
+ cudaArray_t array;
605
+ } cudaArrayGetSparseProperties_v11010_params;
606
+
607
+ typedef struct cudaMipmappedArrayGetSparseProperties_v11010_params_st {
608
+ struct cudaArraySparseProperties *sparseProperties;
609
+ cudaMipmappedArray_t mipmap;
610
+ } cudaMipmappedArrayGetSparseProperties_v11010_params;
611
+
612
+ typedef struct cudaMemcpy_ptds_v7000_params_st {
613
+ void *dst;
614
+ const void *src;
615
+ size_t count;
616
+ enum cudaMemcpyKind kind;
617
+ } cudaMemcpy_ptds_v7000_params;
618
+
619
+ typedef struct cudaMemcpyPeer_v4000_params_st {
620
+ void *dst;
621
+ int dstDevice;
622
+ const void *src;
623
+ int srcDevice;
624
+ size_t count;
625
+ } cudaMemcpyPeer_v4000_params;
626
+
627
+ typedef struct cudaMemcpy2D_ptds_v7000_params_st {
628
+ void *dst;
629
+ size_t dpitch;
630
+ const void *src;
631
+ size_t spitch;
632
+ size_t width;
633
+ size_t height;
634
+ enum cudaMemcpyKind kind;
635
+ } cudaMemcpy2D_ptds_v7000_params;
636
+
637
+ typedef struct cudaMemcpy2DToArray_ptds_v7000_params_st {
638
+ cudaArray_t dst;
639
+ size_t wOffset;
640
+ size_t hOffset;
641
+ const void *src;
642
+ size_t spitch;
643
+ size_t width;
644
+ size_t height;
645
+ enum cudaMemcpyKind kind;
646
+ } cudaMemcpy2DToArray_ptds_v7000_params;
647
+
648
+ typedef struct cudaMemcpy2DFromArray_ptds_v7000_params_st {
649
+ void *dst;
650
+ size_t dpitch;
651
+ cudaArray_const_t src;
652
+ size_t wOffset;
653
+ size_t hOffset;
654
+ size_t width;
655
+ size_t height;
656
+ enum cudaMemcpyKind kind;
657
+ } cudaMemcpy2DFromArray_ptds_v7000_params;
658
+
659
+ typedef struct cudaMemcpy2DArrayToArray_ptds_v7000_params_st {
660
+ cudaArray_t dst;
661
+ size_t wOffsetDst;
662
+ size_t hOffsetDst;
663
+ cudaArray_const_t src;
664
+ size_t wOffsetSrc;
665
+ size_t hOffsetSrc;
666
+ size_t width;
667
+ size_t height;
668
+ enum cudaMemcpyKind kind;
669
+ } cudaMemcpy2DArrayToArray_ptds_v7000_params;
670
+
671
+ typedef struct cudaMemcpyToSymbol_ptds_v7000_params_st {
672
+ const void *symbol;
673
+ const void *src;
674
+ size_t count;
675
+ size_t offset;
676
+ enum cudaMemcpyKind kind;
677
+ } cudaMemcpyToSymbol_ptds_v7000_params;
678
+
679
+ typedef struct cudaMemcpyFromSymbol_ptds_v7000_params_st {
680
+ void *dst;
681
+ const void *symbol;
682
+ size_t count;
683
+ size_t offset;
684
+ enum cudaMemcpyKind kind;
685
+ } cudaMemcpyFromSymbol_ptds_v7000_params;
686
+
687
+ typedef struct cudaMemcpyAsync_ptsz_v7000_params_st {
688
+ void *dst;
689
+ const void *src;
690
+ size_t count;
691
+ enum cudaMemcpyKind kind;
692
+ cudaStream_t stream;
693
+ } cudaMemcpyAsync_ptsz_v7000_params;
694
+
695
+ typedef struct cudaMemcpyPeerAsync_v4000_params_st {
696
+ void *dst;
697
+ int dstDevice;
698
+ const void *src;
699
+ int srcDevice;
700
+ size_t count;
701
+ cudaStream_t stream;
702
+ } cudaMemcpyPeerAsync_v4000_params;
703
+
704
+ typedef struct cudaMemcpy2DAsync_ptsz_v7000_params_st {
705
+ void *dst;
706
+ size_t dpitch;
707
+ const void *src;
708
+ size_t spitch;
709
+ size_t width;
710
+ size_t height;
711
+ enum cudaMemcpyKind kind;
712
+ cudaStream_t stream;
713
+ } cudaMemcpy2DAsync_ptsz_v7000_params;
714
+
715
+ typedef struct cudaMemcpy2DToArrayAsync_ptsz_v7000_params_st {
716
+ cudaArray_t dst;
717
+ size_t wOffset;
718
+ size_t hOffset;
719
+ const void *src;
720
+ size_t spitch;
721
+ size_t width;
722
+ size_t height;
723
+ enum cudaMemcpyKind kind;
724
+ cudaStream_t stream;
725
+ } cudaMemcpy2DToArrayAsync_ptsz_v7000_params;
726
+
727
+ typedef struct cudaMemcpy2DFromArrayAsync_ptsz_v7000_params_st {
728
+ void *dst;
729
+ size_t dpitch;
730
+ cudaArray_const_t src;
731
+ size_t wOffset;
732
+ size_t hOffset;
733
+ size_t width;
734
+ size_t height;
735
+ enum cudaMemcpyKind kind;
736
+ cudaStream_t stream;
737
+ } cudaMemcpy2DFromArrayAsync_ptsz_v7000_params;
738
+
739
+ typedef struct cudaMemcpyToSymbolAsync_ptsz_v7000_params_st {
740
+ const void *symbol;
741
+ const void *src;
742
+ size_t count;
743
+ size_t offset;
744
+ enum cudaMemcpyKind kind;
745
+ cudaStream_t stream;
746
+ } cudaMemcpyToSymbolAsync_ptsz_v7000_params;
747
+
748
+ typedef struct cudaMemcpyFromSymbolAsync_ptsz_v7000_params_st {
749
+ void *dst;
750
+ const void *symbol;
751
+ size_t count;
752
+ size_t offset;
753
+ enum cudaMemcpyKind kind;
754
+ cudaStream_t stream;
755
+ } cudaMemcpyFromSymbolAsync_ptsz_v7000_params;
756
+
757
+ typedef struct cudaMemset_ptds_v7000_params_st {
758
+ void *devPtr;
759
+ int value;
760
+ size_t count;
761
+ } cudaMemset_ptds_v7000_params;
762
+
763
+ typedef struct cudaMemset2D_ptds_v7000_params_st {
764
+ void *devPtr;
765
+ size_t pitch;
766
+ int value;
767
+ size_t width;
768
+ size_t height;
769
+ } cudaMemset2D_ptds_v7000_params;
770
+
771
+ typedef struct cudaMemset3D_ptds_v7000_params_st {
772
+ struct cudaPitchedPtr pitchedDevPtr;
773
+ int value;
774
+ struct cudaExtent extent;
775
+ } cudaMemset3D_ptds_v7000_params;
776
+
777
+ typedef struct cudaMemsetAsync_ptsz_v7000_params_st {
778
+ void *devPtr;
779
+ int value;
780
+ size_t count;
781
+ cudaStream_t stream;
782
+ } cudaMemsetAsync_ptsz_v7000_params;
783
+
784
+ typedef struct cudaMemset2DAsync_ptsz_v7000_params_st {
785
+ void *devPtr;
786
+ size_t pitch;
787
+ int value;
788
+ size_t width;
789
+ size_t height;
790
+ cudaStream_t stream;
791
+ } cudaMemset2DAsync_ptsz_v7000_params;
792
+
793
+ typedef struct cudaMemset3DAsync_ptsz_v7000_params_st {
794
+ struct cudaPitchedPtr pitchedDevPtr;
795
+ int value;
796
+ struct cudaExtent extent;
797
+ cudaStream_t stream;
798
+ } cudaMemset3DAsync_ptsz_v7000_params;
799
+
800
+ typedef struct cudaGetSymbolAddress_v3020_params_st {
801
+ void **devPtr;
802
+ const void *symbol;
803
+ } cudaGetSymbolAddress_v3020_params;
804
+
805
+ typedef struct cudaGetSymbolSize_v3020_params_st {
806
+ size_t *size;
807
+ const void *symbol;
808
+ } cudaGetSymbolSize_v3020_params;
809
+
810
+ typedef struct cudaMemPrefetchAsync_ptsz_v8000_params_st {
811
+ const void *devPtr;
812
+ size_t count;
813
+ int dstDevice;
814
+ cudaStream_t stream;
815
+ } cudaMemPrefetchAsync_ptsz_v8000_params;
816
+
817
+ typedef struct cudaMemAdvise_v8000_params_st {
818
+ const void *devPtr;
819
+ size_t count;
820
+ enum cudaMemoryAdvise advice;
821
+ int device;
822
+ } cudaMemAdvise_v8000_params;
823
+
824
+ typedef struct cudaMemRangeGetAttribute_v8000_params_st {
825
+ void *data;
826
+ size_t dataSize;
827
+ enum cudaMemRangeAttribute attribute;
828
+ const void *devPtr;
829
+ size_t count;
830
+ } cudaMemRangeGetAttribute_v8000_params;
831
+
832
+ typedef struct cudaMemRangeGetAttributes_v8000_params_st {
833
+ void **data;
834
+ size_t *dataSizes;
835
+ enum cudaMemRangeAttribute *attributes;
836
+ size_t numAttributes;
837
+ const void *devPtr;
838
+ size_t count;
839
+ } cudaMemRangeGetAttributes_v8000_params;
840
+
841
+ typedef struct cudaMemcpyToArray_ptds_v7000_params_st {
842
+ cudaArray_t dst;
843
+ size_t wOffset;
844
+ size_t hOffset;
845
+ const void *src;
846
+ size_t count;
847
+ enum cudaMemcpyKind kind;
848
+ } cudaMemcpyToArray_ptds_v7000_params;
849
+
850
+ typedef struct cudaMemcpyFromArray_ptds_v7000_params_st {
851
+ void *dst;
852
+ cudaArray_const_t src;
853
+ size_t wOffset;
854
+ size_t hOffset;
855
+ size_t count;
856
+ enum cudaMemcpyKind kind;
857
+ } cudaMemcpyFromArray_ptds_v7000_params;
858
+
859
+ typedef struct cudaMemcpyArrayToArray_ptds_v7000_params_st {
860
+ cudaArray_t dst;
861
+ size_t wOffsetDst;
862
+ size_t hOffsetDst;
863
+ cudaArray_const_t src;
864
+ size_t wOffsetSrc;
865
+ size_t hOffsetSrc;
866
+ size_t count;
867
+ enum cudaMemcpyKind kind;
868
+ } cudaMemcpyArrayToArray_ptds_v7000_params;
869
+
870
+ typedef struct cudaMemcpyToArrayAsync_ptsz_v7000_params_st {
871
+ cudaArray_t dst;
872
+ size_t wOffset;
873
+ size_t hOffset;
874
+ const void *src;
875
+ size_t count;
876
+ enum cudaMemcpyKind kind;
877
+ cudaStream_t stream;
878
+ } cudaMemcpyToArrayAsync_ptsz_v7000_params;
879
+
880
+ typedef struct cudaMemcpyFromArrayAsync_ptsz_v7000_params_st {
881
+ void *dst;
882
+ cudaArray_const_t src;
883
+ size_t wOffset;
884
+ size_t hOffset;
885
+ size_t count;
886
+ enum cudaMemcpyKind kind;
887
+ cudaStream_t stream;
888
+ } cudaMemcpyFromArrayAsync_ptsz_v7000_params;
889
+
890
+ typedef struct cudaMallocAsync_ptsz_v11020_params_st {
891
+ void **devPtr;
892
+ size_t size;
893
+ cudaStream_t hStream;
894
+ } cudaMallocAsync_ptsz_v11020_params;
895
+
896
+ typedef struct cudaFreeAsync_ptsz_v11020_params_st {
897
+ void *devPtr;
898
+ cudaStream_t hStream;
899
+ } cudaFreeAsync_ptsz_v11020_params;
900
+
901
+ typedef struct cudaMemPoolTrimTo_v11020_params_st {
902
+ cudaMemPool_t memPool;
903
+ size_t minBytesToKeep;
904
+ } cudaMemPoolTrimTo_v11020_params;
905
+
906
+ typedef struct cudaMemPoolSetAttribute_v11020_params_st {
907
+ cudaMemPool_t memPool;
908
+ enum cudaMemPoolAttr attr;
909
+ void *value;
910
+ } cudaMemPoolSetAttribute_v11020_params;
911
+
912
+ typedef struct cudaMemPoolGetAttribute_v11020_params_st {
913
+ cudaMemPool_t memPool;
914
+ enum cudaMemPoolAttr attr;
915
+ void *value;
916
+ } cudaMemPoolGetAttribute_v11020_params;
917
+
918
+ typedef struct cudaMemPoolSetAccess_v11020_params_st {
919
+ cudaMemPool_t memPool;
920
+ const struct cudaMemAccessDesc *descList;
921
+ size_t count;
922
+ } cudaMemPoolSetAccess_v11020_params;
923
+
924
+ typedef struct cudaMemPoolGetAccess_v11020_params_st {
925
+ enum cudaMemAccessFlags *flags;
926
+ cudaMemPool_t memPool;
927
+ struct cudaMemLocation *location;
928
+ } cudaMemPoolGetAccess_v11020_params;
929
+
930
+ typedef struct cudaMemPoolCreate_v11020_params_st {
931
+ cudaMemPool_t *memPool;
932
+ const struct cudaMemPoolProps *poolProps;
933
+ } cudaMemPoolCreate_v11020_params;
934
+
935
+ typedef struct cudaMemPoolDestroy_v11020_params_st {
936
+ cudaMemPool_t memPool;
937
+ } cudaMemPoolDestroy_v11020_params;
938
+
939
+ typedef struct cudaMallocFromPoolAsync_ptsz_v11020_params_st {
940
+ void **ptr;
941
+ size_t size;
942
+ cudaMemPool_t memPool;
943
+ cudaStream_t stream;
944
+ } cudaMallocFromPoolAsync_ptsz_v11020_params;
945
+
946
+ typedef struct cudaMemPoolExportToShareableHandle_v11020_params_st {
947
+ void *shareableHandle;
948
+ cudaMemPool_t memPool;
949
+ enum cudaMemAllocationHandleType handleType;
950
+ unsigned int flags;
951
+ } cudaMemPoolExportToShareableHandle_v11020_params;
952
+
953
+ typedef struct cudaMemPoolImportFromShareableHandle_v11020_params_st {
954
+ cudaMemPool_t *memPool;
955
+ void *shareableHandle;
956
+ enum cudaMemAllocationHandleType handleType;
957
+ unsigned int flags;
958
+ } cudaMemPoolImportFromShareableHandle_v11020_params;
959
+
960
+ typedef struct cudaMemPoolExportPointer_v11020_params_st {
961
+ struct cudaMemPoolPtrExportData *exportData;
962
+ void *ptr;
963
+ } cudaMemPoolExportPointer_v11020_params;
964
+
965
+ typedef struct cudaMemPoolImportPointer_v11020_params_st {
966
+ void **ptr;
967
+ cudaMemPool_t memPool;
968
+ struct cudaMemPoolPtrExportData *exportData;
969
+ } cudaMemPoolImportPointer_v11020_params;
970
+
971
+ typedef struct cudaPointerGetAttributes_v4000_params_st {
972
+ struct cudaPointerAttributes *attributes;
973
+ const void *ptr;
974
+ } cudaPointerGetAttributes_v4000_params;
975
+
976
+ typedef struct cudaDeviceCanAccessPeer_v4000_params_st {
977
+ int *canAccessPeer;
978
+ int device;
979
+ int peerDevice;
980
+ } cudaDeviceCanAccessPeer_v4000_params;
981
+
982
+ typedef struct cudaDeviceEnablePeerAccess_v4000_params_st {
983
+ int peerDevice;
984
+ unsigned int flags;
985
+ } cudaDeviceEnablePeerAccess_v4000_params;
986
+
987
+ typedef struct cudaDeviceDisablePeerAccess_v4000_params_st {
988
+ int peerDevice;
989
+ } cudaDeviceDisablePeerAccess_v4000_params;
990
+
991
+ typedef struct cudaGraphicsUnregisterResource_v3020_params_st {
992
+ cudaGraphicsResource_t resource;
993
+ } cudaGraphicsUnregisterResource_v3020_params;
994
+
995
+ typedef struct cudaGraphicsResourceSetMapFlags_v3020_params_st {
996
+ cudaGraphicsResource_t resource;
997
+ unsigned int flags;
998
+ } cudaGraphicsResourceSetMapFlags_v3020_params;
999
+
1000
+ typedef struct cudaGraphicsMapResources_v3020_params_st {
1001
+ int count;
1002
+ cudaGraphicsResource_t *resources;
1003
+ cudaStream_t stream;
1004
+ } cudaGraphicsMapResources_v3020_params;
1005
+
1006
+ typedef struct cudaGraphicsUnmapResources_v3020_params_st {
1007
+ int count;
1008
+ cudaGraphicsResource_t *resources;
1009
+ cudaStream_t stream;
1010
+ } cudaGraphicsUnmapResources_v3020_params;
1011
+
1012
+ typedef struct cudaGraphicsResourceGetMappedPointer_v3020_params_st {
1013
+ void **devPtr;
1014
+ size_t *size;
1015
+ cudaGraphicsResource_t resource;
1016
+ } cudaGraphicsResourceGetMappedPointer_v3020_params;
1017
+
1018
+ typedef struct cudaGraphicsSubResourceGetMappedArray_v3020_params_st {
1019
+ cudaArray_t *array;
1020
+ cudaGraphicsResource_t resource;
1021
+ unsigned int arrayIndex;
1022
+ unsigned int mipLevel;
1023
+ } cudaGraphicsSubResourceGetMappedArray_v3020_params;
1024
+
1025
+ typedef struct cudaGraphicsResourceGetMappedMipmappedArray_v5000_params_st {
1026
+ cudaMipmappedArray_t *mipmappedArray;
1027
+ cudaGraphicsResource_t resource;
1028
+ } cudaGraphicsResourceGetMappedMipmappedArray_v5000_params;
1029
+
1030
+ typedef struct cudaBindTexture_v3020_params_st {
1031
+ size_t *offset;
1032
+ const struct textureReference *texref;
1033
+ const void *devPtr;
1034
+ const struct cudaChannelFormatDesc *desc;
1035
+ size_t size;
1036
+ } cudaBindTexture_v3020_params;
1037
+
1038
+ typedef struct cudaBindTexture2D_v3020_params_st {
1039
+ size_t *offset;
1040
+ const struct textureReference *texref;
1041
+ const void *devPtr;
1042
+ const struct cudaChannelFormatDesc *desc;
1043
+ size_t width;
1044
+ size_t height;
1045
+ size_t pitch;
1046
+ } cudaBindTexture2D_v3020_params;
1047
+
1048
+ typedef struct cudaBindTextureToArray_v3020_params_st {
1049
+ const struct textureReference *texref;
1050
+ cudaArray_const_t array;
1051
+ const struct cudaChannelFormatDesc *desc;
1052
+ } cudaBindTextureToArray_v3020_params;
1053
+
1054
+ typedef struct cudaBindTextureToMipmappedArray_v5000_params_st {
1055
+ const struct textureReference *texref;
1056
+ cudaMipmappedArray_const_t mipmappedArray;
1057
+ const struct cudaChannelFormatDesc *desc;
1058
+ } cudaBindTextureToMipmappedArray_v5000_params;
1059
+
1060
+ typedef struct cudaUnbindTexture_v3020_params_st {
1061
+ const struct textureReference *texref;
1062
+ } cudaUnbindTexture_v3020_params;
1063
+
1064
+ typedef struct cudaGetTextureAlignmentOffset_v3020_params_st {
1065
+ size_t *offset;
1066
+ const struct textureReference *texref;
1067
+ } cudaGetTextureAlignmentOffset_v3020_params;
1068
+
1069
+ typedef struct cudaGetTextureReference_v3020_params_st {
1070
+ const struct textureReference **texref;
1071
+ const void *symbol;
1072
+ } cudaGetTextureReference_v3020_params;
1073
+
1074
+ typedef struct cudaBindSurfaceToArray_v3020_params_st {
1075
+ const struct surfaceReference *surfref;
1076
+ cudaArray_const_t array;
1077
+ const struct cudaChannelFormatDesc *desc;
1078
+ } cudaBindSurfaceToArray_v3020_params;
1079
+
1080
+ typedef struct cudaGetSurfaceReference_v3020_params_st {
1081
+ const struct surfaceReference **surfref;
1082
+ const void *symbol;
1083
+ } cudaGetSurfaceReference_v3020_params;
1084
+
1085
+ typedef struct cudaGetChannelDesc_v3020_params_st {
1086
+ struct cudaChannelFormatDesc *desc;
1087
+ cudaArray_const_t array;
1088
+ } cudaGetChannelDesc_v3020_params;
1089
+
1090
+ typedef struct cudaCreateChannelDesc_v3020_params_st {
1091
+ int x;
1092
+ int y;
1093
+ int z;
1094
+ int w;
1095
+ enum cudaChannelFormatKind f;
1096
+ } cudaCreateChannelDesc_v3020_params;
1097
+
1098
+ typedef struct cudaCreateTextureObject_v5000_params_st {
1099
+ cudaTextureObject_t *pTexObject;
1100
+ const struct cudaResourceDesc *pResDesc;
1101
+ const struct cudaTextureDesc *pTexDesc;
1102
+ const struct cudaResourceViewDesc *pResViewDesc;
1103
+ } cudaCreateTextureObject_v5000_params;
1104
+
1105
+ typedef struct cudaDestroyTextureObject_v5000_params_st {
1106
+ cudaTextureObject_t texObject;
1107
+ } cudaDestroyTextureObject_v5000_params;
1108
+
1109
+ typedef struct cudaGetTextureObjectResourceDesc_v5000_params_st {
1110
+ struct cudaResourceDesc *pResDesc;
1111
+ cudaTextureObject_t texObject;
1112
+ } cudaGetTextureObjectResourceDesc_v5000_params;
1113
+
1114
+ typedef struct cudaGetTextureObjectTextureDesc_v5000_params_st {
1115
+ struct cudaTextureDesc *pTexDesc;
1116
+ cudaTextureObject_t texObject;
1117
+ } cudaGetTextureObjectTextureDesc_v5000_params;
1118
+
1119
+ typedef struct cudaGetTextureObjectResourceViewDesc_v5000_params_st {
1120
+ struct cudaResourceViewDesc *pResViewDesc;
1121
+ cudaTextureObject_t texObject;
1122
+ } cudaGetTextureObjectResourceViewDesc_v5000_params;
1123
+
1124
+ typedef struct cudaCreateSurfaceObject_v5000_params_st {
1125
+ cudaSurfaceObject_t *pSurfObject;
1126
+ const struct cudaResourceDesc *pResDesc;
1127
+ } cudaCreateSurfaceObject_v5000_params;
1128
+
1129
+ typedef struct cudaDestroySurfaceObject_v5000_params_st {
1130
+ cudaSurfaceObject_t surfObject;
1131
+ } cudaDestroySurfaceObject_v5000_params;
1132
+
1133
+ typedef struct cudaGetSurfaceObjectResourceDesc_v5000_params_st {
1134
+ struct cudaResourceDesc *pResDesc;
1135
+ cudaSurfaceObject_t surfObject;
1136
+ } cudaGetSurfaceObjectResourceDesc_v5000_params;
1137
+
1138
+ typedef struct cudaDriverGetVersion_v3020_params_st {
1139
+ int *driverVersion;
1140
+ } cudaDriverGetVersion_v3020_params;
1141
+
1142
+ typedef struct cudaRuntimeGetVersion_v3020_params_st {
1143
+ int *runtimeVersion;
1144
+ } cudaRuntimeGetVersion_v3020_params;
1145
+
1146
+ typedef struct cudaGraphCreate_v10000_params_st {
1147
+ cudaGraph_t *pGraph;
1148
+ unsigned int flags;
1149
+ } cudaGraphCreate_v10000_params;
1150
+
1151
+ typedef struct cudaGraphAddKernelNode_v10000_params_st {
1152
+ cudaGraphNode_t *pGraphNode;
1153
+ cudaGraph_t graph;
1154
+ const cudaGraphNode_t *pDependencies;
1155
+ size_t numDependencies;
1156
+ const struct cudaKernelNodeParams *pNodeParams;
1157
+ } cudaGraphAddKernelNode_v10000_params;
1158
+
1159
+ typedef struct cudaGraphKernelNodeGetParams_v10000_params_st {
1160
+ cudaGraphNode_t node;
1161
+ struct cudaKernelNodeParams *pNodeParams;
1162
+ } cudaGraphKernelNodeGetParams_v10000_params;
1163
+
1164
+ typedef struct cudaGraphKernelNodeSetParams_v10000_params_st {
1165
+ cudaGraphNode_t node;
1166
+ const struct cudaKernelNodeParams *pNodeParams;
1167
+ } cudaGraphKernelNodeSetParams_v10000_params;
1168
+
1169
+ typedef struct cudaGraphKernelNodeCopyAttributes_v11000_params_st {
1170
+ cudaGraphNode_t hSrc;
1171
+ cudaGraphNode_t hDst;
1172
+ } cudaGraphKernelNodeCopyAttributes_v11000_params;
1173
+
1174
+ typedef struct cudaGraphKernelNodeGetAttribute_v11000_params_st {
1175
+ cudaGraphNode_t hNode;
1176
+ cudaKernelNodeAttrID attr;
1177
+ cudaKernelNodeAttrValue *value_out;
1178
+ } cudaGraphKernelNodeGetAttribute_v11000_params;
1179
+
1180
+ typedef struct cudaGraphKernelNodeSetAttribute_v11000_params_st {
1181
+ cudaGraphNode_t hNode;
1182
+ cudaKernelNodeAttrID attr;
1183
+ const cudaKernelNodeAttrValue *value;
1184
+ } cudaGraphKernelNodeSetAttribute_v11000_params;
1185
+
1186
+ typedef struct cudaGraphAddMemcpyNode_v10000_params_st {
1187
+ cudaGraphNode_t *pGraphNode;
1188
+ cudaGraph_t graph;
1189
+ const cudaGraphNode_t *pDependencies;
1190
+ size_t numDependencies;
1191
+ const struct cudaMemcpy3DParms *pCopyParams;
1192
+ } cudaGraphAddMemcpyNode_v10000_params;
1193
+
1194
+ typedef struct cudaGraphAddMemcpyNodeToSymbol_v11010_params_st {
1195
+ cudaGraphNode_t *pGraphNode;
1196
+ cudaGraph_t graph;
1197
+ const cudaGraphNode_t *pDependencies;
1198
+ size_t numDependencies;
1199
+ const void *symbol;
1200
+ const void *src;
1201
+ size_t count;
1202
+ size_t offset;
1203
+ enum cudaMemcpyKind kind;
1204
+ } cudaGraphAddMemcpyNodeToSymbol_v11010_params;
1205
+
1206
+ typedef struct cudaGraphAddMemcpyNodeFromSymbol_v11010_params_st {
1207
+ cudaGraphNode_t *pGraphNode;
1208
+ cudaGraph_t graph;
1209
+ const cudaGraphNode_t *pDependencies;
1210
+ size_t numDependencies;
1211
+ void *dst;
1212
+ const void *symbol;
1213
+ size_t count;
1214
+ size_t offset;
1215
+ enum cudaMemcpyKind kind;
1216
+ } cudaGraphAddMemcpyNodeFromSymbol_v11010_params;
1217
+
1218
+ typedef struct cudaGraphAddMemcpyNode1D_v11010_params_st {
1219
+ cudaGraphNode_t *pGraphNode;
1220
+ cudaGraph_t graph;
1221
+ const cudaGraphNode_t *pDependencies;
1222
+ size_t numDependencies;
1223
+ void *dst;
1224
+ const void *src;
1225
+ size_t count;
1226
+ enum cudaMemcpyKind kind;
1227
+ } cudaGraphAddMemcpyNode1D_v11010_params;
1228
+
1229
+ typedef struct cudaGraphMemcpyNodeGetParams_v10000_params_st {
1230
+ cudaGraphNode_t node;
1231
+ struct cudaMemcpy3DParms *pNodeParams;
1232
+ } cudaGraphMemcpyNodeGetParams_v10000_params;
1233
+
1234
+ typedef struct cudaGraphMemcpyNodeSetParams_v10000_params_st {
1235
+ cudaGraphNode_t node;
1236
+ const struct cudaMemcpy3DParms *pNodeParams;
1237
+ } cudaGraphMemcpyNodeSetParams_v10000_params;
1238
+
1239
+ typedef struct cudaGraphMemcpyNodeSetParamsToSymbol_v11010_params_st {
1240
+ cudaGraphNode_t node;
1241
+ const void *symbol;
1242
+ const void *src;
1243
+ size_t count;
1244
+ size_t offset;
1245
+ enum cudaMemcpyKind kind;
1246
+ } cudaGraphMemcpyNodeSetParamsToSymbol_v11010_params;
1247
+
1248
+ typedef struct cudaGraphMemcpyNodeSetParamsFromSymbol_v11010_params_st {
1249
+ cudaGraphNode_t node;
1250
+ void *dst;
1251
+ const void *symbol;
1252
+ size_t count;
1253
+ size_t offset;
1254
+ enum cudaMemcpyKind kind;
1255
+ } cudaGraphMemcpyNodeSetParamsFromSymbol_v11010_params;
1256
+
1257
+ typedef struct cudaGraphMemcpyNodeSetParams1D_v11010_params_st {
1258
+ cudaGraphNode_t node;
1259
+ void *dst;
1260
+ const void *src;
1261
+ size_t count;
1262
+ enum cudaMemcpyKind kind;
1263
+ } cudaGraphMemcpyNodeSetParams1D_v11010_params;
1264
+
1265
+ typedef struct cudaGraphAddMemsetNode_v10000_params_st {
1266
+ cudaGraphNode_t *pGraphNode;
1267
+ cudaGraph_t graph;
1268
+ const cudaGraphNode_t *pDependencies;
1269
+ size_t numDependencies;
1270
+ const struct cudaMemsetParams *pMemsetParams;
1271
+ } cudaGraphAddMemsetNode_v10000_params;
1272
+
1273
+ typedef struct cudaGraphMemsetNodeGetParams_v10000_params_st {
1274
+ cudaGraphNode_t node;
1275
+ struct cudaMemsetParams *pNodeParams;
1276
+ } cudaGraphMemsetNodeGetParams_v10000_params;
1277
+
1278
+ typedef struct cudaGraphMemsetNodeSetParams_v10000_params_st {
1279
+ cudaGraphNode_t node;
1280
+ const struct cudaMemsetParams *pNodeParams;
1281
+ } cudaGraphMemsetNodeSetParams_v10000_params;
1282
+
1283
+ typedef struct cudaGraphAddHostNode_v10000_params_st {
1284
+ cudaGraphNode_t *pGraphNode;
1285
+ cudaGraph_t graph;
1286
+ const cudaGraphNode_t *pDependencies;
1287
+ size_t numDependencies;
1288
+ const struct cudaHostNodeParams *pNodeParams;
1289
+ } cudaGraphAddHostNode_v10000_params;
1290
+
1291
+ typedef struct cudaGraphHostNodeGetParams_v10000_params_st {
1292
+ cudaGraphNode_t node;
1293
+ struct cudaHostNodeParams *pNodeParams;
1294
+ } cudaGraphHostNodeGetParams_v10000_params;
1295
+
1296
+ typedef struct cudaGraphHostNodeSetParams_v10000_params_st {
1297
+ cudaGraphNode_t node;
1298
+ const struct cudaHostNodeParams *pNodeParams;
1299
+ } cudaGraphHostNodeSetParams_v10000_params;
1300
+
1301
+ typedef struct cudaGraphAddChildGraphNode_v10000_params_st {
1302
+ cudaGraphNode_t *pGraphNode;
1303
+ cudaGraph_t graph;
1304
+ const cudaGraphNode_t *pDependencies;
1305
+ size_t numDependencies;
1306
+ cudaGraph_t childGraph;
1307
+ } cudaGraphAddChildGraphNode_v10000_params;
1308
+
1309
+ typedef struct cudaGraphChildGraphNodeGetGraph_v10000_params_st {
1310
+ cudaGraphNode_t node;
1311
+ cudaGraph_t *pGraph;
1312
+ } cudaGraphChildGraphNodeGetGraph_v10000_params;
1313
+
1314
+ typedef struct cudaGraphAddEmptyNode_v10000_params_st {
1315
+ cudaGraphNode_t *pGraphNode;
1316
+ cudaGraph_t graph;
1317
+ const cudaGraphNode_t *pDependencies;
1318
+ size_t numDependencies;
1319
+ } cudaGraphAddEmptyNode_v10000_params;
1320
+
1321
+ typedef struct cudaGraphAddEventRecordNode_v11010_params_st {
1322
+ cudaGraphNode_t *pGraphNode;
1323
+ cudaGraph_t graph;
1324
+ const cudaGraphNode_t *pDependencies;
1325
+ size_t numDependencies;
1326
+ cudaEvent_t event;
1327
+ } cudaGraphAddEventRecordNode_v11010_params;
1328
+
1329
+ typedef struct cudaGraphEventRecordNodeGetEvent_v11010_params_st {
1330
+ cudaGraphNode_t node;
1331
+ cudaEvent_t *event_out;
1332
+ } cudaGraphEventRecordNodeGetEvent_v11010_params;
1333
+
1334
+ typedef struct cudaGraphEventRecordNodeSetEvent_v11010_params_st {
1335
+ cudaGraphNode_t node;
1336
+ cudaEvent_t event;
1337
+ } cudaGraphEventRecordNodeSetEvent_v11010_params;
1338
+
1339
+ typedef struct cudaGraphAddEventWaitNode_v11010_params_st {
1340
+ cudaGraphNode_t *pGraphNode;
1341
+ cudaGraph_t graph;
1342
+ const cudaGraphNode_t *pDependencies;
1343
+ size_t numDependencies;
1344
+ cudaEvent_t event;
1345
+ } cudaGraphAddEventWaitNode_v11010_params;
1346
+
1347
+ typedef struct cudaGraphEventWaitNodeGetEvent_v11010_params_st {
1348
+ cudaGraphNode_t node;
1349
+ cudaEvent_t *event_out;
1350
+ } cudaGraphEventWaitNodeGetEvent_v11010_params;
1351
+
1352
+ typedef struct cudaGraphEventWaitNodeSetEvent_v11010_params_st {
1353
+ cudaGraphNode_t node;
1354
+ cudaEvent_t event;
1355
+ } cudaGraphEventWaitNodeSetEvent_v11010_params;
1356
+
1357
+ typedef struct cudaGraphAddExternalSemaphoresSignalNode_v11020_params_st {
1358
+ cudaGraphNode_t *pGraphNode;
1359
+ cudaGraph_t graph;
1360
+ const cudaGraphNode_t *pDependencies;
1361
+ size_t numDependencies;
1362
+ const struct cudaExternalSemaphoreSignalNodeParams *nodeParams;
1363
+ } cudaGraphAddExternalSemaphoresSignalNode_v11020_params;
1364
+
1365
+ typedef struct cudaGraphExternalSemaphoresSignalNodeGetParams_v11020_params_st {
1366
+ cudaGraphNode_t hNode;
1367
+ struct cudaExternalSemaphoreSignalNodeParams *params_out;
1368
+ } cudaGraphExternalSemaphoresSignalNodeGetParams_v11020_params;
1369
+
1370
+ typedef struct cudaGraphExternalSemaphoresSignalNodeSetParams_v11020_params_st {
1371
+ cudaGraphNode_t hNode;
1372
+ const struct cudaExternalSemaphoreSignalNodeParams *nodeParams;
1373
+ } cudaGraphExternalSemaphoresSignalNodeSetParams_v11020_params;
1374
+
1375
+ typedef struct cudaGraphAddExternalSemaphoresWaitNode_v11020_params_st {
1376
+ cudaGraphNode_t *pGraphNode;
1377
+ cudaGraph_t graph;
1378
+ const cudaGraphNode_t *pDependencies;
1379
+ size_t numDependencies;
1380
+ const struct cudaExternalSemaphoreWaitNodeParams *nodeParams;
1381
+ } cudaGraphAddExternalSemaphoresWaitNode_v11020_params;
1382
+
1383
+ typedef struct cudaGraphExternalSemaphoresWaitNodeGetParams_v11020_params_st {
1384
+ cudaGraphNode_t hNode;
1385
+ struct cudaExternalSemaphoreWaitNodeParams *params_out;
1386
+ } cudaGraphExternalSemaphoresWaitNodeGetParams_v11020_params;
1387
+
1388
+ typedef struct cudaGraphExternalSemaphoresWaitNodeSetParams_v11020_params_st {
1389
+ cudaGraphNode_t hNode;
1390
+ const struct cudaExternalSemaphoreWaitNodeParams *nodeParams;
1391
+ } cudaGraphExternalSemaphoresWaitNodeSetParams_v11020_params;
1392
+
1393
+ typedef struct cudaGraphAddMemAllocNode_v11040_params_st {
1394
+ cudaGraphNode_t *pGraphNode;
1395
+ cudaGraph_t graph;
1396
+ const cudaGraphNode_t *pDependencies;
1397
+ size_t numDependencies;
1398
+ struct cudaMemAllocNodeParams *nodeParams;
1399
+ } cudaGraphAddMemAllocNode_v11040_params;
1400
+
1401
+ typedef struct cudaGraphMemAllocNodeGetParams_v11040_params_st {
1402
+ cudaGraphNode_t node;
1403
+ struct cudaMemAllocNodeParams *params_out;
1404
+ } cudaGraphMemAllocNodeGetParams_v11040_params;
1405
+
1406
+ typedef struct cudaGraphAddMemFreeNode_v11040_params_st {
1407
+ cudaGraphNode_t *pGraphNode;
1408
+ cudaGraph_t graph;
1409
+ const cudaGraphNode_t *pDependencies;
1410
+ size_t numDependencies;
1411
+ void *dptr;
1412
+ } cudaGraphAddMemFreeNode_v11040_params;
1413
+
1414
+ typedef struct cudaGraphMemFreeNodeGetParams_v11040_params_st {
1415
+ cudaGraphNode_t node;
1416
+ void *dptr_out;
1417
+ } cudaGraphMemFreeNodeGetParams_v11040_params;
1418
+
1419
+ typedef struct cudaDeviceGraphMemTrim_v11040_params_st {
1420
+ int device;
1421
+ } cudaDeviceGraphMemTrim_v11040_params;
1422
+
1423
+ typedef struct cudaDeviceGetGraphMemAttribute_v11040_params_st {
1424
+ int device;
1425
+ enum cudaGraphMemAttributeType attr;
1426
+ void *value;
1427
+ } cudaDeviceGetGraphMemAttribute_v11040_params;
1428
+
1429
+ typedef struct cudaDeviceSetGraphMemAttribute_v11040_params_st {
1430
+ int device;
1431
+ enum cudaGraphMemAttributeType attr;
1432
+ void *value;
1433
+ } cudaDeviceSetGraphMemAttribute_v11040_params;
1434
+
1435
+ typedef struct cudaGraphClone_v10000_params_st {
1436
+ cudaGraph_t *pGraphClone;
1437
+ cudaGraph_t originalGraph;
1438
+ } cudaGraphClone_v10000_params;
1439
+
1440
+ typedef struct cudaGraphNodeFindInClone_v10000_params_st {
1441
+ cudaGraphNode_t *pNode;
1442
+ cudaGraphNode_t originalNode;
1443
+ cudaGraph_t clonedGraph;
1444
+ } cudaGraphNodeFindInClone_v10000_params;
1445
+
1446
+ typedef struct cudaGraphNodeGetType_v10000_params_st {
1447
+ cudaGraphNode_t node;
1448
+ enum cudaGraphNodeType *pType;
1449
+ } cudaGraphNodeGetType_v10000_params;
1450
+
1451
+ typedef struct cudaGraphGetNodes_v10000_params_st {
1452
+ cudaGraph_t graph;
1453
+ cudaGraphNode_t *nodes;
1454
+ size_t *numNodes;
1455
+ } cudaGraphGetNodes_v10000_params;
1456
+
1457
+ typedef struct cudaGraphGetRootNodes_v10000_params_st {
1458
+ cudaGraph_t graph;
1459
+ cudaGraphNode_t *pRootNodes;
1460
+ size_t *pNumRootNodes;
1461
+ } cudaGraphGetRootNodes_v10000_params;
1462
+
1463
+ typedef struct cudaGraphGetEdges_v10000_params_st {
1464
+ cudaGraph_t graph;
1465
+ cudaGraphNode_t *from;
1466
+ cudaGraphNode_t *to;
1467
+ size_t *numEdges;
1468
+ } cudaGraphGetEdges_v10000_params;
1469
+
1470
+ typedef struct cudaGraphNodeGetDependencies_v10000_params_st {
1471
+ cudaGraphNode_t node;
1472
+ cudaGraphNode_t *pDependencies;
1473
+ size_t *pNumDependencies;
1474
+ } cudaGraphNodeGetDependencies_v10000_params;
1475
+
1476
+ typedef struct cudaGraphNodeGetDependentNodes_v10000_params_st {
1477
+ cudaGraphNode_t node;
1478
+ cudaGraphNode_t *pDependentNodes;
1479
+ size_t *pNumDependentNodes;
1480
+ } cudaGraphNodeGetDependentNodes_v10000_params;
1481
+
1482
+ typedef struct cudaGraphAddDependencies_v10000_params_st {
1483
+ cudaGraph_t graph;
1484
+ const cudaGraphNode_t *from;
1485
+ const cudaGraphNode_t *to;
1486
+ size_t numDependencies;
1487
+ } cudaGraphAddDependencies_v10000_params;
1488
+
1489
+ typedef struct cudaGraphRemoveDependencies_v10000_params_st {
1490
+ cudaGraph_t graph;
1491
+ const cudaGraphNode_t *from;
1492
+ const cudaGraphNode_t *to;
1493
+ size_t numDependencies;
1494
+ } cudaGraphRemoveDependencies_v10000_params;
1495
+
1496
+ typedef struct cudaGraphDestroyNode_v10000_params_st {
1497
+ cudaGraphNode_t node;
1498
+ } cudaGraphDestroyNode_v10000_params;
1499
+
1500
+ typedef struct cudaGraphInstantiate_v10000_params_st {
1501
+ cudaGraphExec_t *pGraphExec;
1502
+ cudaGraph_t graph;
1503
+ cudaGraphNode_t *pErrorNode;
1504
+ char *pLogBuffer;
1505
+ size_t bufferSize;
1506
+ } cudaGraphInstantiate_v10000_params;
1507
+
1508
+ typedef struct cudaGraphInstantiateWithFlags_v11040_params_st {
1509
+ cudaGraphExec_t *pGraphExec;
1510
+ cudaGraph_t graph;
1511
+ unsigned long long flags;
1512
+ } cudaGraphInstantiateWithFlags_v11040_params;
1513
+
1514
+ typedef struct cudaGraphExecKernelNodeSetParams_v10010_params_st {
1515
+ cudaGraphExec_t hGraphExec;
1516
+ cudaGraphNode_t node;
1517
+ const struct cudaKernelNodeParams *pNodeParams;
1518
+ } cudaGraphExecKernelNodeSetParams_v10010_params;
1519
+
1520
+ typedef struct cudaGraphExecMemcpyNodeSetParams_v10020_params_st {
1521
+ cudaGraphExec_t hGraphExec;
1522
+ cudaGraphNode_t node;
1523
+ const struct cudaMemcpy3DParms *pNodeParams;
1524
+ } cudaGraphExecMemcpyNodeSetParams_v10020_params;
1525
+
1526
+ typedef struct cudaGraphExecMemcpyNodeSetParamsToSymbol_v11010_params_st {
1527
+ cudaGraphExec_t hGraphExec;
1528
+ cudaGraphNode_t node;
1529
+ const void *symbol;
1530
+ const void *src;
1531
+ size_t count;
1532
+ size_t offset;
1533
+ enum cudaMemcpyKind kind;
1534
+ } cudaGraphExecMemcpyNodeSetParamsToSymbol_v11010_params;
1535
+
1536
+ typedef struct cudaGraphExecMemcpyNodeSetParamsFromSymbol_v11010_params_st {
1537
+ cudaGraphExec_t hGraphExec;
1538
+ cudaGraphNode_t node;
1539
+ void *dst;
1540
+ const void *symbol;
1541
+ size_t count;
1542
+ size_t offset;
1543
+ enum cudaMemcpyKind kind;
1544
+ } cudaGraphExecMemcpyNodeSetParamsFromSymbol_v11010_params;
1545
+
1546
+ typedef struct cudaGraphExecMemcpyNodeSetParams1D_v11010_params_st {
1547
+ cudaGraphExec_t hGraphExec;
1548
+ cudaGraphNode_t node;
1549
+ void *dst;
1550
+ const void *src;
1551
+ size_t count;
1552
+ enum cudaMemcpyKind kind;
1553
+ } cudaGraphExecMemcpyNodeSetParams1D_v11010_params;
1554
+
1555
+ typedef struct cudaGraphExecMemsetNodeSetParams_v10020_params_st {
1556
+ cudaGraphExec_t hGraphExec;
1557
+ cudaGraphNode_t node;
1558
+ const struct cudaMemsetParams *pNodeParams;
1559
+ } cudaGraphExecMemsetNodeSetParams_v10020_params;
1560
+
1561
+ typedef struct cudaGraphExecHostNodeSetParams_v10020_params_st {
1562
+ cudaGraphExec_t hGraphExec;
1563
+ cudaGraphNode_t node;
1564
+ const struct cudaHostNodeParams *pNodeParams;
1565
+ } cudaGraphExecHostNodeSetParams_v10020_params;
1566
+
1567
+ typedef struct cudaGraphExecChildGraphNodeSetParams_v11010_params_st {
1568
+ cudaGraphExec_t hGraphExec;
1569
+ cudaGraphNode_t node;
1570
+ cudaGraph_t childGraph;
1571
+ } cudaGraphExecChildGraphNodeSetParams_v11010_params;
1572
+
1573
+ typedef struct cudaGraphExecEventRecordNodeSetEvent_v11010_params_st {
1574
+ cudaGraphExec_t hGraphExec;
1575
+ cudaGraphNode_t hNode;
1576
+ cudaEvent_t event;
1577
+ } cudaGraphExecEventRecordNodeSetEvent_v11010_params;
1578
+
1579
+ typedef struct cudaGraphExecEventWaitNodeSetEvent_v11010_params_st {
1580
+ cudaGraphExec_t hGraphExec;
1581
+ cudaGraphNode_t hNode;
1582
+ cudaEvent_t event;
1583
+ } cudaGraphExecEventWaitNodeSetEvent_v11010_params;
1584
+
1585
+ typedef struct cudaGraphExecExternalSemaphoresSignalNodeSetParams_v11020_params_st {
1586
+ cudaGraphExec_t hGraphExec;
1587
+ cudaGraphNode_t hNode;
1588
+ const struct cudaExternalSemaphoreSignalNodeParams *nodeParams;
1589
+ } cudaGraphExecExternalSemaphoresSignalNodeSetParams_v11020_params;
1590
+
1591
+ typedef struct cudaGraphExecExternalSemaphoresWaitNodeSetParams_v11020_params_st {
1592
+ cudaGraphExec_t hGraphExec;
1593
+ cudaGraphNode_t hNode;
1594
+ const struct cudaExternalSemaphoreWaitNodeParams *nodeParams;
1595
+ } cudaGraphExecExternalSemaphoresWaitNodeSetParams_v11020_params;
1596
+
1597
+ typedef struct cudaGraphNodeSetEnabled_v11060_params_st {
1598
+ cudaGraphExec_t hGraphExec;
1599
+ cudaGraphNode_t hNode;
1600
+ unsigned int isEnabled;
1601
+ } cudaGraphNodeSetEnabled_v11060_params;
1602
+
1603
+ typedef struct cudaGraphNodeGetEnabled_v11060_params_st {
1604
+ cudaGraphExec_t hGraphExec;
1605
+ cudaGraphNode_t hNode;
1606
+ unsigned int *isEnabled;
1607
+ } cudaGraphNodeGetEnabled_v11060_params;
1608
+
1609
+ typedef struct cudaGraphExecUpdate_v10020_params_st {
1610
+ cudaGraphExec_t hGraphExec;
1611
+ cudaGraph_t hGraph;
1612
+ cudaGraphNode_t *hErrorNode_out;
1613
+ enum cudaGraphExecUpdateResult *updateResult_out;
1614
+ } cudaGraphExecUpdate_v10020_params;
1615
+
1616
+ typedef struct cudaGraphUpload_ptsz_v10000_params_st {
1617
+ cudaGraphExec_t graphExec;
1618
+ cudaStream_t stream;
1619
+ } cudaGraphUpload_ptsz_v10000_params;
1620
+
1621
+ typedef struct cudaGraphLaunch_ptsz_v10000_params_st {
1622
+ cudaGraphExec_t graphExec;
1623
+ cudaStream_t stream;
1624
+ } cudaGraphLaunch_ptsz_v10000_params;
1625
+
1626
+ typedef struct cudaGraphExecDestroy_v10000_params_st {
1627
+ cudaGraphExec_t graphExec;
1628
+ } cudaGraphExecDestroy_v10000_params;
1629
+
1630
+ typedef struct cudaGraphDestroy_v10000_params_st {
1631
+ cudaGraph_t graph;
1632
+ } cudaGraphDestroy_v10000_params;
1633
+
1634
+ typedef struct cudaGraphDebugDotPrint_v11030_params_st {
1635
+ cudaGraph_t graph;
1636
+ const char *path;
1637
+ unsigned int flags;
1638
+ } cudaGraphDebugDotPrint_v11030_params;
1639
+
1640
+ typedef struct cudaUserObjectCreate_v11030_params_st {
1641
+ cudaUserObject_t *object_out;
1642
+ void *ptr;
1643
+ cudaHostFn_t destroy;
1644
+ unsigned int initialRefcount;
1645
+ unsigned int flags;
1646
+ } cudaUserObjectCreate_v11030_params;
1647
+
1648
+ typedef struct cudaUserObjectRetain_v11030_params_st {
1649
+ cudaUserObject_t object;
1650
+ unsigned int count;
1651
+ } cudaUserObjectRetain_v11030_params;
1652
+
1653
+ typedef struct cudaUserObjectRelease_v11030_params_st {
1654
+ cudaUserObject_t object;
1655
+ unsigned int count;
1656
+ } cudaUserObjectRelease_v11030_params;
1657
+
1658
+ typedef struct cudaGraphRetainUserObject_v11030_params_st {
1659
+ cudaGraph_t graph;
1660
+ cudaUserObject_t object;
1661
+ unsigned int count;
1662
+ unsigned int flags;
1663
+ } cudaGraphRetainUserObject_v11030_params;
1664
+
1665
+ typedef struct cudaGraphReleaseUserObject_v11030_params_st {
1666
+ cudaGraph_t graph;
1667
+ cudaUserObject_t object;
1668
+ unsigned int count;
1669
+ } cudaGraphReleaseUserObject_v11030_params;
1670
+
1671
+ typedef struct cudaGetDriverEntryPoint_ptsz_v11030_params_st {
1672
+ const char *symbol;
1673
+ void **funcPtr;
1674
+ unsigned long long flags;
1675
+ } cudaGetDriverEntryPoint_ptsz_v11030_params;
1676
+
1677
+ typedef struct cudaGetFuncBySymbol_v11000_params_st {
1678
+ cudaFunction_t *functionPtr;
1679
+ const void *symbolPtr;
1680
+ } cudaGetFuncBySymbol_v11000_params;
1681
+
1682
+ typedef struct cudaMemcpy_v3020_params_st {
1683
+ void *dst;
1684
+ const void *src;
1685
+ size_t count;
1686
+ enum cudaMemcpyKind kind;
1687
+ } cudaMemcpy_v3020_params;
1688
+
1689
+ typedef struct cudaMemcpyToSymbol_v3020_params_st {
1690
+ const void *symbol;
1691
+ const void *src;
1692
+ size_t count;
1693
+ size_t offset;
1694
+ enum cudaMemcpyKind kind;
1695
+ } cudaMemcpyToSymbol_v3020_params;
1696
+
1697
+ typedef struct cudaMemcpyFromSymbol_v3020_params_st {
1698
+ void *dst;
1699
+ const void *symbol;
1700
+ size_t count;
1701
+ size_t offset;
1702
+ enum cudaMemcpyKind kind;
1703
+ } cudaMemcpyFromSymbol_v3020_params;
1704
+
1705
+ typedef struct cudaMemcpy2D_v3020_params_st {
1706
+ void *dst;
1707
+ size_t dpitch;
1708
+ const void *src;
1709
+ size_t spitch;
1710
+ size_t width;
1711
+ size_t height;
1712
+ enum cudaMemcpyKind kind;
1713
+ } cudaMemcpy2D_v3020_params;
1714
+
1715
+ typedef struct cudaMemcpyToArray_v3020_params_st {
1716
+ cudaArray_t dst;
1717
+ size_t wOffset;
1718
+ size_t hOffset;
1719
+ const void *src;
1720
+ size_t count;
1721
+ enum cudaMemcpyKind kind;
1722
+ } cudaMemcpyToArray_v3020_params;
1723
+
1724
+ typedef struct cudaMemcpy2DToArray_v3020_params_st {
1725
+ cudaArray_t dst;
1726
+ size_t wOffset;
1727
+ size_t hOffset;
1728
+ const void *src;
1729
+ size_t spitch;
1730
+ size_t width;
1731
+ size_t height;
1732
+ enum cudaMemcpyKind kind;
1733
+ } cudaMemcpy2DToArray_v3020_params;
1734
+
1735
+ typedef struct cudaMemcpyFromArray_v3020_params_st {
1736
+ void *dst;
1737
+ cudaArray_const_t src;
1738
+ size_t wOffset;
1739
+ size_t hOffset;
1740
+ size_t count;
1741
+ enum cudaMemcpyKind kind;
1742
+ } cudaMemcpyFromArray_v3020_params;
1743
+
1744
+ typedef struct cudaMemcpy2DFromArray_v3020_params_st {
1745
+ void *dst;
1746
+ size_t dpitch;
1747
+ cudaArray_const_t src;
1748
+ size_t wOffset;
1749
+ size_t hOffset;
1750
+ size_t width;
1751
+ size_t height;
1752
+ enum cudaMemcpyKind kind;
1753
+ } cudaMemcpy2DFromArray_v3020_params;
1754
+
1755
+ typedef struct cudaMemcpyArrayToArray_v3020_params_st {
1756
+ cudaArray_t dst;
1757
+ size_t wOffsetDst;
1758
+ size_t hOffsetDst;
1759
+ cudaArray_const_t src;
1760
+ size_t wOffsetSrc;
1761
+ size_t hOffsetSrc;
1762
+ size_t count;
1763
+ enum cudaMemcpyKind kind;
1764
+ } cudaMemcpyArrayToArray_v3020_params;
1765
+
1766
+ typedef struct cudaMemcpy2DArrayToArray_v3020_params_st {
1767
+ cudaArray_t dst;
1768
+ size_t wOffsetDst;
1769
+ size_t hOffsetDst;
1770
+ cudaArray_const_t src;
1771
+ size_t wOffsetSrc;
1772
+ size_t hOffsetSrc;
1773
+ size_t width;
1774
+ size_t height;
1775
+ enum cudaMemcpyKind kind;
1776
+ } cudaMemcpy2DArrayToArray_v3020_params;
1777
+
1778
+ typedef struct cudaMemcpy3D_v3020_params_st {
1779
+ const struct cudaMemcpy3DParms *p;
1780
+ } cudaMemcpy3D_v3020_params;
1781
+
1782
+ typedef struct cudaMemcpy3DPeer_v4000_params_st {
1783
+ const struct cudaMemcpy3DPeerParms *p;
1784
+ } cudaMemcpy3DPeer_v4000_params;
1785
+
1786
+ typedef struct cudaMemset_v3020_params_st {
1787
+ void *devPtr;
1788
+ int value;
1789
+ size_t count;
1790
+ } cudaMemset_v3020_params;
1791
+
1792
+ typedef struct cudaMemset2D_v3020_params_st {
1793
+ void *devPtr;
1794
+ size_t pitch;
1795
+ int value;
1796
+ size_t width;
1797
+ size_t height;
1798
+ } cudaMemset2D_v3020_params;
1799
+
1800
+ typedef struct cudaMemset3D_v3020_params_st {
1801
+ struct cudaPitchedPtr pitchedDevPtr;
1802
+ int value;
1803
+ struct cudaExtent extent;
1804
+ } cudaMemset3D_v3020_params;
1805
+
1806
+ typedef struct cudaMemcpyAsync_v3020_params_st {
1807
+ void *dst;
1808
+ const void *src;
1809
+ size_t count;
1810
+ enum cudaMemcpyKind kind;
1811
+ cudaStream_t stream;
1812
+ } cudaMemcpyAsync_v3020_params;
1813
+
1814
+ typedef struct cudaMemcpyToSymbolAsync_v3020_params_st {
1815
+ const void *symbol;
1816
+ const void *src;
1817
+ size_t count;
1818
+ size_t offset;
1819
+ enum cudaMemcpyKind kind;
1820
+ cudaStream_t stream;
1821
+ } cudaMemcpyToSymbolAsync_v3020_params;
1822
+
1823
+ typedef struct cudaMemcpyFromSymbolAsync_v3020_params_st {
1824
+ void *dst;
1825
+ const void *symbol;
1826
+ size_t count;
1827
+ size_t offset;
1828
+ enum cudaMemcpyKind kind;
1829
+ cudaStream_t stream;
1830
+ } cudaMemcpyFromSymbolAsync_v3020_params;
1831
+
1832
+ typedef struct cudaMemcpy2DAsync_v3020_params_st {
1833
+ void *dst;
1834
+ size_t dpitch;
1835
+ const void *src;
1836
+ size_t spitch;
1837
+ size_t width;
1838
+ size_t height;
1839
+ enum cudaMemcpyKind kind;
1840
+ cudaStream_t stream;
1841
+ } cudaMemcpy2DAsync_v3020_params;
1842
+
1843
+ typedef struct cudaMemcpyToArrayAsync_v3020_params_st {
1844
+ cudaArray_t dst;
1845
+ size_t wOffset;
1846
+ size_t hOffset;
1847
+ const void *src;
1848
+ size_t count;
1849
+ enum cudaMemcpyKind kind;
1850
+ cudaStream_t stream;
1851
+ } cudaMemcpyToArrayAsync_v3020_params;
1852
+
1853
+ typedef struct cudaMemcpy2DToArrayAsync_v3020_params_st {
1854
+ cudaArray_t dst;
1855
+ size_t wOffset;
1856
+ size_t hOffset;
1857
+ const void *src;
1858
+ size_t spitch;
1859
+ size_t width;
1860
+ size_t height;
1861
+ enum cudaMemcpyKind kind;
1862
+ cudaStream_t stream;
1863
+ } cudaMemcpy2DToArrayAsync_v3020_params;
1864
+
1865
+ typedef struct cudaMemcpyFromArrayAsync_v3020_params_st {
1866
+ void *dst;
1867
+ cudaArray_const_t src;
1868
+ size_t wOffset;
1869
+ size_t hOffset;
1870
+ size_t count;
1871
+ enum cudaMemcpyKind kind;
1872
+ cudaStream_t stream;
1873
+ } cudaMemcpyFromArrayAsync_v3020_params;
1874
+
1875
+ typedef struct cudaMemcpy2DFromArrayAsync_v3020_params_st {
1876
+ void *dst;
1877
+ size_t dpitch;
1878
+ cudaArray_const_t src;
1879
+ size_t wOffset;
1880
+ size_t hOffset;
1881
+ size_t width;
1882
+ size_t height;
1883
+ enum cudaMemcpyKind kind;
1884
+ cudaStream_t stream;
1885
+ } cudaMemcpy2DFromArrayAsync_v3020_params;
1886
+
1887
+ typedef struct cudaMemcpy3DAsync_v3020_params_st {
1888
+ const struct cudaMemcpy3DParms *p;
1889
+ cudaStream_t stream;
1890
+ } cudaMemcpy3DAsync_v3020_params;
1891
+
1892
+ typedef struct cudaMemcpy3DPeerAsync_v4000_params_st {
1893
+ const struct cudaMemcpy3DPeerParms *p;
1894
+ cudaStream_t stream;
1895
+ } cudaMemcpy3DPeerAsync_v4000_params;
1896
+
1897
+ typedef struct cudaMemsetAsync_v3020_params_st {
1898
+ void *devPtr;
1899
+ int value;
1900
+ size_t count;
1901
+ cudaStream_t stream;
1902
+ } cudaMemsetAsync_v3020_params;
1903
+
1904
+ typedef struct cudaMemset2DAsync_v3020_params_st {
1905
+ void *devPtr;
1906
+ size_t pitch;
1907
+ int value;
1908
+ size_t width;
1909
+ size_t height;
1910
+ cudaStream_t stream;
1911
+ } cudaMemset2DAsync_v3020_params;
1912
+
1913
+ typedef struct cudaMemset3DAsync_v3020_params_st {
1914
+ struct cudaPitchedPtr pitchedDevPtr;
1915
+ int value;
1916
+ struct cudaExtent extent;
1917
+ cudaStream_t stream;
1918
+ } cudaMemset3DAsync_v3020_params;
1919
+
1920
+ typedef struct cudaStreamQuery_v3020_params_st {
1921
+ cudaStream_t stream;
1922
+ } cudaStreamQuery_v3020_params;
1923
+
1924
+ typedef struct cudaStreamGetFlags_v5050_params_st {
1925
+ cudaStream_t hStream;
1926
+ unsigned int *flags;
1927
+ } cudaStreamGetFlags_v5050_params;
1928
+
1929
+ typedef struct cudaStreamGetPriority_v5050_params_st {
1930
+ cudaStream_t hStream;
1931
+ int *priority;
1932
+ } cudaStreamGetPriority_v5050_params;
1933
+
1934
+ typedef struct cudaEventRecord_v3020_params_st {
1935
+ cudaEvent_t event;
1936
+ cudaStream_t stream;
1937
+ } cudaEventRecord_v3020_params;
1938
+
1939
+ typedef struct cudaEventRecordWithFlags_v11010_params_st {
1940
+ cudaEvent_t event;
1941
+ cudaStream_t stream;
1942
+ unsigned int flags;
1943
+ } cudaEventRecordWithFlags_v11010_params;
1944
+
1945
+ typedef struct cudaStreamWaitEvent_v3020_params_st {
1946
+ cudaStream_t stream;
1947
+ cudaEvent_t event;
1948
+ unsigned int flags;
1949
+ } cudaStreamWaitEvent_v3020_params;
1950
+
1951
+ typedef struct cudaStreamAddCallback_v5000_params_st {
1952
+ cudaStream_t stream;
1953
+ cudaStreamCallback_t callback;
1954
+ void *userData;
1955
+ unsigned int flags;
1956
+ } cudaStreamAddCallback_v5000_params;
1957
+
1958
+ typedef struct cudaStreamAttachMemAsync_v6000_params_st {
1959
+ cudaStream_t stream;
1960
+ void *devPtr;
1961
+ size_t length;
1962
+ unsigned int flags;
1963
+ } cudaStreamAttachMemAsync_v6000_params;
1964
+
1965
+ typedef struct cudaStreamSynchronize_v3020_params_st {
1966
+ cudaStream_t stream;
1967
+ } cudaStreamSynchronize_v3020_params;
1968
+
1969
+ typedef struct cudaLaunchKernel_v7000_params_st {
1970
+ const void *func;
1971
+ dim3 gridDim;
1972
+ dim3 blockDim;
1973
+ void **args;
1974
+ size_t sharedMem;
1975
+ cudaStream_t stream;
1976
+ } cudaLaunchKernel_v7000_params;
1977
+
1978
+ typedef struct cudaLaunchKernelExC_v11060_params_st {
1979
+ const cudaLaunchConfig_t *config;
1980
+ const void *func;
1981
+ void **args;
1982
+ } cudaLaunchKernelExC_v11060_params;
1983
+
1984
+ typedef struct cudaLaunchCooperativeKernel_v9000_params_st {
1985
+ const void *func;
1986
+ dim3 gridDim;
1987
+ dim3 blockDim;
1988
+ void **args;
1989
+ size_t sharedMem;
1990
+ cudaStream_t stream;
1991
+ } cudaLaunchCooperativeKernel_v9000_params;
1992
+
1993
+ typedef struct cudaLaunchHostFunc_v10000_params_st {
1994
+ cudaStream_t stream;
1995
+ cudaHostFn_t fn;
1996
+ void *userData;
1997
+ } cudaLaunchHostFunc_v10000_params;
1998
+
1999
+ typedef struct cudaMemPrefetchAsync_v8000_params_st {
2000
+ const void *devPtr;
2001
+ size_t count;
2002
+ int dstDevice;
2003
+ cudaStream_t stream;
2004
+ } cudaMemPrefetchAsync_v8000_params;
2005
+
2006
+ typedef struct cudaSignalExternalSemaphoresAsync_v10000_params_st {
2007
+ const cudaExternalSemaphore_t *extSemArray;
2008
+ const struct cudaExternalSemaphoreSignalParams_v1 *paramsArray;
2009
+ unsigned int numExtSems;
2010
+ cudaStream_t stream;
2011
+ } cudaSignalExternalSemaphoresAsync_v10000_params;
2012
+
2013
+ typedef struct cudaSignalExternalSemaphoresAsync_ptsz_v10000_params_st {
2014
+ const cudaExternalSemaphore_t *extSemArray;
2015
+ const struct cudaExternalSemaphoreSignalParams_v1 *paramsArray;
2016
+ unsigned int numExtSems;
2017
+ cudaStream_t stream;
2018
+ } cudaSignalExternalSemaphoresAsync_ptsz_v10000_params;
2019
+
2020
+ typedef struct cudaSignalExternalSemaphoresAsync_v2_v11020_params_st {
2021
+ const cudaExternalSemaphore_t *extSemArray;
2022
+ const struct cudaExternalSemaphoreSignalParams *paramsArray;
2023
+ unsigned int numExtSems;
2024
+ cudaStream_t stream;
2025
+ } cudaSignalExternalSemaphoresAsync_v2_v11020_params;
2026
+
2027
+ typedef struct cudaWaitExternalSemaphoresAsync_v10000_params_st {
2028
+ const cudaExternalSemaphore_t *extSemArray;
2029
+ const struct cudaExternalSemaphoreWaitParams_v1 *paramsArray;
2030
+ unsigned int numExtSems;
2031
+ cudaStream_t stream;
2032
+ } cudaWaitExternalSemaphoresAsync_v10000_params;
2033
+
2034
+ typedef struct cudaWaitExternalSemaphoresAsync_ptsz_v10000_params_st {
2035
+ const cudaExternalSemaphore_t *extSemArray;
2036
+ const struct cudaExternalSemaphoreWaitParams_v1 *paramsArray;
2037
+ unsigned int numExtSems;
2038
+ cudaStream_t stream;
2039
+ } cudaWaitExternalSemaphoresAsync_ptsz_v10000_params;
2040
+
2041
+ typedef struct cudaWaitExternalSemaphoresAsync_v2_v11020_params_st {
2042
+ const cudaExternalSemaphore_t *extSemArray;
2043
+ const struct cudaExternalSemaphoreWaitParams *paramsArray;
2044
+ unsigned int numExtSems;
2045
+ cudaStream_t stream;
2046
+ } cudaWaitExternalSemaphoresAsync_v2_v11020_params;
2047
+
2048
+ typedef struct cudaGraphUpload_v10000_params_st {
2049
+ cudaGraphExec_t graphExec;
2050
+ cudaStream_t stream;
2051
+ } cudaGraphUpload_v10000_params;
2052
+
2053
+ typedef struct cudaGraphLaunch_v10000_params_st {
2054
+ cudaGraphExec_t graphExec;
2055
+ cudaStream_t stream;
2056
+ } cudaGraphLaunch_v10000_params;
2057
+
2058
+ typedef struct cudaStreamBeginCapture_v10000_params_st {
2059
+ cudaStream_t stream;
2060
+ enum cudaStreamCaptureMode mode;
2061
+ } cudaStreamBeginCapture_v10000_params;
2062
+
2063
+ typedef struct cudaStreamEndCapture_v10000_params_st {
2064
+ cudaStream_t stream;
2065
+ cudaGraph_t *pGraph;
2066
+ } cudaStreamEndCapture_v10000_params;
2067
+
2068
+ typedef struct cudaStreamIsCapturing_v10000_params_st {
2069
+ cudaStream_t stream;
2070
+ enum cudaStreamCaptureStatus *pCaptureStatus;
2071
+ } cudaStreamIsCapturing_v10000_params;
2072
+
2073
+ typedef struct cudaStreamGetCaptureInfo_v10010_params_st {
2074
+ cudaStream_t stream;
2075
+ enum cudaStreamCaptureStatus *captureStatus_out;
2076
+ unsigned long long *id_out;
2077
+ } cudaStreamGetCaptureInfo_v10010_params;
2078
+
2079
+ typedef struct cudaStreamGetCaptureInfo_v2_v11030_params_st {
2080
+ cudaStream_t stream;
2081
+ enum cudaStreamCaptureStatus *captureStatus_out;
2082
+ unsigned long long *id_out;
2083
+ cudaGraph_t *graph_out;
2084
+ const cudaGraphNode_t **dependencies_out;
2085
+ size_t *numDependencies_out;
2086
+ } cudaStreamGetCaptureInfo_v2_v11030_params;
2087
+
2088
+ typedef struct cudaStreamUpdateCaptureDependencies_ptsz_v11030_params_st {
2089
+ cudaStream_t stream;
2090
+ cudaGraphNode_t *dependencies;
2091
+ size_t numDependencies;
2092
+ unsigned int flags;
2093
+ } cudaStreamUpdateCaptureDependencies_ptsz_v11030_params;
2094
+
2095
+ typedef struct cudaStreamCopyAttributes_v11000_params_st {
2096
+ cudaStream_t dstStream;
2097
+ cudaStream_t srcStream;
2098
+ } cudaStreamCopyAttributes_v11000_params;
2099
+
2100
+ typedef struct cudaStreamGetAttribute_v11000_params_st {
2101
+ cudaStream_t stream;
2102
+ cudaStreamAttrID attr;
2103
+ cudaStreamAttrValue *value;
2104
+ } cudaStreamGetAttribute_v11000_params;
2105
+
2106
+ typedef struct cudaStreamSetAttribute_v11000_params_st {
2107
+ cudaStream_t stream;
2108
+ cudaStreamAttrID attr;
2109
+ const cudaStreamAttrValue *param;
2110
+ } cudaStreamSetAttribute_v11000_params;
2111
+
2112
+ typedef struct cudaMallocAsync_v11020_params_st {
2113
+ void **devPtr;
2114
+ size_t size;
2115
+ cudaStream_t hStream;
2116
+ } cudaMallocAsync_v11020_params;
2117
+
2118
+ typedef struct cudaFreeAsync_v11020_params_st {
2119
+ void *devPtr;
2120
+ cudaStream_t hStream;
2121
+ } cudaFreeAsync_v11020_params;
2122
+
2123
+ typedef struct cudaMallocFromPoolAsync_v11020_params_st {
2124
+ void **ptr;
2125
+ size_t size;
2126
+ cudaMemPool_t memPool;
2127
+ cudaStream_t stream;
2128
+ } cudaMallocFromPoolAsync_v11020_params;
2129
+
2130
+ typedef struct cudaGetDriverEntryPoint_v11030_params_st {
2131
+ const char *symbol;
2132
+ void **funcPtr;
2133
+ unsigned long long flags;
2134
+ } cudaGetDriverEntryPoint_v11030_params;
2135
+
2136
+ // Parameter trace structures for removed functions
2137
+
2138
+
2139
+ // End of parameter trace structures
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/include/generated_cuda_vdpau_interop_meta.h ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // This file is generated. Any changes you make will be lost during the next clean build.
2
+
3
+ // CUDA public interface, for type definitions and api function prototypes
4
+ #include "cuda_vdpau_interop.h"
5
+
6
+ // *************************************************************************
7
+ // Definitions of structs to hold parameters for each function
8
+ // *************************************************************************
9
+
10
+ // Currently used parameter trace structures
11
+ typedef struct cudaVDPAUGetDevice_v3020_params_st {
12
+ int *device;
13
+ VdpDevice vdpDevice;
14
+ VdpGetProcAddress *vdpGetProcAddress;
15
+ } cudaVDPAUGetDevice_v3020_params;
16
+
17
+ typedef struct cudaVDPAUSetVDPAUDevice_v3020_params_st {
18
+ int device;
19
+ VdpDevice vdpDevice;
20
+ VdpGetProcAddress *vdpGetProcAddress;
21
+ } cudaVDPAUSetVDPAUDevice_v3020_params;
22
+
23
+ typedef struct cudaGraphicsVDPAURegisterVideoSurface_v3020_params_st {
24
+ struct cudaGraphicsResource **resource;
25
+ VdpVideoSurface vdpSurface;
26
+ unsigned int flags;
27
+ } cudaGraphicsVDPAURegisterVideoSurface_v3020_params;
28
+
29
+ typedef struct cudaGraphicsVDPAURegisterOutputSurface_v3020_params_st {
30
+ struct cudaGraphicsResource **resource;
31
+ VdpOutputSurface vdpSurface;
32
+ unsigned int flags;
33
+ } cudaGraphicsVDPAURegisterOutputSurface_v3020_params;
34
+
35
+ // Parameter trace structures for removed functions
36
+
37
+
38
+ // End of parameter trace structures
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/include/cudnn_cnn_infer.h ADDED
@@ -0,0 +1,571 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright 2017-2022 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ /*
51
+ * cudnn_cnn_infer : cuDNN's basic definitions and inference CNN functions.
52
+ */
53
+
54
+ #if !defined(CUDNN_CNN_INFER_H_)
55
+ #define CUDNN_CNN_INFER_H_
56
+
57
+ #pragma once
58
+ #include <cuda_runtime.h>
59
+ #include <stdint.h>
60
+
61
+ #include "cudnn_version.h"
62
+ #include "cudnn_ops_infer.h"
63
+
64
+ /* These version numbers are autogenerated, do not edit manually. */
65
+ #define CUDNN_CNN_INFER_MAJOR 8
66
+ #define CUDNN_CNN_INFER_MINOR 7
67
+ #define CUDNN_CNN_INFER_PATCH 0
68
+
69
+ #if (CUDNN_CNN_INFER_MAJOR != CUDNN_MAJOR) || (CUDNN_CNN_INFER_MINOR != CUDNN_MINOR) || \
70
+ (CUDNN_CNN_INFER_PATCH != CUDNN_PATCHLEVEL)
71
+ #error Version mismatch in cuDNN CNN INFER!!!
72
+ #endif
73
+
74
+ #if defined(__cplusplus)
75
+ extern "C" {
76
+ #endif
77
+
78
+ typedef struct cudnnConvolutionStruct *cudnnConvolutionDescriptor_t;
79
+
80
+ /*
81
+ * convolution mode
82
+ */
83
+ typedef enum { CUDNN_CONVOLUTION = 0, CUDNN_CROSS_CORRELATION = 1 } cudnnConvolutionMode_t;
84
+
85
+ /*
86
+ * CUDNN Reorder
87
+ */
88
+ typedef enum {
89
+ CUDNN_DEFAULT_REORDER = 0,
90
+ CUDNN_NO_REORDER = 1,
91
+ } cudnnReorderType_t;
92
+
93
+ typedef struct cudnnConvolutionFwdAlgoPerfStruct {
94
+ cudnnConvolutionFwdAlgo_t algo;
95
+ cudnnStatus_t status;
96
+ float time;
97
+ size_t memory;
98
+ cudnnDeterminism_t determinism;
99
+ cudnnMathType_t mathType;
100
+ int reserved[3];
101
+ } cudnnConvolutionFwdAlgoPerf_t;
102
+
103
+ /* Create an instance of convolution descriptor */
104
+ cudnnStatus_t CUDNNWINAPI
105
+ cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc);
106
+
107
+ /* Destroy an instance of convolution descriptor */
108
+ cudnnStatus_t CUDNNWINAPI
109
+ cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc);
110
+
111
+ cudnnStatus_t CUDNNWINAPI
112
+ cudnnSetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType);
113
+
114
+ cudnnStatus_t CUDNNWINAPI
115
+ cudnnGetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType);
116
+
117
+ cudnnStatus_t CUDNNWINAPI
118
+ cudnnSetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int groupCount);
119
+
120
+ cudnnStatus_t CUDNNWINAPI
121
+ cudnnGetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int *groupCount);
122
+
123
+ cudnnStatus_t CUDNNWINAPI
124
+ cudnnSetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType);
125
+
126
+ cudnnStatus_t CUDNNWINAPI
127
+ cudnnGetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType);
128
+
129
+ cudnnStatus_t CUDNNWINAPI
130
+ cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc,
131
+ int pad_h, /* zero-padding height */
132
+ int pad_w, /* zero-padding width */
133
+ int u, /* vertical filter stride */
134
+ int v, /* horizontal filter stride */
135
+ int dilation_h, /* filter dilation in the vertical dimension */
136
+ int dilation_w, /* filter dilation in the horizontal dimension */
137
+ cudnnConvolutionMode_t mode,
138
+ cudnnDataType_t computeType);
139
+
140
+ cudnnStatus_t CUDNNWINAPI
141
+ cudnnGetConvolution2dDescriptor(const cudnnConvolutionDescriptor_t convDesc,
142
+ int *pad_h, /* zero-padding height */
143
+ int *pad_w, /* zero-padding width */
144
+ int *u, /* vertical filter stride */
145
+ int *v, /* horizontal filter stride */
146
+ int *dilation_h, /* filter dilation in the vertical dimension */
147
+ int *dilation_w, /* filter dilation in the horizontal dimension */
148
+ cudnnConvolutionMode_t *mode,
149
+ cudnnDataType_t *computeType);
150
+
151
+ cudnnStatus_t CUDNNWINAPI
152
+ cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc,
153
+ int arrayLength, /* nbDims-2 size */
154
+ const int padA[],
155
+ const int filterStrideA[],
156
+ const int dilationA[],
157
+ cudnnConvolutionMode_t mode,
158
+ cudnnDataType_t computeType); /* convolution data type */
159
+
160
+ /* Helper function to read back the parameters (padding, stride, dilation, mode, compute type) stored in an Nd convolution descriptor */
161
+ cudnnStatus_t CUDNNWINAPI
162
+ cudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc,
163
+ int arrayLengthRequested,
164
+ int *arrayLength,
165
+ int padA[],
166
+ int strideA[],
167
+ int dilationA[],
168
+ cudnnConvolutionMode_t *mode,
169
+ cudnnDataType_t *computeType); /* convolution data type */
170
+
171
+ cudnnStatus_t CUDNNWINAPI
172
+ cudnnGetConvolution2dForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
173
+ const cudnnTensorDescriptor_t inputTensorDesc,
174
+ const cudnnFilterDescriptor_t filterDesc,
175
+ int *n,
176
+ int *c,
177
+ int *h,
178
+ int *w);
179
+
180
+ /* Helper function to return the dimensions of the output tensor given a convolution descriptor */
181
+ cudnnStatus_t CUDNNWINAPI
182
+ cudnnGetConvolutionNdForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
183
+ const cudnnTensorDescriptor_t inputTensorDesc,
184
+ const cudnnFilterDescriptor_t filterDesc,
185
+ int nbDims,
186
+ int tensorOuputDimA[]);
187
+
188
+ /* Helper functions to provide the convolution forward algorithm that best fits the requirements */
189
+ cudnnStatus_t CUDNNWINAPI
190
+ cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count);
191
+
192
+ cudnnStatus_t CUDNNWINAPI
193
+ cudnnGetConvolutionForwardAlgorithm_v7(cudnnHandle_t handle,
194
+ const cudnnTensorDescriptor_t srcDesc,
195
+ const cudnnFilterDescriptor_t filterDesc,
196
+ const cudnnConvolutionDescriptor_t convDesc,
197
+ const cudnnTensorDescriptor_t destDesc,
198
+ const int requestedAlgoCount,
199
+ int *returnedAlgoCount,
200
+ cudnnConvolutionFwdAlgoPerf_t *perfResults);
201
+
202
+ cudnnStatus_t CUDNNWINAPI
203
+ cudnnFindConvolutionForwardAlgorithm(cudnnHandle_t handle,
204
+ const cudnnTensorDescriptor_t xDesc,
205
+ const cudnnFilterDescriptor_t wDesc,
206
+ const cudnnConvolutionDescriptor_t convDesc,
207
+ const cudnnTensorDescriptor_t yDesc,
208
+ const int requestedAlgoCount,
209
+ int *returnedAlgoCount,
210
+ cudnnConvolutionFwdAlgoPerf_t *perfResults);
211
+
212
+ cudnnStatus_t CUDNNWINAPI
213
+ cudnnFindConvolutionForwardAlgorithmEx(cudnnHandle_t handle,
214
+ const cudnnTensorDescriptor_t xDesc,
215
+ const void *x,
216
+ const cudnnFilterDescriptor_t wDesc,
217
+ const void *w,
218
+ const cudnnConvolutionDescriptor_t convDesc,
219
+ const cudnnTensorDescriptor_t yDesc,
220
+ void *y,
221
+ const int requestedAlgoCount,
222
+ int *returnedAlgoCount,
223
+ cudnnConvolutionFwdAlgoPerf_t *perfResults,
224
+ void *workSpace,
225
+ size_t workSpaceSizeInBytes);
226
+
227
+ cudnnStatus_t CUDNNWINAPI
228
+ cudnnIm2Col(cudnnHandle_t handle,
229
+ const cudnnTensorDescriptor_t xDesc,
230
+ const void *x,
231
+ const cudnnFilterDescriptor_t wDesc,
232
+ const cudnnConvolutionDescriptor_t convDesc,
233
+ void *colBuffer);
234
+
235
+ cudnnStatus_t CUDNNWINAPI
236
+ cudnnReorderFilterAndBias(cudnnHandle_t handle,
237
+ const cudnnFilterDescriptor_t filterDesc,
238
+ cudnnReorderType_t reorderType,
239
+ const void *filterData,
240
+ void *reorderedFilterData,
241
+ int reorderBias,
242
+ const void *biasData,
243
+ void *reorderedBiasData);
244
+
245
+ /* Helper function to return the minimum size of the workspace to be passed to the convolution given an algorithm */
246
+ cudnnStatus_t CUDNNWINAPI
247
+ cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle,
248
+ const cudnnTensorDescriptor_t xDesc,
249
+ const cudnnFilterDescriptor_t wDesc,
250
+ const cudnnConvolutionDescriptor_t convDesc,
251
+ const cudnnTensorDescriptor_t yDesc,
252
+ cudnnConvolutionFwdAlgo_t algo,
253
+ size_t *sizeInBytes);
254
+
255
+ /* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */
256
+
257
+ /* Function to perform the forward pass for batch convolution */
258
+ cudnnStatus_t CUDNNWINAPI
259
+ cudnnConvolutionForward(cudnnHandle_t handle,
260
+ const void *alpha,
261
+ const cudnnTensorDescriptor_t xDesc,
262
+ const void *x,
263
+ const cudnnFilterDescriptor_t wDesc,
264
+ const void *w,
265
+ const cudnnConvolutionDescriptor_t convDesc,
266
+ cudnnConvolutionFwdAlgo_t algo,
267
+ void *workSpace,
268
+ size_t workSpaceSizeInBytes,
269
+ const void *beta,
270
+ const cudnnTensorDescriptor_t yDesc,
271
+ void *y);
272
+
273
+ /* Fused conv/bias/activation operation : y = Act( alpha1 * conv(x) + alpha2 * z + bias ) */
274
+ cudnnStatus_t CUDNNWINAPI
275
+ cudnnConvolutionBiasActivationForward(cudnnHandle_t handle,
276
+ const void *alpha1,
277
+ const cudnnTensorDescriptor_t xDesc,
278
+ const void *x,
279
+ const cudnnFilterDescriptor_t wDesc,
280
+ const void *w,
281
+ const cudnnConvolutionDescriptor_t convDesc,
282
+ cudnnConvolutionFwdAlgo_t algo,
283
+ void *workSpace,
284
+ size_t workSpaceSizeInBytes,
285
+ const void *alpha2,
286
+ const cudnnTensorDescriptor_t zDesc,
287
+ const void *z,
288
+ const cudnnTensorDescriptor_t biasDesc,
289
+ const void *bias,
290
+ const cudnnActivationDescriptor_t activationDesc,
291
+ const cudnnTensorDescriptor_t yDesc,
292
+ void *y);
293
+
294
+ /* Helper functions to provide the convolution backward data algorithm that best fits the requirements */
295
+
296
+ typedef struct cudnnConvolutionBwdDataAlgoPerfStruct {
297
+ cudnnConvolutionBwdDataAlgo_t algo;
298
+ cudnnStatus_t status;
299
+ float time;
300
+ size_t memory;
301
+ cudnnDeterminism_t determinism;
302
+ cudnnMathType_t mathType;
303
+ int reserved[3];
304
+ } cudnnConvolutionBwdDataAlgoPerf_t;
305
+
306
+ cudnnStatus_t CUDNNWINAPI
307
+ cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, int *count);
308
+
309
+ cudnnStatus_t CUDNNWINAPI
310
+ cudnnFindConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
311
+ const cudnnFilterDescriptor_t wDesc,
312
+ const cudnnTensorDescriptor_t dyDesc,
313
+ const cudnnConvolutionDescriptor_t convDesc,
314
+ const cudnnTensorDescriptor_t dxDesc,
315
+ const int requestedAlgoCount,
316
+ int *returnedAlgoCount,
317
+ cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
318
+
319
+ cudnnStatus_t CUDNNWINAPI
320
+ cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnHandle_t handle,
321
+ const cudnnFilterDescriptor_t wDesc,
322
+ const void *w,
323
+ const cudnnTensorDescriptor_t dyDesc,
324
+ const void *dy,
325
+ const cudnnConvolutionDescriptor_t convDesc,
326
+ const cudnnTensorDescriptor_t dxDesc,
327
+ void *dx,
328
+ const int requestedAlgoCount,
329
+ int *returnedAlgoCount,
330
+ cudnnConvolutionBwdDataAlgoPerf_t *perfResults,
331
+ void *workSpace,
332
+ size_t workSpaceSizeInBytes);
333
+
334
+ cudnnStatus_t CUDNNWINAPI
335
+ cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle,
336
+ const cudnnFilterDescriptor_t filterDesc,
337
+ const cudnnTensorDescriptor_t diffDesc,
338
+ const cudnnConvolutionDescriptor_t convDesc,
339
+ const cudnnTensorDescriptor_t gradDesc,
340
+ const int requestedAlgoCount,
341
+ int *returnedAlgoCount,
342
+ cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
343
+
344
+ /*
345
+ * convolution algorithm (which potentially requires some workspace)
346
+ */
347
+
348
+ /* Helper function to return the minimum size of the workspace to be passed to the convolution given an algorithm */
349
+ cudnnStatus_t CUDNNWINAPI
350
+ cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
351
+ const cudnnFilterDescriptor_t wDesc,
352
+ const cudnnTensorDescriptor_t dyDesc,
353
+ const cudnnConvolutionDescriptor_t convDesc,
354
+ const cudnnTensorDescriptor_t dxDesc,
355
+ cudnnConvolutionBwdDataAlgo_t algo,
356
+ size_t *sizeInBytes);
357
+
358
+ cudnnStatus_t CUDNNWINAPI
359
+ cudnnConvolutionBackwardData(cudnnHandle_t handle,
360
+ const void *alpha,
361
+ const cudnnFilterDescriptor_t wDesc,
362
+ const void *w,
363
+ const cudnnTensorDescriptor_t dyDesc,
364
+ const void *dy,
365
+ const cudnnConvolutionDescriptor_t convDesc,
366
+ cudnnConvolutionBwdDataAlgo_t algo,
367
+ void *workSpace,
368
+ size_t workSpaceSizeInBytes,
369
+ const void *beta,
370
+ const cudnnTensorDescriptor_t dxDesc,
371
+ void *dx);
372
+
373
+ /* Helper function to calculate folding descriptors for dgrad */
374
+ cudnnStatus_t CUDNNWINAPI
375
+ cudnnGetFoldedConvBackwardDataDescriptors(const cudnnHandle_t handle,
376
+ const cudnnFilterDescriptor_t filterDesc,
377
+ const cudnnTensorDescriptor_t diffDesc,
378
+ const cudnnConvolutionDescriptor_t convDesc,
379
+ const cudnnTensorDescriptor_t gradDesc,
380
+ const cudnnTensorFormat_t transformFormat,
381
+ cudnnFilterDescriptor_t foldedFilterDesc,
382
+ cudnnTensorDescriptor_t paddedDiffDesc,
383
+ cudnnConvolutionDescriptor_t foldedConvDesc,
384
+ cudnnTensorDescriptor_t foldedGradDesc,
385
+ cudnnTensorTransformDescriptor_t filterFoldTransDesc,
386
+ cudnnTensorTransformDescriptor_t diffPadTransDesc,
387
+ cudnnTensorTransformDescriptor_t gradFoldTransDesc,
388
+ cudnnTensorTransformDescriptor_t gradUnfoldTransDesc);
389
+
390
+ /* cudnnFusedOps... */
391
+ struct cudnnFusedOpsConstParamStruct;
392
+ typedef struct cudnnFusedOpsConstParamStruct *cudnnFusedOpsConstParamPack_t;
393
+
394
+ struct cudnnFusedOpsVariantParamStruct;
395
+ typedef struct cudnnFusedOpsVariantParamStruct *cudnnFusedOpsVariantParamPack_t;
396
+
397
+ struct cudnnFusedOpsPlanStruct;
398
+ typedef struct cudnnFusedOpsPlanStruct *cudnnFusedOpsPlan_t;
399
+
400
+ typedef enum {
401
+ /* each op in [ ] can be disabled by passing NULL ptr */
402
+ /* [per channel scale], [per channel bias], [activation], convolution, [generate BN stats] */
403
+ CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS = 0,
404
+ /* [per channel scale], [per channel bias], [activation], convolutionBackwardWeights */
405
+ CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD = 1,
406
+ /* utility for BN training in BN-conv fusion */
407
+ /* computes the equivalent scale and bias from ySum ySqSum and learned scale, bias */
408
+ /* optionally update running stats and generate saved stats */
409
+ CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING = 2,
410
+ /* utility for BN inference in BN-conv fusion */
411
+ /* computes the equivalent scale and bias from learned running stats and learned scale, bias */
412
+ CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE = 3,
413
+ /* reserved for future use: convolution, [per channel scale], [per channel bias], [residual add], [activation] */
414
+ CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION = 4,
415
+ /* reserved for future use: [per channel scale], [per channel bias], [residual add], activation, bitmask */
416
+ CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK = 5,
417
+ /* reserved for future use */
418
+ CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM = 6,
419
+ } cudnnFusedOps_t;
420
+
421
+ typedef enum {
422
+ /* set XDESC: pass previously initialized cudnnTensorDescriptor_t */
423
+ /* get XDESC: pass previously created cudnnTensorDescriptor_t */
424
+ CUDNN_PARAM_XDESC = 0,
425
+ /* set/get XDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
426
+ CUDNN_PARAM_XDATA_PLACEHOLDER = 1,
427
+ /* set/get BN_MODE: pass cudnnBatchNormMode_t* */
428
+ CUDNN_PARAM_BN_MODE = 2,
429
+ /* set CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
430
+ /* get CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
431
+ CUDNN_PARAM_BN_EQSCALEBIAS_DESC = 3,
432
+ /* set/get BN_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
433
+ CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER = 4,
434
+ /* set/get BN_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
435
+ CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER = 5,
436
+ /* set ACTIVATION_DESC: pass previously initialized cudnnActivationDescriptor_t */
437
+ /* get ACTIVATION_DESC: pass previously created cudnnActivationDescriptor_t */
438
+ CUDNN_PARAM_ACTIVATION_DESC = 6,
439
+ /* set CONV_DESC: pass previously initialized cudnnConvolutionDescriptor_t */
440
+ /* get CONV_DESC: pass previously created cudnnConvolutionDescriptor_t */
441
+ CUDNN_PARAM_CONV_DESC = 7,
442
+ /* set WDESC: pass previously initialized cudnnFilterDescriptor_t */
443
+ /* get WDESC: pass previously created cudnnFilterDescriptor_t */
444
+ CUDNN_PARAM_WDESC = 8,
445
+ /* set/get WDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
446
+ CUDNN_PARAM_WDATA_PLACEHOLDER = 9,
447
+ /* set DWDESC: pass previously initialized cudnnFilterDescriptor_t */
448
+ /* get DWDESC: pass previously created cudnnFilterDescriptor_t */
449
+ CUDNN_PARAM_DWDESC = 10,
450
+ /* set/get DWDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
451
+ CUDNN_PARAM_DWDATA_PLACEHOLDER = 11,
452
+ /* set YDESC: pass previously initialized cudnnTensorDescriptor_t */
453
+ /* get YDESC: pass previously created cudnnTensorDescriptor_t */
454
+ CUDNN_PARAM_YDESC = 12,
455
+ /* set/get YDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
456
+ CUDNN_PARAM_YDATA_PLACEHOLDER = 13,
457
+ /* set DYDESC: pass previously initialized cudnnTensorDescriptor_t */
458
+ /* get DYDESC: pass previously created cudnnTensorDescriptor_t */
459
+ CUDNN_PARAM_DYDESC = 14,
460
+ /* set/get DYDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
461
+ CUDNN_PARAM_DYDATA_PLACEHOLDER = 15,
462
+ /* set YSTATS_DESC: pass previously initialized cudnnTensorDescriptor_t */
463
+ /* get YSTATS_DESC: pass previously created cudnnTensorDescriptor_t */
464
+ CUDNN_PARAM_YSTATS_DESC = 16,
465
+ /* set/get YSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
466
+ CUDNN_PARAM_YSUM_PLACEHOLDER = 17,
467
+ /* set/get YSQSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
468
+ CUDNN_PARAM_YSQSUM_PLACEHOLDER = 18,
469
+ /* set CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously initialized cudnnTensorDescriptor_t */
470
+ /* get CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously created cudnnTensorDescriptor_t */
471
+ CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC = 19,
472
+ /* set/get CUDNN_PARAM_BN_SCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
473
+ CUDNN_PARAM_BN_SCALE_PLACEHOLDER = 20,
474
+ /* set/get CUDNN_PARAM_BN_BIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
475
+ CUDNN_PARAM_BN_BIAS_PLACEHOLDER = 21,
476
+ /* set/get CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
477
+ CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER = 22,
478
+ /* set/get CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
479
+ CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER = 23,
480
+ /* set/get CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
481
+ CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER = 24,
482
+ /* set/get CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
483
+ CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER = 25,
484
+
485
+ /* set ZDESC: pass previously initialized cudnnTensorDescriptor_t */
486
+ /* get ZDESC: pass previously created cudnnTensorDescriptor_t */
487
+ CUDNN_PARAM_ZDESC = 26,
488
+ /* set/get ZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
489
+ CUDNN_PARAM_ZDATA_PLACEHOLDER = 27,
490
+ /* set BN_Z_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
491
+ /* get BN_Z_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
492
+ CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC = 28,
493
+ /* set/get BN_Z_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
494
+ CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER = 29,
495
+ /* set/get BN_Z_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
496
+ CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER = 30,
497
+
498
+ /* set ACTIVATION_BITMASK_DESC: pass previously initialized cudnnTensorDescriptor_t */
499
+ /* get ACTIVATION_BITMASK_DESC: pass previously created cudnnTensorDescriptor_t */
500
+ CUDNN_PARAM_ACTIVATION_BITMASK_DESC = 31,
501
+ /* set/get ACTIVATION_BITMASK_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
502
+ CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER = 32,
503
+
504
+ /* set DXDESC: pass previously initialized cudnnTensorDescriptor_t */
505
+ /* get DXDESC: pass previously created cudnnTensorDescriptor_t */
506
+ CUDNN_PARAM_DXDESC = 33,
507
+ /* set/get DXDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
508
+ CUDNN_PARAM_DXDATA_PLACEHOLDER = 34,
509
+ /* set DZDESC: pass previously initialized cudnnTensorDescriptor_t */
510
+ /* get DZDESC: pass previously created cudnnTensorDescriptor_t */
511
+ CUDNN_PARAM_DZDESC = 35,
512
+ /* set/get DZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
513
+ CUDNN_PARAM_DZDATA_PLACEHOLDER = 36,
514
+ /* set/get CUDNN_PARAM_BN_DSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
515
+ CUDNN_PARAM_BN_DSCALE_PLACEHOLDER = 37,
516
+ /* set/get CUDNN_PARAM_BN_DBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
517
+ CUDNN_PARAM_BN_DBIAS_PLACEHOLDER = 38,
518
+ } cudnnFusedOpsConstParamLabel_t;
519
+
520
+ typedef enum {
521
+ CUDNN_PTR_NULL = 0,
522
+ CUDNN_PTR_ELEM_ALIGNED = 1,
523
+ CUDNN_PTR_16B_ALIGNED = 2,
524
+ } cudnnFusedOpsPointerPlaceHolder_t;
525
+
526
+ typedef enum {
527
+ /* set: pass void* pointing to dev memory */
528
+ /* get: pass void** pointing to host memory */
529
+ CUDNN_PTR_XDATA = 0,
530
+ CUDNN_PTR_BN_EQSCALE = 1,
531
+ CUDNN_PTR_BN_EQBIAS = 2,
532
+ CUDNN_PTR_WDATA = 3,
533
+ CUDNN_PTR_DWDATA = 4,
534
+ CUDNN_PTR_YDATA = 5,
535
+ CUDNN_PTR_DYDATA = 6,
536
+ CUDNN_PTR_YSUM = 7,
537
+ CUDNN_PTR_YSQSUM = 8,
538
+ CUDNN_PTR_WORKSPACE = 9,
539
+ CUDNN_PTR_BN_SCALE = 10,
540
+ CUDNN_PTR_BN_BIAS = 11,
541
+ CUDNN_PTR_BN_SAVED_MEAN = 12,
542
+ CUDNN_PTR_BN_SAVED_INVSTD = 13,
543
+ CUDNN_PTR_BN_RUNNING_MEAN = 14,
544
+ CUDNN_PTR_BN_RUNNING_VAR = 15,
545
+ CUDNN_PTR_ZDATA = 16,
546
+ CUDNN_PTR_BN_Z_EQSCALE = 17,
547
+ CUDNN_PTR_BN_Z_EQBIAS = 18,
548
+ CUDNN_PTR_ACTIVATION_BITMASK = 19,
549
+ CUDNN_PTR_DXDATA = 20,
550
+ CUDNN_PTR_DZDATA = 21,
551
+ CUDNN_PTR_BN_DSCALE = 22,
552
+ CUDNN_PTR_BN_DBIAS = 23,
553
+
554
+ /* set/get: pass size_t* pointing to host memory */
555
+ CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES = 100,
556
+ /* set/get: pass int64_t* pointing to host memory */
557
+ CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT = 101,
558
+ /* set/get: pass double* pointing to host memory */
559
+ CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR = 102,
560
+ /* set/get: pass double* pointing to host memory */
561
+ CUDNN_SCALAR_DOUBLE_BN_EPSILON = 103,
562
+ } cudnnFusedOpsVariantParamLabel_t;
563
+
564
+ cudnnStatus_t CUDNNWINAPI
565
+ cudnnCnnInferVersionCheck(void);
566
+
567
+ #if defined(__cplusplus)
568
+ }
569
+ #endif
570
+
571
+ #endif /* CUDNN_CNN_INFER_H_ */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (221 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/include/cufftw.h ADDED
@@ -0,0 +1,454 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ /* Copyright 2005-2014 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * The source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * The Licensed Deliverables contained herein are PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and are being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. THEY ARE
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ /*!
51
+ * \file cufftw.h
52
+ * \brief Public header file for the NVIDIA CUDA FFTW library (CUFFTW)
53
+ */
54
+
55
+ #ifndef _CUFFTW_H_
56
+ #define _CUFFTW_H_
57
+
58
+
59
+ #include <stdio.h>
60
+ #include "cufft.h"
61
+
62
+ #ifdef __cplusplus
63
+ extern "C" {
64
+ #endif
65
+
66
+ // transform direction
67
+ #define FFTW_FORWARD -1
68
+ #define FFTW_INVERSE 1
69
+ #define FFTW_BACKWARD 1
70
+
71
+ // Planner flags
72
+
73
+ #define FFTW_ESTIMATE 0x01
74
+ #define FFTW_MEASURE 0x02
75
+ #define FFTW_PATIENT 0x03
76
+ #define FFTW_EXHAUSTIVE 0x04
77
+ #define FFTW_WISDOM_ONLY 0x05
78
+
79
+ //Algorithm restriction flags
80
+
81
+ #define FFTW_DESTROY_INPUT 0x08
82
+ #define FFTW_PRESERVE_INPUT 0x0C
83
+ #define FFTW_UNALIGNED 0x10
84
+
85
+ // CUFFTW defines and supports the following data types
86
+
87
+ // note if complex.h has been included we use the C99 complex types
88
+ #if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined (complex)
89
+ typedef double _Complex fftw_complex;
90
+ typedef float _Complex fftwf_complex;
91
+ #else
92
+ typedef double fftw_complex[2];
93
+ typedef float fftwf_complex[2];
94
+ #endif
95
+
96
+ typedef void *fftw_plan;
97
+
98
+ typedef void *fftwf_plan;
99
+
100
+ typedef struct {
101
+ int n;
102
+ int is;
103
+ int os;
104
+ } fftw_iodim;
105
+
106
+ typedef fftw_iodim fftwf_iodim;
107
+
108
+ typedef struct {
109
+ ptrdiff_t n;
110
+ ptrdiff_t is;
111
+ ptrdiff_t os;
112
+ } fftw_iodim64;
113
+
114
+ typedef fftw_iodim64 fftwf_iodim64;
115
+
116
+
117
+ // CUFFTW defines and supports the following double precision APIs
118
+
119
+
120
+ fftw_plan CUFFTAPI fftw_plan_dft_1d(int n,
121
+ fftw_complex *in,
122
+ fftw_complex *out,
123
+ int sign,
124
+ unsigned flags);
125
+
126
+ fftw_plan CUFFTAPI fftw_plan_dft_2d(int n0,
127
+ int n1,
128
+ fftw_complex *in,
129
+ fftw_complex *out,
130
+ int sign,
131
+ unsigned flags);
132
+
133
+ fftw_plan CUFFTAPI fftw_plan_dft_3d(int n0,
134
+ int n1,
135
+ int n2,
136
+ fftw_complex *in,
137
+ fftw_complex *out,
138
+ int sign,
139
+ unsigned flags);
140
+
141
+ fftw_plan CUFFTAPI fftw_plan_dft(int rank,
142
+ const int *n,
143
+ fftw_complex *in,
144
+ fftw_complex *out,
145
+ int sign,
146
+ unsigned flags);
147
+
148
+ fftw_plan CUFFTAPI fftw_plan_dft_r2c_1d(int n,
149
+ double *in,
150
+ fftw_complex *out,
151
+ unsigned flags);
152
+
153
+ fftw_plan CUFFTAPI fftw_plan_dft_r2c_2d(int n0,
154
+ int n1,
155
+ double *in,
156
+ fftw_complex *out,
157
+ unsigned flags);
158
+
159
+ fftw_plan CUFFTAPI fftw_plan_dft_r2c_3d(int n0,
160
+ int n1,
161
+ int n2,
162
+ double *in,
163
+ fftw_complex *out,
164
+ unsigned flags);
165
+
166
+ fftw_plan CUFFTAPI fftw_plan_dft_r2c(int rank,
167
+ const int *n,
168
+ double *in,
169
+ fftw_complex *out,
170
+ unsigned flags);
171
+
172
+ fftw_plan CUFFTAPI fftw_plan_dft_c2r_1d(int n,
173
+ fftw_complex *in,
174
+ double *out,
175
+ unsigned flags);
176
+
177
+ fftw_plan CUFFTAPI fftw_plan_dft_c2r_2d(int n0,
178
+ int n1,
179
+ fftw_complex *in,
180
+ double *out,
181
+ unsigned flags);
182
+
183
+ fftw_plan CUFFTAPI fftw_plan_dft_c2r_3d(int n0,
184
+ int n1,
185
+ int n2,
186
+ fftw_complex *in,
187
+ double *out,
188
+ unsigned flags);
189
+
190
+ fftw_plan CUFFTAPI fftw_plan_dft_c2r(int rank,
191
+ const int *n,
192
+ fftw_complex *in,
193
+ double *out,
194
+ unsigned flags);
195
+
196
+
197
+ fftw_plan CUFFTAPI fftw_plan_many_dft(int rank,
198
+ const int *n,
199
+ int batch,
200
+ fftw_complex *in,
201
+ const int *inembed, int istride, int idist,
202
+ fftw_complex *out,
203
+ const int *onembed, int ostride, int odist,
204
+ int sign, unsigned flags);
205
+
206
+ fftw_plan CUFFTAPI fftw_plan_many_dft_r2c(int rank,
207
+ const int *n,
208
+ int batch,
209
+ double *in,
210
+ const int *inembed, int istride, int idist,
211
+ fftw_complex *out,
212
+ const int *onembed, int ostride, int odist,
213
+ unsigned flags);
214
+
215
+ fftw_plan CUFFTAPI fftw_plan_many_dft_c2r(int rank,
216
+ const int *n,
217
+ int batch,
218
+ fftw_complex *in,
219
+ const int *inembed, int istride, int idist,
220
+ double *out,
221
+ const int *onembed, int ostride, int odist,
222
+ unsigned flags);
223
+
224
+ fftw_plan CUFFTAPI fftw_plan_guru_dft(int rank, const fftw_iodim *dims,
225
+ int batch_rank, const fftw_iodim *batch_dims,
226
+ fftw_complex *in, fftw_complex *out,
227
+ int sign, unsigned flags);
228
+
229
+ fftw_plan CUFFTAPI fftw_plan_guru_dft_r2c(int rank, const fftw_iodim *dims,
230
+ int batch_rank, const fftw_iodim *batch_dims,
231
+ double *in, fftw_complex *out,
232
+ unsigned flags);
233
+
234
+ fftw_plan CUFFTAPI fftw_plan_guru_dft_c2r(int rank, const fftw_iodim *dims,
235
+ int batch_rank, const fftw_iodim *batch_dims,
236
+ fftw_complex *in, double *out,
237
+ unsigned flags);
238
+
239
+ void CUFFTAPI fftw_execute(const fftw_plan plan);
240
+
241
+ void CUFFTAPI fftw_execute_dft(const fftw_plan plan,
242
+ fftw_complex *idata,
243
+ fftw_complex *odata);
244
+
245
+ void CUFFTAPI fftw_execute_dft_r2c(const fftw_plan plan,
246
+ double *idata,
247
+ fftw_complex *odata);
248
+
249
+ void CUFFTAPI fftw_execute_dft_c2r(const fftw_plan plan,
250
+ fftw_complex *idata,
251
+ double *odata);
252
+
253
+
254
+ // CUFFTW defines and supports the following single precision APIs
255
+
256
+ fftwf_plan CUFFTAPI fftwf_plan_dft_1d(int n,
257
+ fftwf_complex *in,
258
+ fftwf_complex *out,
259
+ int sign,
260
+ unsigned flags);
261
+
262
+ fftwf_plan CUFFTAPI fftwf_plan_dft_2d(int n0,
263
+ int n1,
264
+ fftwf_complex *in,
265
+ fftwf_complex *out,
266
+ int sign,
267
+ unsigned flags);
268
+
269
+ fftwf_plan CUFFTAPI fftwf_plan_dft_3d(int n0,
270
+ int n1,
271
+ int n2,
272
+ fftwf_complex *in,
273
+ fftwf_complex *out,
274
+ int sign,
275
+ unsigned flags);
276
+
277
+ fftwf_plan CUFFTAPI fftwf_plan_dft(int rank,
278
+ const int *n,
279
+ fftwf_complex *in,
280
+ fftwf_complex *out,
281
+ int sign,
282
+ unsigned flags);
283
+
284
+ fftwf_plan CUFFTAPI fftwf_plan_dft_r2c_1d(int n,
285
+ float *in,
286
+ fftwf_complex *out,
287
+ unsigned flags);
288
+
289
+ fftwf_plan CUFFTAPI fftwf_plan_dft_r2c_2d(int n0,
290
+ int n1,
291
+ float *in,
292
+ fftwf_complex *out,
293
+ unsigned flags);
294
+
295
+ fftwf_plan CUFFTAPI fftwf_plan_dft_r2c_3d(int n0,
296
+ int n1,
297
+ int n2,
298
+ float *in,
299
+ fftwf_complex *out,
300
+ unsigned flags);
301
+
302
+ fftwf_plan CUFFTAPI fftwf_plan_dft_r2c(int rank,
303
+ const int *n,
304
+ float *in,
305
+ fftwf_complex *out,
306
+ unsigned flags);
307
+
308
+ fftwf_plan CUFFTAPI fftwf_plan_dft_c2r_1d(int n,
309
+ fftwf_complex *in,
310
+ float *out,
311
+ unsigned flags);
312
+
313
+ fftwf_plan CUFFTAPI fftwf_plan_dft_c2r_2d(int n0,
314
+ int n1,
315
+ fftwf_complex *in,
316
+ float *out,
317
+ unsigned flags);
318
+
319
+ fftwf_plan CUFFTAPI fftwf_plan_dft_c2r_3d(int n0,
320
+ int n1,
321
+ int n2,
322
+ fftwf_complex *in,
323
+ float *out,
324
+ unsigned flags);
325
+
326
+ fftwf_plan CUFFTAPI fftwf_plan_dft_c2r(int rank,
327
+ const int *n,
328
+ fftwf_complex *in,
329
+ float *out,
330
+ unsigned flags);
331
+
332
+ fftwf_plan CUFFTAPI fftwf_plan_many_dft(int rank,
333
+ const int *n,
334
+ int batch,
335
+ fftwf_complex *in,
336
+ const int *inembed, int istride, int idist,
337
+ fftwf_complex *out,
338
+ const int *onembed, int ostride, int odist,
339
+ int sign, unsigned flags);
340
+
341
+ fftwf_plan CUFFTAPI fftwf_plan_many_dft_r2c(int rank,
342
+ const int *n,
343
+ int batch,
344
+ float *in,
345
+ const int *inembed, int istride, int idist,
346
+ fftwf_complex *out,
347
+ const int *onembed, int ostride, int odist,
348
+ unsigned flags);
349
+
350
+ fftwf_plan CUFFTAPI fftwf_plan_many_dft_c2r(int rank,
351
+ const int *n,
352
+ int batch,
353
+ fftwf_complex *in,
354
+ const int *inembed, int istride, int idist,
355
+ float *out,
356
+ const int *onembed, int ostride, int odist,
357
+ unsigned flags);
358
+
359
+ fftwf_plan CUFFTAPI fftwf_plan_guru_dft(int rank, const fftwf_iodim *dims,
360
+ int batch_rank, const fftwf_iodim *batch_dims,
361
+ fftwf_complex *in, fftwf_complex *out,
362
+ int sign, unsigned flags);
363
+
364
+ fftwf_plan CUFFTAPI fftwf_plan_guru_dft_r2c(int rank, const fftwf_iodim *dims,
365
+ int batch_rank, const fftwf_iodim *batch_dims,
366
+ float *in, fftwf_complex *out,
367
+ unsigned flags);
368
+
369
+ fftwf_plan CUFFTAPI fftwf_plan_guru_dft_c2r(int rank, const fftwf_iodim *dims,
370
+ int batch_rank, const fftwf_iodim *batch_dims,
371
+ fftwf_complex *in, float *out,
372
+ unsigned flags);
373
+
374
+ void CUFFTAPI fftwf_execute(const fftw_plan plan);
375
+
376
+ void CUFFTAPI fftwf_execute_dft(const fftwf_plan plan,
377
+ fftwf_complex *idata,
378
+ fftwf_complex *odata);
379
+
380
+ void CUFFTAPI fftwf_execute_dft_r2c(const fftwf_plan plan,
381
+ float *idata,
382
+ fftwf_complex *odata);
383
+
384
+ void CUFFTAPI fftwf_execute_dft_c2r(const fftwf_plan plan,
385
+ fftwf_complex *idata,
386
+ float *odata);
387
+
388
+ /// CUFFTW 64-bit Guru Interface
389
+ /// dp
390
+ fftw_plan CUFFTAPI fftw_plan_guru64_dft(int rank, const fftw_iodim64* dims, int batch_rank, const fftw_iodim64* batch_dims, fftw_complex* in, fftw_complex* out, int sign, unsigned flags);
391
+
392
+ fftw_plan CUFFTAPI fftw_plan_guru64_dft_r2c(int rank, const fftw_iodim64* dims, int batch_rank, const fftw_iodim64* batch_dims, double* in, fftw_complex* out, unsigned flags);
393
+
394
+ fftw_plan CUFFTAPI fftw_plan_guru64_dft_c2r(int rank, const fftw_iodim64* dims, int batch_rank, const fftw_iodim64* batch_dims, fftw_complex* in, double* out, unsigned flags);
395
+
396
+ /// sp
397
+ fftwf_plan CUFFTAPI fftwf_plan_guru64_dft(int rank, const fftwf_iodim64* dims, int batch_rank, const fftwf_iodim64* batch_dims, fftwf_complex* in, fftwf_complex* out, int sign, unsigned flags);
398
+
399
+ fftwf_plan CUFFTAPI fftwf_plan_guru64_dft_r2c(int rank, const fftwf_iodim64* dims, int batch_rank, const fftwf_iodim64* batch_dims, float* in, fftwf_complex* out, unsigned flags);
400
+
401
+ fftwf_plan CUFFTAPI fftwf_plan_guru64_dft_c2r(int rank, const fftwf_iodim64* dims, int batch_rank, const fftwf_iodim64* batch_dims, fftwf_complex* in, float* out, unsigned flags);
402
+
403
+ #ifdef _WIN32
404
+ #define _CUFFTAPI(T) T CUFFTAPI
405
+ #else
406
+ #define _CUFFTAPI(T) CUFFTAPI T
407
+ #endif
408
+
409
+ // CUFFTW defines and supports the following support APIs
410
+ _CUFFTAPI(void *) fftw_malloc(size_t n);
411
+
412
+ _CUFFTAPI(void *) fftwf_malloc(size_t n);
413
+
414
+ void CUFFTAPI fftw_free(void *pointer);
415
+
416
+ void CUFFTAPI fftwf_free(void *pointer);
417
+
418
+ void CUFFTAPI fftw_export_wisdom_to_file(FILE * output_file);
419
+
420
+ void CUFFTAPI fftwf_export_wisdom_to_file(FILE * output_file);
421
+
422
+ void CUFFTAPI fftw_import_wisdom_from_file(FILE * input_file);
423
+
424
+ void CUFFTAPI fftwf_import_wisdom_from_file(FILE * input_file);
425
+
426
+ void CUFFTAPI fftw_print_plan(const fftw_plan plan);
427
+
428
+ void CUFFTAPI fftwf_print_plan(const fftwf_plan plan);
429
+
430
+ void CUFFTAPI fftw_set_timelimit(double seconds);
431
+
432
+ void CUFFTAPI fftwf_set_timelimit(double seconds);
433
+
434
+ double CUFFTAPI fftw_cost(const fftw_plan plan);
435
+
436
+ double CUFFTAPI fftwf_cost(const fftw_plan plan);
437
+
438
+ void CUFFTAPI fftw_flops(const fftw_plan plan, double *add, double *mul, double *fma);
439
+
440
+ void CUFFTAPI fftwf_flops(const fftw_plan plan, double *add, double *mul, double *fma);
441
+
442
+ void CUFFTAPI fftw_destroy_plan(fftw_plan plan);
443
+
444
+ void CUFFTAPI fftwf_destroy_plan(fftwf_plan plan);
445
+
446
+ void CUFFTAPI fftw_cleanup(void);
447
+
448
+ void CUFFTAPI fftwf_cleanup(void);
449
+
450
+ #ifdef __cplusplus
451
+ }
452
+ #endif
453
+
454
+ #endif /* _CUFFTW_H_ */
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cufft/lib/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (217 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/idna/__pycache__/idnadata.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e0c01fe1a0e5738b15b6952c63eebb273a28c12beefd13f01594da265a1b156
3
+ size 101565
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/LICENSE ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
2
+
3
+ Redistribution and use in source and binary forms, with or without
4
+ modification, are permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice, this
7
+ list of conditions and the following disclaimer.
8
+
9
+ 2. Redistributions in binary form must reproduce the above copyright notice,
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+
13
+ 3. Neither the name of the copyright holder nor the names of its contributors
14
+ may be used to endorse or promote products derived from this software
15
+ without specific prior written permission.
16
+
17
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
+
28
+ Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of
29
+ external contributions to this project including patches, pull requests, etc.
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/RECORD ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ../../../bin/pybind11-config,sha256=KwKhJwrv86OeAvCUq7sBopc-kDZzCJdnh_4RZIF8T-c,265
2
+ pybind11-2.13.6.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
3
+ pybind11-2.13.6.dist-info/LICENSE,sha256=g5ZbhDuY9nDTqFvQQe1LNyyOxQ17SlmVqDrGl7pnXcs,1684
4
+ pybind11-2.13.6.dist-info/METADATA,sha256=Gg_aZ0f3aFFDF3bQvgzR9kwVT_jogjVEc74kDVldlq0,9513
5
+ pybind11-2.13.6.dist-info/RECORD,,
6
+ pybind11-2.13.6.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ pybind11-2.13.6.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
8
+ pybind11-2.13.6.dist-info/entry_points.txt,sha256=Q_kAwEJBDz8wHD0V50hY3AvchDk3Pfyeox2YHrAcWZ0,105
9
+ pybind11-2.13.6.dist-info/top_level.txt,sha256=d1mqwSpUlmlZhXDQ9Y57eNlXc3dVDM1toKmfC1kJbvU,9
10
+ pybind11/__init__.py,sha256=9vt06pvuwvdKW0YwYQKOTxBEgmQ0kb5ZUOJrgtGhdKs,459
11
+ pybind11/__main__.py,sha256=p8vZ4btnkb_TaF03R1ac7qHmp-Eut86gCSUcVP8F3i4,2526
12
+ pybind11/__pycache__/__init__.cpython-311.pyc,,
13
+ pybind11/__pycache__/__main__.cpython-311.pyc,,
14
+ pybind11/__pycache__/_version.cpython-311.pyc,,
15
+ pybind11/__pycache__/commands.cpython-311.pyc,,
16
+ pybind11/__pycache__/setup_helpers.cpython-311.pyc,,
17
+ pybind11/_version.py,sha256=XUUceDIbc3kdRixyEVMy5v0LcGF36QUxMG9rJHlT6P4,232
18
+ pybind11/commands.py,sha256=V43hKb7VE_abYZvaO-TpJLOU65n6W3ZrdYHGF3G3qUs,1243
19
+ pybind11/include/pybind11/attr.h,sha256=QPjH7BfhL8QFwHHkrDak8gNOLMlb1itAO5fobjdoLp8,24334
20
+ pybind11/include/pybind11/buffer_info.h,sha256=_FcQisqdpphfWXKeCGNv3Gq5ivy1z-qF3d1Noeteaok,7778
21
+ pybind11/include/pybind11/cast.h,sha256=8gJ4Y4nc83dyq12CuU7ircAvAV1HoEZEVr0UyfeLQNA,71696
22
+ pybind11/include/pybind11/chrono.h,sha256=A23naeloqn-1NKVAABOsJtHU9Vz8lfvrAICuLk-7qBM,8458
23
+ pybind11/include/pybind11/common.h,sha256=ATg9Bt1pwF8qnNuI086fprM4CUTdrZdk_g2HXE1Sf6A,120
24
+ pybind11/include/pybind11/complex.h,sha256=AaDZ-rEmK4tFaue-K9P5y3TxxnaQF6JwZ_6LAzkdLQI,2096
25
+ pybind11/include/pybind11/detail/class.h,sha256=Bjk3K6xAMgwxPNTKfik7SC5Y24wgKs8Oz5VjvFdy0kA,29026
26
+ pybind11/include/pybind11/detail/common.h,sha256=uxFMVYKW87YPbUz8Mo70xoVrpK2D1NzhKSwlDpwrJxo,54708
27
+ pybind11/include/pybind11/detail/cpp_conduit.h,sha256=Bbx5728XzvyCL2gfW7kG6vgDltS5-V5gtkNQFPFevXg,2589
28
+ pybind11/include/pybind11/detail/descr.h,sha256=D63pIHsF3luO_g51CjbJU8Wl9VOihciEXQhXvfRg-Rk,6035
29
+ pybind11/include/pybind11/detail/exception_translation.h,sha256=fM1J19z00AuDlozHt0srpCJr-1uWW4kj_fLdSJDbdY8,2600
30
+ pybind11/include/pybind11/detail/init.h,sha256=Sb1UkPecC5l9xj5naYLdUM7qIRLVpe614H9Frvyg8xg,17983
31
+ pybind11/include/pybind11/detail/internals.h,sha256=xs-I7JdJACxx7gJf12HBLjL007jRXcAffPDsd0oTrq4,31985
32
+ pybind11/include/pybind11/detail/type_caster_base.h,sha256=mdgZ-FIkxdSShMPPe69EXxjvd1eQDDBVX835B7XqCNo,48938
33
+ pybind11/include/pybind11/detail/typeid.h,sha256=jw5pr9m72vkDsloT8vxl9wj17VJGcEdXDyziBlt89Js,1625
34
+ pybind11/include/pybind11/detail/value_and_holder.h,sha256=hwNYlqxjUhlUqihwMjr6s3LhhKlZiTLaWREtQrgOAkQ,2814
35
+ pybind11/include/pybind11/eigen.h,sha256=-HmSA1kgwCQ-GHUt7PHtTEc-vxqw9xARpF8PHWJip28,316
36
+ pybind11/include/pybind11/eigen/common.h,sha256=dIeqmK7IzW5K4k2larPnA1A863rDp38U9YbNIwiIyYk,378
37
+ pybind11/include/pybind11/eigen/matrix.h,sha256=VjCfx8M2AcD3m8THUbIEYidJyIClaNw9jMbd_Fzfo1s,32142
38
+ pybind11/include/pybind11/eigen/tensor.h,sha256=csE3_N9yy-9k0SWQPJuAxmv8Jp_-lFrrPdVOyMV8-gc,18384
39
+ pybind11/include/pybind11/embed.h,sha256=F3JQiOWnLGSuZ0NuEyBWFhHyVdczD8D_67kriU4QfsY,13362
40
+ pybind11/include/pybind11/eval.h,sha256=7re-O2Eor1yD0Q_KgFkHIjKD17ejzII687Yszl9_KfE,4731
41
+ pybind11/include/pybind11/functional.h,sha256=iOyYuNmbI-K3zgc1IMDwe4iHEOO3F8vwZbVSvbgxFQ4,5267
42
+ pybind11/include/pybind11/gil.h,sha256=hsJj6z1iXqlo5c7fPCgEvK_-eeDoKZm7PKPwPNCdVVo,7702
43
+ pybind11/include/pybind11/gil_safe_call_once.h,sha256=KKcy9Wgc_MJY-U5WpCZeNyzW7oVmC-d6yXkgephZ7zs,3993
44
+ pybind11/include/pybind11/iostream.h,sha256=K5rPXoCYN325r1PptcJCIhPhgtRtTJQjMr7bvUIOwxk,8862
45
+ pybind11/include/pybind11/numpy.h,sha256=xREhfycUTCOPF8CF-UWRdoLX0B23V6YWRiBqeRRElZg,84442
46
+ pybind11/include/pybind11/operators.h,sha256=224RoAXcv1la4NNY9rQ3aD_AeC8S9ZKx3HVK1O8B4MU,9103
47
+ pybind11/include/pybind11/options.h,sha256=qXvmnj--9fZSp56NYefnB3W5V17ppHlY1Srgo3DNBpw,2734
48
+ pybind11/include/pybind11/pybind11.h,sha256=hbzXHRCBIW7dwtwaKjXKPC0Nl1MGHZ5-BjGsMlE3LuU,129898
49
+ pybind11/include/pybind11/pytypes.h,sha256=BF8x4S5fsAzWf-d9pu83UsqjwRRo0ragHPy9sDOpUvk,99894
50
+ pybind11/include/pybind11/stl.h,sha256=aMi1OCCw2Zb-IRLSlAtQEJJHtWsRJiLT9dKDMHST1Ic,15532
51
+ pybind11/include/pybind11/stl/filesystem.h,sha256=lcYRCwNA8Xf4e4FRbeYh36SAwQjxKgyTXXdrguR4gM4,4559
52
+ pybind11/include/pybind11/stl_bind.h,sha256=B5t8E0A4Zdgm2sF0J8Q_UI2U5uqEBQ9TsJCelsJ4q0E,28495
53
+ pybind11/include/pybind11/type_caster_pyobject_ptr.h,sha256=H7pKBYTvUlibiJQEcKmeAkygSQwoCkuIyukNSDmVq-U,1929
54
+ pybind11/include/pybind11/typing.h,sha256=PIjZFNNzY_KsrkHQPlg0Vt24jlTi6kThdOldEJjchtY,7000
55
+ pybind11/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
+ pybind11/setup_helpers.py,sha256=AwD_CjfVzX653nW4_i0U4bkFMCG4ZILoMZixyL8CZ4o,17490
57
+ pybind11/share/cmake/pybind11/FindPythonLibsNew.cmake,sha256=_ZVzgVp6GQSEEv-b2iuauqTgoi1k2jHiNJlpl25MN-4,12187
58
+ pybind11/share/cmake/pybind11/pybind11Common.cmake,sha256=lvJJ518cN7SjKDgjpXw0XU0eKW358wEloIcKCyCNPB0,16164
59
+ pybind11/share/cmake/pybind11/pybind11Config.cmake,sha256=I96KX_zIZvLHbedHknVBj2YKhMt_QjM5LhCbzVNTvD8,7959
60
+ pybind11/share/cmake/pybind11/pybind11ConfigVersion.cmake,sha256=vDsLSBg7-Nop8Ar9wRe0xKgGUV4LRzWE4XE0kE5B6fE,1403
61
+ pybind11/share/cmake/pybind11/pybind11GuessPythonExtSuffix.cmake,sha256=WvhK2E-vWi9ArY0WJZXEK4kEFHpDQjl-au963hqH0r0,3321
62
+ pybind11/share/cmake/pybind11/pybind11NewTools.cmake,sha256=zGLNjL28gzi8tvwiabudLsye7id_sZI5ooYfiBBllvM,12169
63
+ pybind11/share/cmake/pybind11/pybind11Targets.cmake,sha256=tIjPtIpfb5m9POtu484cjGgNyWc5E4bbKzESLrcOLA0,4271
64
+ pybind11/share/cmake/pybind11/pybind11Tools.cmake,sha256=5K6EahoS7wIaQIhjrDS4p4jTpYr0b_MronXKee8zCAc,8565
65
+ pybind11/share/pkgconfig/pybind11.pc,sha256=M17R2NbpW6o7ujxioMP5M6WgVGrmJ_1vu_-E-H_rbes,171
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/REQUESTED ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11-2.13.6.dist-info/entry_points.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [console_scripts]
2
+ pybind11-config = pybind11.__main__:main
3
+
4
+ [pipx.run]
5
+ pybind11 = pybind11.__main__:main
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/detail/cpp_conduit.h ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright (c) 2024 The pybind Community.
2
+
3
+ #pragma once
4
+
5
+ #include <pybind11/pytypes.h>
6
+
7
+ #include "common.h"
8
+ #include "internals.h"
9
+
10
+ #include <typeinfo>
11
+
12
+ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
13
+ PYBIND11_NAMESPACE_BEGIN(detail)
14
+
15
+ // Forward declaration needed here: Refactoring opportunity.
16
+ extern "C" inline PyObject *pybind11_object_new(PyTypeObject *type, PyObject *, PyObject *);
17
+
18
+ inline bool type_is_managed_by_our_internals(PyTypeObject *type_obj) {
19
+ #if defined(PYPY_VERSION)
20
+ auto &internals = get_internals();
21
+ return bool(internals.registered_types_py.find(type_obj)
22
+ != internals.registered_types_py.end());
23
+ #else
24
+ return bool(type_obj->tp_new == pybind11_object_new);
25
+ #endif
26
+ }
27
+
28
+ inline bool is_instance_method_of_type(PyTypeObject *type_obj, PyObject *attr_name) {
29
+ PyObject *descr = _PyType_Lookup(type_obj, attr_name);
30
+ return bool((descr != nullptr) && PyInstanceMethod_Check(descr));
31
+ }
32
+
33
+ inline object try_get_cpp_conduit_method(PyObject *obj) {
34
+ if (PyType_Check(obj)) {
35
+ return object();
36
+ }
37
+ PyTypeObject *type_obj = Py_TYPE(obj);
38
+ str attr_name("_pybind11_conduit_v1_");
39
+ bool assumed_to_be_callable = false;
40
+ if (type_is_managed_by_our_internals(type_obj)) {
41
+ if (!is_instance_method_of_type(type_obj, attr_name.ptr())) {
42
+ return object();
43
+ }
44
+ assumed_to_be_callable = true;
45
+ }
46
+ PyObject *method = PyObject_GetAttr(obj, attr_name.ptr());
47
+ if (method == nullptr) {
48
+ PyErr_Clear();
49
+ return object();
50
+ }
51
+ if (!assumed_to_be_callable && PyCallable_Check(method) == 0) {
52
+ Py_DECREF(method);
53
+ return object();
54
+ }
55
+ return reinterpret_steal<object>(method);
56
+ }
57
+
58
+ inline void *try_raw_pointer_ephemeral_from_cpp_conduit(handle src,
59
+ const std::type_info *cpp_type_info) {
60
+ object method = try_get_cpp_conduit_method(src.ptr());
61
+ if (method) {
62
+ capsule cpp_type_info_capsule(const_cast<void *>(static_cast<const void *>(cpp_type_info)),
63
+ typeid(std::type_info).name());
64
+ object cpp_conduit = method(bytes(PYBIND11_PLATFORM_ABI_ID),
65
+ cpp_type_info_capsule,
66
+ bytes("raw_pointer_ephemeral"));
67
+ if (isinstance<capsule>(cpp_conduit)) {
68
+ return reinterpret_borrow<capsule>(cpp_conduit).get_pointer();
69
+ }
70
+ }
71
+ return nullptr;
72
+ }
73
+
74
+ #define PYBIND11_HAS_CPP_CONDUIT 1
75
+
76
+ PYBIND11_NAMESPACE_END(detail)
77
+ PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/gil.h ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ pybind11/gil.h: RAII helpers for managing the GIL
3
+
4
+ Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>
5
+
6
+ All rights reserved. Use of this source code is governed by a
7
+ BSD-style license that can be found in the LICENSE file.
8
+ */
9
+
10
+ #pragma once
11
+
12
+ #include "detail/common.h"
13
+
14
+ #include <cassert>
15
+
16
+ #if !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT)
17
+ # include "detail/internals.h"
18
+ #endif
19
+
20
+ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
21
+
22
+ PYBIND11_NAMESPACE_BEGIN(detail)
23
+
24
+ // forward declarations
25
+ PyThreadState *get_thread_state_unchecked();
26
+
27
+ PYBIND11_NAMESPACE_END(detail)
28
+
29
+ #if !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT)
30
+
31
+ /* The functions below essentially reproduce the PyGILState_* API using a RAII
32
+ * pattern, but there are a few important differences:
33
+ *
34
+ * 1. When acquiring the GIL from a non-main thread during the finalization
35
+ * phase, the GILState API blindly terminates the calling thread, which
36
+ * is often not what is wanted. This API does not do this.
37
+ *
38
+ * 2. The gil_scoped_release function can optionally cut the relationship
39
+ * of a PyThreadState and its associated thread, which allows moving it to
40
+ * another thread (this is a fairly rare/advanced use case).
41
+ *
42
+ * 3. The reference count of an acquired thread state can be controlled. This
43
+ * can be handy to prevent cases where callbacks issued from an external
44
+ * thread would otherwise constantly construct and destroy thread state data
45
+ * structures.
46
+ *
47
+ * See the Python bindings of NanoGUI (http://github.com/wjakob/nanogui) for an
48
+ * example which uses features 2 and 3 to migrate the Python thread of
49
+ * execution to another thread (to run the event loop on the original thread,
50
+ * in this case).
51
+ */
52
+
53
+ class gil_scoped_acquire {
54
+ public:
55
+ PYBIND11_NOINLINE gil_scoped_acquire() {
56
+ auto &internals = detail::get_internals();
57
+ tstate = (PyThreadState *) PYBIND11_TLS_GET_VALUE(internals.tstate);
58
+
59
+ if (!tstate) {
60
+ /* Check if the GIL was acquired using the PyGILState_* API instead (e.g. if
61
+ calling from a Python thread). Since we use a different key, this ensures
62
+ we don't create a new thread state and deadlock in PyEval_AcquireThread
63
+ below. Note we don't save this state with internals.tstate, since we don't
64
+ create it we would fail to clear it (its reference count should be > 0). */
65
+ tstate = PyGILState_GetThisThreadState();
66
+ }
67
+
68
+ if (!tstate) {
69
+ tstate = PyThreadState_New(internals.istate);
70
+ # if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
71
+ if (!tstate) {
72
+ pybind11_fail("scoped_acquire: could not create thread state!");
73
+ }
74
+ # endif
75
+ tstate->gilstate_counter = 0;
76
+ PYBIND11_TLS_REPLACE_VALUE(internals.tstate, tstate);
77
+ } else {
78
+ release = detail::get_thread_state_unchecked() != tstate;
79
+ }
80
+
81
+ if (release) {
82
+ PyEval_AcquireThread(tstate);
83
+ }
84
+
85
+ inc_ref();
86
+ }
87
+
88
+ gil_scoped_acquire(const gil_scoped_acquire &) = delete;
89
+ gil_scoped_acquire &operator=(const gil_scoped_acquire &) = delete;
90
+
91
+ void inc_ref() { ++tstate->gilstate_counter; }
92
+
93
+ PYBIND11_NOINLINE void dec_ref() {
94
+ --tstate->gilstate_counter;
95
+ # if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
96
+ if (detail::get_thread_state_unchecked() != tstate) {
97
+ pybind11_fail("scoped_acquire::dec_ref(): thread state must be current!");
98
+ }
99
+ if (tstate->gilstate_counter < 0) {
100
+ pybind11_fail("scoped_acquire::dec_ref(): reference count underflow!");
101
+ }
102
+ # endif
103
+ if (tstate->gilstate_counter == 0) {
104
+ # if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
105
+ if (!release) {
106
+ pybind11_fail("scoped_acquire::dec_ref(): internal error!");
107
+ }
108
+ # endif
109
+ PyThreadState_Clear(tstate);
110
+ if (active) {
111
+ PyThreadState_DeleteCurrent();
112
+ }
113
+ PYBIND11_TLS_DELETE_VALUE(detail::get_internals().tstate);
114
+ release = false;
115
+ }
116
+ }
117
+
118
+ /// This method will disable the PyThreadState_DeleteCurrent call and the
119
+ /// GIL won't be acquired. This method should be used if the interpreter
120
+ /// could be shutting down when this is called, as thread deletion is not
121
+ /// allowed during shutdown. Check _Py_IsFinalizing() on Python 3.7+, and
122
+ /// protect subsequent code.
123
+ PYBIND11_NOINLINE void disarm() { active = false; }
124
+
125
+ PYBIND11_NOINLINE ~gil_scoped_acquire() {
126
+ dec_ref();
127
+ if (release) {
128
+ PyEval_SaveThread();
129
+ }
130
+ }
131
+
132
+ private:
133
+ PyThreadState *tstate = nullptr;
134
+ bool release = true;
135
+ bool active = true;
136
+ };
137
+
138
+ class gil_scoped_release {
139
+ public:
140
+ // PRECONDITION: The GIL must be held when this constructor is called.
141
+ explicit gil_scoped_release(bool disassoc = false) : disassoc(disassoc) {
142
+ assert(PyGILState_Check());
143
+ // `get_internals()` must be called here unconditionally in order to initialize
144
+ // `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an
145
+ // initialization race could occur as multiple threads try `gil_scoped_acquire`.
146
+ auto &internals = detail::get_internals();
147
+ // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
148
+ tstate = PyEval_SaveThread();
149
+ if (disassoc) {
150
+ // Python >= 3.7 can remove this, it's an int before 3.7
151
+ // NOLINTNEXTLINE(readability-qualified-auto)
152
+ auto key = internals.tstate;
153
+ PYBIND11_TLS_DELETE_VALUE(key);
154
+ }
155
+ }
156
+
157
+ gil_scoped_release(const gil_scoped_release &) = delete;
158
+ gil_scoped_release &operator=(const gil_scoped_release &) = delete;
159
+
160
+ /// This method will disable the PyThreadState_DeleteCurrent call and the
161
+ /// GIL won't be acquired. This method should be used if the interpreter
162
+ /// could be shutting down when this is called, as thread deletion is not
163
+ /// allowed during shutdown. Check _Py_IsFinalizing() on Python 3.7+, and
164
+ /// protect subsequent code.
165
+ PYBIND11_NOINLINE void disarm() { active = false; }
166
+
167
+ ~gil_scoped_release() {
168
+ if (!tstate) {
169
+ return;
170
+ }
171
+ // `PyEval_RestoreThread()` should not be called if runtime is finalizing
172
+ if (active) {
173
+ PyEval_RestoreThread(tstate);
174
+ }
175
+ if (disassoc) {
176
+ // Python >= 3.7 can remove this, it's an int before 3.7
177
+ // NOLINTNEXTLINE(readability-qualified-auto)
178
+ auto key = detail::get_internals().tstate;
179
+ PYBIND11_TLS_REPLACE_VALUE(key, tstate);
180
+ }
181
+ }
182
+
183
+ private:
184
+ PyThreadState *tstate;
185
+ bool disassoc;
186
+ bool active = true;
187
+ };
188
+
189
+ #else // PYBIND11_SIMPLE_GIL_MANAGEMENT
190
+
191
+ class gil_scoped_acquire {
192
+ PyGILState_STATE state;
193
+
194
+ public:
195
+ gil_scoped_acquire() : state{PyGILState_Ensure()} {}
196
+ gil_scoped_acquire(const gil_scoped_acquire &) = delete;
197
+ gil_scoped_acquire &operator=(const gil_scoped_acquire &) = delete;
198
+ ~gil_scoped_acquire() { PyGILState_Release(state); }
199
+ void disarm() {}
200
+ };
201
+
202
+ class gil_scoped_release {
203
+ PyThreadState *state;
204
+
205
+ public:
206
+ // PRECONDITION: The GIL must be held when this constructor is called.
207
+ gil_scoped_release() {
208
+ assert(PyGILState_Check());
209
+ state = PyEval_SaveThread();
210
+ }
211
+ gil_scoped_release(const gil_scoped_release &) = delete;
212
+ gil_scoped_release &operator=(const gil_scoped_release &) = delete;
213
+ ~gil_scoped_release() { PyEval_RestoreThread(state); }
214
+ void disarm() {}
215
+ };
216
+
217
+ #endif // PYBIND11_SIMPLE_GIL_MANAGEMENT
218
+
219
+ PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pybind11/include/pybind11/iostream.h ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ pybind11/iostream.h -- Tools to assist with redirecting cout and cerr to Python
3
+
4
+ Copyright (c) 2017 Henry F. Schreiner
5
+
6
+ All rights reserved. Use of this source code is governed by a
7
+ BSD-style license that can be found in the LICENSE file.
8
+
9
+ WARNING: The implementation in this file is NOT thread safe. Multiple
10
+ threads writing to a redirected ostream concurrently cause data races
11
+ and potentially buffer overflows. Therefore it is currently a requirement
12
+ that all (possibly) concurrent redirected ostream writes are protected by
13
+ a mutex.
14
+ #HelpAppreciated: Work on iostream.h thread safety.
15
+ For more background see the discussions under
16
+ https://github.com/pybind/pybind11/pull/2982 and
17
+ https://github.com/pybind/pybind11/pull/2995.
18
+ */
19
+
20
+ #pragma once
21
+
22
+ #include "pybind11.h"
23
+
24
+ #include <algorithm>
25
+ #include <cstring>
26
+ #include <iostream>
27
+ #include <iterator>
28
+ #include <memory>
29
+ #include <ostream>
30
+ #include <streambuf>
31
+ #include <string>
32
+ #include <utility>
33
+
34
+ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
35
+ PYBIND11_NAMESPACE_BEGIN(detail)
36
+
37
+ // Buffer that writes to Python instead of C++
38
+ class pythonbuf : public std::streambuf {
39
+ private:
40
+ using traits_type = std::streambuf::traits_type;
41
+
42
+ const size_t buf_size;
43
+ std::unique_ptr<char[]> d_buffer;
44
+ object pywrite;
45
+ object pyflush;
46
+
47
+ int overflow(int c) override {
48
+ if (!traits_type::eq_int_type(c, traits_type::eof())) {
49
+ *pptr() = traits_type::to_char_type(c);
50
+ pbump(1);
51
+ }
52
+ return sync() == 0 ? traits_type::not_eof(c) : traits_type::eof();
53
+ }
54
+
55
+ // Computes how many bytes at the end of the buffer are part of an
56
+ // incomplete sequence of UTF-8 bytes.
57
+ // Precondition: pbase() < pptr()
58
+ size_t utf8_remainder() const {
59
+ const auto rbase = std::reverse_iterator<char *>(pbase());
60
+ const auto rpptr = std::reverse_iterator<char *>(pptr());
61
+ auto is_ascii = [](char c) { return (static_cast<unsigned char>(c) & 0x80) == 0x00; };
62
+ auto is_leading = [](char c) { return (static_cast<unsigned char>(c) & 0xC0) == 0xC0; };
63
+ auto is_leading_2b = [](char c) { return static_cast<unsigned char>(c) <= 0xDF; };
64
+ auto is_leading_3b = [](char c) { return static_cast<unsigned char>(c) <= 0xEF; };
65
+ // If the last character is ASCII, there are no incomplete code points
66
+ if (is_ascii(*rpptr)) {
67
+ return 0;
68
+ }
69
+ // Otherwise, work back from the end of the buffer and find the first
70
+ // UTF-8 leading byte
71
+ const auto rpend = rbase - rpptr >= 3 ? rpptr + 3 : rbase;
72
+ const auto leading = std::find_if(rpptr, rpend, is_leading);
73
+ if (leading == rbase) {
74
+ return 0;
75
+ }
76
+ const auto dist = static_cast<size_t>(leading - rpptr);
77
+ size_t remainder = 0;
78
+
79
+ if (dist == 0) {
80
+ remainder = 1; // 1-byte code point is impossible
81
+ } else if (dist == 1) {
82
+ remainder = is_leading_2b(*leading) ? 0 : dist + 1;
83
+ } else if (dist == 2) {
84
+ remainder = is_leading_3b(*leading) ? 0 : dist + 1;
85
+ }
86
+ // else if (dist >= 3), at least 4 bytes before encountering an UTF-8
87
+ // leading byte, either no remainder or invalid UTF-8.
88
+ // Invalid UTF-8 will cause an exception later when converting
89
+ // to a Python string, so that's not handled here.
90
+ return remainder;
91
+ }
92
+
93
+ // This function must be non-virtual to be called in a destructor.
94
+ int _sync() {
95
+ if (pbase() != pptr()) { // If buffer is not empty
96
+ gil_scoped_acquire tmp;
97
+ // This subtraction cannot be negative, so dropping the sign.
98
+ auto size = static_cast<size_t>(pptr() - pbase());
99
+ size_t remainder = utf8_remainder();
100
+
101
+ if (size > remainder) {
102
+ str line(pbase(), size - remainder);
103
+ pywrite(std::move(line));
104
+ pyflush();
105
+ }
106
+
107
+ // Copy the remainder at the end of the buffer to the beginning:
108
+ if (remainder > 0) {
109
+ std::memmove(pbase(), pptr() - remainder, remainder);
110
+ }
111
+ setp(pbase(), epptr());
112
+ pbump(static_cast<int>(remainder));
113
+ }
114
+ return 0;
115
+ }
116
+
117
+ int sync() override { return _sync(); }
118
+
119
+ public:
120
+ explicit pythonbuf(const object &pyostream, size_t buffer_size = 1024)
121
+ : buf_size(buffer_size), d_buffer(new char[buf_size]), pywrite(pyostream.attr("write")),
122
+ pyflush(pyostream.attr("flush")) {
123
+ setp(d_buffer.get(), d_buffer.get() + buf_size - 1);
124
+ }
125
+
126
+ pythonbuf(pythonbuf &&) = default;
127
+
128
+ /// Sync before destroy
129
+ ~pythonbuf() override { _sync(); }
130
+ };
131
+
132
+ PYBIND11_NAMESPACE_END(detail)
133
+
134
+ /** \rst
135
+ This is a move-only guard that redirects output.
136
+
137
+ .. code-block:: cpp
138
+
139
+ #include <pybind11/iostream.h>
140
+
141
+ ...
142
+
143
+ {
144
+ py::scoped_ostream_redirect output;
145
+ std::cout << "Hello, World!"; // Python stdout
146
+ } // <-- return std::cout to normal
147
+
148
+ You can explicitly pass the c++ stream and the python object,
149
+ for example to guard stderr instead.
150
+
151
+ .. code-block:: cpp
152
+
153
+ {
154
+ py::scoped_ostream_redirect output{
155
+ std::cerr, py::module::import("sys").attr("stderr")};
156
+ std::cerr << "Hello, World!";
157
+ }
158
+ \endrst */
159
+ class scoped_ostream_redirect {
160
+ protected:
161
+ std::streambuf *old;
162
+ std::ostream &costream;
163
+ detail::pythonbuf buffer;
164
+
165
+ public:
166
+ explicit scoped_ostream_redirect(std::ostream &costream = std::cout,
167
+ const object &pyostream
168
+ = module_::import("sys").attr("stdout"))
169
+ : costream(costream), buffer(pyostream) {
170
+ old = costream.rdbuf(&buffer);
171
+ }
172
+
173
+ ~scoped_ostream_redirect() { costream.rdbuf(old); }
174
+
175
+ scoped_ostream_redirect(const scoped_ostream_redirect &) = delete;
176
+ scoped_ostream_redirect(scoped_ostream_redirect &&other) = default;
177
+ scoped_ostream_redirect &operator=(const scoped_ostream_redirect &) = delete;
178
+ scoped_ostream_redirect &operator=(scoped_ostream_redirect &&) = delete;
179
+ };
180
+
181
+ /** \rst
182
+ Like `scoped_ostream_redirect`, but redirects cerr by default. This class
183
+ is provided primarily to make ``py::call_guard`` easier to use.
184
+
185
+ .. code-block:: cpp
186
+
187
+ m.def("noisy_func", &noisy_func,
188
+ py::call_guard<scoped_ostream_redirect,
189
+ scoped_estream_redirect>());
190
+
191
+ \endrst */
192
+ class scoped_estream_redirect : public scoped_ostream_redirect {
193
+ public:
194
+ explicit scoped_estream_redirect(std::ostream &costream = std::cerr,
195
+ const object &pyostream
196
+ = module_::import("sys").attr("stderr"))
197
+ : scoped_ostream_redirect(costream, pyostream) {}
198
+ };
199
+
200
+ PYBIND11_NAMESPACE_BEGIN(detail)
201
+
202
+ // Class to redirect output as a context manager. C++ backend.
203
+ class OstreamRedirect {
204
+ bool do_stdout_;
205
+ bool do_stderr_;
206
+ std::unique_ptr<scoped_ostream_redirect> redirect_stdout;
207
+ std::unique_ptr<scoped_estream_redirect> redirect_stderr;
208
+
209
+ public:
210
+ explicit OstreamRedirect(bool do_stdout = true, bool do_stderr = true)
211
+ : do_stdout_(do_stdout), do_stderr_(do_stderr) {}
212
+
213
+ void enter() {
214
+ if (do_stdout_) {
215
+ redirect_stdout.reset(new scoped_ostream_redirect());
216
+ }
217
+ if (do_stderr_) {
218
+ redirect_stderr.reset(new scoped_estream_redirect());
219
+ }
220
+ }
221
+
222
+ void exit() {
223
+ redirect_stdout.reset();
224
+ redirect_stderr.reset();
225
+ }
226
+ };
227
+
228
+ PYBIND11_NAMESPACE_END(detail)
229
+
230
+ /** \rst
231
+ This is a helper function to add a C++ redirect context manager to Python
232
+ instead of using a C++ guard. To use it, add the following to your binding code:
233
+
234
+ .. code-block:: cpp
235
+
236
+ #include <pybind11/iostream.h>
237
+
238
+ ...
239
+
240
+ py::add_ostream_redirect(m, "ostream_redirect");
241
+
242
+ You now have a Python context manager that redirects your output:
243
+
244
+ .. code-block:: python
245
+
246
+ with m.ostream_redirect():
247
+ m.print_to_cout_function()
248
+
249
+ This manager can optionally be told which streams to operate on:
250
+
251
+ .. code-block:: python
252
+
253
+ with m.ostream_redirect(stdout=True, stderr=True):
254
+ m.noisy_function_with_error_printing()
255
+
256
+ \endrst */
257
+ inline class_<detail::OstreamRedirect>
258
+ add_ostream_redirect(module_ m, const std::string &name = "ostream_redirect") {
259
+ return class_<detail::OstreamRedirect>(std::move(m), name.c_str(), module_local())
260
+ .def(init<bool, bool>(), arg("stdout") = true, arg("stderr") = true)
261
+ .def("__enter__", &detail::OstreamRedirect::enter)
262
+ .def("__exit__", [](detail::OstreamRedirect &self_, const args &) { self_.exit(); });
263
+ }
264
+
265
+ PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_C.cpython-311-x86_64-linux-gnu.so ADDED
Binary file (37.9 kB). View file