Skip to content

Commit 87ee176

Browse files
authored
Fix #1806: Fix use-after-free in graph-related APIs (#2083)
* Fix docstrings in runtime * Fix #1806: Fix use-after-free in various Graph APIs * Use C++ struct assignment rather than memcpy * Add missing changes
1 parent 2523e97 commit 87ee176

5 files changed

Lines changed: 266 additions & 24 deletions

File tree

cuda_bindings/cuda/bindings/driver.pyx.in

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
33

4-
# This code was automatically generated with version 13.2.0, generator version 0.3.1.dev1568+g289771de9.d20260413. Do not modify it directly.
4+
# This code was automatically generated with version 13.2.0, generator version 0.3.1.dev1630+gadce055ea.d20260422. Do not modify it directly.
55
from typing import Any, Optional
66
import cython
77
import ctypes
@@ -40589,9 +40589,13 @@ def cuStreamGetCaptureInfo(hStream):
4058940589
with nogil:
4059040590
err = cydriver.cuStreamGetCaptureInfo(cyhStream, &captureStatus_out, <cydriver.cuuint64_t*>id_out._pvt_ptr, <cydriver.CUgraph*>graph_out._pvt_ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out)
4059140591
if CUresult(err) == CUresult(0):
40592-
pydependencies_out = [CUgraphNode(init_value=<void_ptr>cydependencies_out[idx]) for idx in range(numDependencies_out)]
40592+
pydependencies_out = [CUgraphNode() for _ in range(numDependencies_out)]
40593+
for idx in range(numDependencies_out):
40594+
(<CUgraphNode>pydependencies_out[idx])._pvt_ptr[0] = cydependencies_out[idx]
4059340595
if CUresult(err) == CUresult(0):
40594-
pyedgeData_out = [CUgraphEdgeData(_ptr=<void_ptr>&cyedgeData_out[idx]) for idx in range(numDependencies_out)]
40596+
pyedgeData_out = [CUgraphEdgeData() for _ in range(numDependencies_out)]
40597+
for idx in range(numDependencies_out):
40598+
(<CUgraphEdgeData>pyedgeData_out[idx])._pvt_ptr[0] = cyedgeData_out[idx]
4059540599
if err != cydriver.CUDA_SUCCESS:
4059640600
return (_CUresult(err), None, None, None, None, None, None)
4059740601
return (_CUresult_SUCCESS, CUstreamCaptureStatus(captureStatus_out), id_out, graph_out, pydependencies_out, pyedgeData_out, numDependencies_out)
@@ -47195,7 +47199,9 @@ def cuGraphGetNodes(hGraph, size_t numNodes = 0):
4719547199
with nogil:
4719647200
err = cydriver.cuGraphGetNodes(cyhGraph, cynodes, &numNodes)
4719747201
if CUresult(err) == CUresult(0):
47198-
pynodes = [CUgraphNode(init_value=<void_ptr>cynodes[idx]) for idx in range(_graph_length)]
47202+
pynodes = [CUgraphNode() for _ in range(_graph_length)]
47203+
for idx in range(_graph_length):
47204+
(<CUgraphNode>pynodes[idx])._pvt_ptr[0] = cynodes[idx]
4719947205
if cynodes is not NULL:
4720047206
free(cynodes)
4720147207
if err != cydriver.CUDA_SUCCESS:
@@ -47254,7 +47260,9 @@ def cuGraphGetRootNodes(hGraph, size_t numRootNodes = 0):
4725447260
with nogil:
4725547261
err = cydriver.cuGraphGetRootNodes(cyhGraph, cyrootNodes, &numRootNodes)
4725647262
if CUresult(err) == CUresult(0):
47257-
pyrootNodes = [CUgraphNode(init_value=<void_ptr>cyrootNodes[idx]) for idx in range(_graph_length)]
47263+
pyrootNodes = [CUgraphNode() for _ in range(_graph_length)]
47264+
for idx in range(_graph_length):
47265+
(<CUgraphNode>pyrootNodes[idx])._pvt_ptr[0] = cyrootNodes[idx]
4725847266
if cyrootNodes is not NULL:
4725947267
free(cyrootNodes)
4726047268
if err != cydriver.CUDA_SUCCESS:
@@ -47336,15 +47344,21 @@ def cuGraphGetEdges(hGraph, size_t numEdges = 0):
4733647344
with nogil:
4733747345
err = cydriver.cuGraphGetEdges(cyhGraph, cyfrom_, cyto, cyedgeData, &numEdges)
4733847346
if CUresult(err) == CUresult(0):
47339-
pyfrom_ = [CUgraphNode(init_value=<void_ptr>cyfrom_[idx]) for idx in range(_graph_length)]
47347+
pyfrom_ = [CUgraphNode() for _ in range(_graph_length)]
47348+
for idx in range(_graph_length):
47349+
(<CUgraphNode>pyfrom_[idx])._pvt_ptr[0] = cyfrom_[idx]
4734047350
if cyfrom_ is not NULL:
4734147351
free(cyfrom_)
4734247352
if CUresult(err) == CUresult(0):
47343-
pyto = [CUgraphNode(init_value=<void_ptr>cyto[idx]) for idx in range(_graph_length)]
47353+
pyto = [CUgraphNode() for _ in range(_graph_length)]
47354+
for idx in range(_graph_length):
47355+
(<CUgraphNode>pyto[idx])._pvt_ptr[0] = cyto[idx]
4734447356
if cyto is not NULL:
4734547357
free(cyto)
4734647358
if CUresult(err) == CUresult(0):
47347-
pyedgeData = [CUgraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
47359+
pyedgeData = [CUgraphEdgeData() for _ in range(_graph_length)]
47360+
for idx in range(_graph_length):
47361+
(<CUgraphEdgeData>pyedgeData[idx])._pvt_ptr[0] = cyedgeData[idx]
4734847362
if cyedgeData is not NULL:
4734947363
free(cyedgeData)
4735047364
if err != cydriver.CUDA_SUCCESS:
@@ -47417,11 +47431,15 @@ def cuGraphNodeGetDependencies(hNode, size_t numDependencies = 0):
4741747431
with nogil:
4741847432
err = cydriver.cuGraphNodeGetDependencies(cyhNode, cydependencies, cyedgeData, &numDependencies)
4741947433
if CUresult(err) == CUresult(0):
47420-
pydependencies = [CUgraphNode(init_value=<void_ptr>cydependencies[idx]) for idx in range(_graph_length)]
47434+
pydependencies = [CUgraphNode() for _ in range(_graph_length)]
47435+
for idx in range(_graph_length):
47436+
(<CUgraphNode>pydependencies[idx])._pvt_ptr[0] = cydependencies[idx]
4742147437
if cydependencies is not NULL:
4742247438
free(cydependencies)
4742347439
if CUresult(err) == CUresult(0):
47424-
pyedgeData = [CUgraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
47440+
pyedgeData = [CUgraphEdgeData() for _ in range(_graph_length)]
47441+
for idx in range(_graph_length):
47442+
(<CUgraphEdgeData>pyedgeData[idx])._pvt_ptr[0] = cyedgeData[idx]
4742547443
if cyedgeData is not NULL:
4742647444
free(cyedgeData)
4742747445
if err != cydriver.CUDA_SUCCESS:
@@ -47494,11 +47512,15 @@ def cuGraphNodeGetDependentNodes(hNode, size_t numDependentNodes = 0):
4749447512
with nogil:
4749547513
err = cydriver.cuGraphNodeGetDependentNodes(cyhNode, cydependentNodes, cyedgeData, &numDependentNodes)
4749647514
if CUresult(err) == CUresult(0):
47497-
pydependentNodes = [CUgraphNode(init_value=<void_ptr>cydependentNodes[idx]) for idx in range(_graph_length)]
47515+
pydependentNodes = [CUgraphNode() for _ in range(_graph_length)]
47516+
for idx in range(_graph_length):
47517+
(<CUgraphNode>pydependentNodes[idx])._pvt_ptr[0] = cydependentNodes[idx]
4749847518
if cydependentNodes is not NULL:
4749947519
free(cydependentNodes)
4750047520
if CUresult(err) == CUresult(0):
47501-
pyedgeData = [CUgraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
47521+
pyedgeData = [CUgraphEdgeData() for _ in range(_graph_length)]
47522+
for idx in range(_graph_length):
47523+
(<CUgraphEdgeData>pyedgeData[idx])._pvt_ptr[0] = cyedgeData[idx]
4750247524
if cyedgeData is not NULL:
4750347525
free(cyedgeData)
4750447526
if err != cydriver.CUDA_SUCCESS:

cuda_bindings/cuda/bindings/runtime.pyx.in

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24290,9 +24290,13 @@ def cudaStreamGetCaptureInfo(stream):
2429024290
with nogil:
2429124291
err = cyruntime.cudaStreamGetCaptureInfo(cystream, &captureStatus_out, &id_out, <cyruntime.cudaGraph_t*>graph_out._pvt_ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out)
2429224292
if cudaError_t(err) == cudaError_t(0):
24293-
pydependencies_out = [cudaGraphNode_t(init_value=<void_ptr>cydependencies_out[idx]) for idx in range(numDependencies_out)]
24293+
pydependencies_out = [cudaGraphNode_t() for _ in range(numDependencies_out)]
24294+
for idx in range(numDependencies_out):
24295+
(<cudaGraphNode_t>pydependencies_out[idx])._pvt_ptr[0] = cydependencies_out[idx]
2429424296
if cudaError_t(err) == cudaError_t(0):
24295-
pyedgeData_out = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData_out[idx]) for idx in range(numDependencies_out)]
24297+
pyedgeData_out = [cudaGraphEdgeData() for _ in range(numDependencies_out)]
24298+
for idx in range(numDependencies_out):
24299+
(<cudaGraphEdgeData>pyedgeData_out[idx])._pvt_ptr[0] = cyedgeData_out[idx]
2429624300
if err != cyruntime.cudaSuccess:
2429724301
return (_cudaError_t(err), None, None, None, None, None, None)
2429824302
return (_cudaError_t_SUCCESS, cudaStreamCaptureStatus(captureStatus_out), id_out, graph_out, pydependencies_out, pyedgeData_out, numDependencies_out)
@@ -35805,7 +35809,9 @@ def cudaGraphGetNodes(graph, size_t numNodes = 0):
3580535809
with nogil:
3580635810
err = cyruntime.cudaGraphGetNodes(cygraph, cynodes, &numNodes)
3580735811
if cudaError_t(err) == cudaError_t(0):
35808-
pynodes = [cudaGraphNode_t(init_value=<void_ptr>cynodes[idx]) for idx in range(_graph_length)]
35812+
pynodes = [cudaGraphNode_t() for _ in range(_graph_length)]
35813+
for idx in range(_graph_length):
35814+
(<cudaGraphNode_t>pynodes[idx])._pvt_ptr[0] = cynodes[idx]
3580935815
if cynodes is not NULL:
3581035816
free(cynodes)
3581135817
if err != cyruntime.cudaSuccess:
@@ -35864,7 +35870,9 @@ def cudaGraphGetRootNodes(graph, size_t pNumRootNodes = 0):
3586435870
with nogil:
3586535871
err = cyruntime.cudaGraphGetRootNodes(cygraph, cypRootNodes, &pNumRootNodes)
3586635872
if cudaError_t(err) == cudaError_t(0):
35867-
pypRootNodes = [cudaGraphNode_t(init_value=<void_ptr>cypRootNodes[idx]) for idx in range(_graph_length)]
35873+
pypRootNodes = [cudaGraphNode_t() for _ in range(_graph_length)]
35874+
for idx in range(_graph_length):
35875+
(<cudaGraphNode_t>pypRootNodes[idx])._pvt_ptr[0] = cypRootNodes[idx]
3586835876
if cypRootNodes is not NULL:
3586935877
free(cypRootNodes)
3587035878
if err != cyruntime.cudaSuccess:
@@ -35946,15 +35954,21 @@ def cudaGraphGetEdges(graph, size_t numEdges = 0):
3594635954
with nogil:
3594735955
err = cyruntime.cudaGraphGetEdges(cygraph, cyfrom_, cyto, cyedgeData, &numEdges)
3594835956
if cudaError_t(err) == cudaError_t(0):
35949-
pyfrom_ = [cudaGraphNode_t(init_value=<void_ptr>cyfrom_[idx]) for idx in range(_graph_length)]
35957+
pyfrom_ = [cudaGraphNode_t() for _ in range(_graph_length)]
35958+
for idx in range(_graph_length):
35959+
(<cudaGraphNode_t>pyfrom_[idx])._pvt_ptr[0] = cyfrom_[idx]
3595035960
if cyfrom_ is not NULL:
3595135961
free(cyfrom_)
3595235962
if cudaError_t(err) == cudaError_t(0):
35953-
pyto = [cudaGraphNode_t(init_value=<void_ptr>cyto[idx]) for idx in range(_graph_length)]
35963+
pyto = [cudaGraphNode_t() for _ in range(_graph_length)]
35964+
for idx in range(_graph_length):
35965+
(<cudaGraphNode_t>pyto[idx])._pvt_ptr[0] = cyto[idx]
3595435966
if cyto is not NULL:
3595535967
free(cyto)
3595635968
if cudaError_t(err) == cudaError_t(0):
35957-
pyedgeData = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
35969+
pyedgeData = [cudaGraphEdgeData() for _ in range(_graph_length)]
35970+
for idx in range(_graph_length):
35971+
(<cudaGraphEdgeData>pyedgeData[idx])._pvt_ptr[0] = cyedgeData[idx]
3595835972
if cyedgeData is not NULL:
3595935973
free(cyedgeData)
3596035974
if err != cyruntime.cudaSuccess:
@@ -36027,11 +36041,15 @@ def cudaGraphNodeGetDependencies(node, size_t pNumDependencies = 0):
3602736041
with nogil:
3602836042
err = cyruntime.cudaGraphNodeGetDependencies(cynode, cypDependencies, cyedgeData, &pNumDependencies)
3602936043
if cudaError_t(err) == cudaError_t(0):
36030-
pypDependencies = [cudaGraphNode_t(init_value=<void_ptr>cypDependencies[idx]) for idx in range(_graph_length)]
36044+
pypDependencies = [cudaGraphNode_t() for _ in range(_graph_length)]
36045+
for idx in range(_graph_length):
36046+
(<cudaGraphNode_t>pypDependencies[idx])._pvt_ptr[0] = cypDependencies[idx]
3603136047
if cypDependencies is not NULL:
3603236048
free(cypDependencies)
3603336049
if cudaError_t(err) == cudaError_t(0):
36034-
pyedgeData = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
36050+
pyedgeData = [cudaGraphEdgeData() for _ in range(_graph_length)]
36051+
for idx in range(_graph_length):
36052+
(<cudaGraphEdgeData>pyedgeData[idx])._pvt_ptr[0] = cyedgeData[idx]
3603536053
if cyedgeData is not NULL:
3603636054
free(cyedgeData)
3603736055
if err != cyruntime.cudaSuccess:
@@ -36104,11 +36122,15 @@ def cudaGraphNodeGetDependentNodes(node, size_t pNumDependentNodes = 0):
3610436122
with nogil:
3610536123
err = cyruntime.cudaGraphNodeGetDependentNodes(cynode, cypDependentNodes, cyedgeData, &pNumDependentNodes)
3610636124
if cudaError_t(err) == cudaError_t(0):
36107-
pypDependentNodes = [cudaGraphNode_t(init_value=<void_ptr>cypDependentNodes[idx]) for idx in range(_graph_length)]
36125+
pypDependentNodes = [cudaGraphNode_t() for _ in range(_graph_length)]
36126+
for idx in range(_graph_length):
36127+
(<cudaGraphNode_t>pypDependentNodes[idx])._pvt_ptr[0] = cypDependentNodes[idx]
3610836128
if cypDependentNodes is not NULL:
3610936129
free(cypDependentNodes)
3611036130
if cudaError_t(err) == cudaError_t(0):
36111-
pyedgeData = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
36131+
pyedgeData = [cudaGraphEdgeData() for _ in range(_graph_length)]
36132+
for idx in range(_graph_length):
36133+
(<cudaGraphEdgeData>pyedgeData[idx])._pvt_ptr[0] = cyedgeData[idx]
3611236134
if cyedgeData is not NULL:
3611336135
free(cyedgeData)
3611436136
if err != cyruntime.cudaSuccess:
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
.. SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
.. SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
3+
4+
.. module:: cuda.bindings
5+
6+
``cuda-bindings`` 13.3.0 Release notes
7+
======================================
8+
9+
Highlights
10+
----------
11+
12+
13+
Bugfixes
14+
--------
15+
16+
* Fixed a use-after-free in ``cudaGraphGetEdges``, ``cudaGraphNodeGetDependencies``,
17+
``cudaGraphNodeGetDependentNodes``, ``cudaStreamGetCaptureInfo``, and their
18+
driver-API counterparts (``cuGraphGetEdges``, ``cuGraphNodeGetDependencies``,
19+
``cuGraphNodeGetDependentNodes``, ``cuStreamGetCaptureInfo``). The returned
20+
``cudaGraphEdgeData``/``CUgraphEdgeData`` wrappers were backed by a scratch
21+
buffer that was freed before the call returned, leaving every wrapper holding
22+
a dangling pointer. The returned wrappers now own deep copies of the edge
23+
data.
24+
(`Issue #1804 <https://github.com/NVIDIA/cuda-python/issues/1804>`_)
25+
26+
27+
Miscellaneous
28+
-------------
29+
30+
31+
Known issues
32+
------------
33+
34+
* Updating from older versions (v12.6.2.post1 and below) via ``pip install -U cuda-python`` might not work. Please do a clean re-installation by uninstalling ``pip uninstall -y cuda-python`` followed by installing ``pip install cuda-python``.

cuda_bindings/tests/test_cuda.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -964,6 +964,87 @@ def test_cuGraphExecGetId(device, ctx):
964964
assert err == cuda.CUresult.CUDA_SUCCESS
965965

966966

967+
def test_cuGraphGetEdges_edgeData_outlives_call(device, ctx):
968+
# Regression test for https://github.com/NVIDIA/cuda-python/issues/1804
969+
# cuGraphGetEdges previously returned CUgraphEdgeData wrappers backed by
970+
# a scratch buffer that was freed before the call returned, leaving the
971+
# wrappers pointing at freed memory. Ensure the returned objects remain
972+
# readable after the call and after subsequent allocations.
973+
err, graph = cuda.cuGraphCreate(0)
974+
assert err == cuda.CUresult.CUDA_SUCCESS
975+
try:
976+
err, n0 = cuda.cuGraphAddEmptyNode(graph, None, 0)
977+
assert err == cuda.CUresult.CUDA_SUCCESS
978+
err, n1 = cuda.cuGraphAddEmptyNode(graph, [n0], 1)
979+
assert err == cuda.CUresult.CUDA_SUCCESS
980+
err, n2 = cuda.cuGraphAddEmptyNode(graph, [n0, n1], 2)
981+
assert err == cuda.CUresult.CUDA_SUCCESS
982+
983+
err, _, _, _, num_edges = cuda.cuGraphGetEdges(graph)
984+
assert err == cuda.CUresult.CUDA_SUCCESS
985+
assert num_edges == 3
986+
err, from_nodes, to_nodes, edge_data, num_edges = cuda.cuGraphGetEdges(graph, num_edges)
987+
assert err == cuda.CUresult.CUDA_SUCCESS
988+
assert len(edge_data) == num_edges == 3
989+
990+
# Stir the heap to make a use-after-free more likely to surface.
991+
for _ in range(64):
992+
err, _, _, _, _ = cuda.cuGraphGetEdges(graph, num_edges)
993+
assert err == cuda.CUresult.CUDA_SUCCESS
994+
err, _, _, _ = cuda.cuGraphNodeGetDependencies(n1, 1)
995+
assert err == cuda.CUresult.CUDA_SUCCESS
996+
997+
# Each wrapper must still own its data.
998+
for ed in edge_data:
999+
assert ed.from_port == 0
1000+
assert ed.to_port == 0
1001+
assert int(ed.type) == 0
1002+
assert ed.reserved == b"\x00" * 5
1003+
finally:
1004+
(err,) = cuda.cuGraphDestroy(graph)
1005+
assert err == cuda.CUresult.CUDA_SUCCESS
1006+
1007+
1008+
def test_cuGraphNodeGetDependencies_edgeData_outlives_call(device, ctx):
1009+
# Companion regression test for #1804 covering the dependency-query path.
1010+
err, graph = cuda.cuGraphCreate(0)
1011+
assert err == cuda.CUresult.CUDA_SUCCESS
1012+
try:
1013+
err, n0 = cuda.cuGraphAddEmptyNode(graph, None, 0)
1014+
assert err == cuda.CUresult.CUDA_SUCCESS
1015+
err, n1 = cuda.cuGraphAddEmptyNode(graph, [n0], 1)
1016+
assert err == cuda.CUresult.CUDA_SUCCESS
1017+
1018+
err, _, _, num_deps = cuda.cuGraphNodeGetDependencies(n1)
1019+
assert err == cuda.CUresult.CUDA_SUCCESS
1020+
assert num_deps == 1
1021+
err, deps, edge_data, num_deps = cuda.cuGraphNodeGetDependencies(n1, num_deps)
1022+
assert err == cuda.CUresult.CUDA_SUCCESS
1023+
assert len(edge_data) == num_deps == 1
1024+
1025+
err, _, _, num_dependents = cuda.cuGraphNodeGetDependentNodes(n0)
1026+
assert err == cuda.CUresult.CUDA_SUCCESS
1027+
assert num_dependents == 1
1028+
err, dependents, dep_edge_data, num_dependents = cuda.cuGraphNodeGetDependentNodes(n0, num_dependents)
1029+
assert err == cuda.CUresult.CUDA_SUCCESS
1030+
assert len(dep_edge_data) == num_dependents == 1
1031+
1032+
for _ in range(64):
1033+
err, _, _, _ = cuda.cuGraphNodeGetDependencies(n1, num_deps)
1034+
assert err == cuda.CUresult.CUDA_SUCCESS
1035+
err, _, _, _ = cuda.cuGraphNodeGetDependentNodes(n0, num_dependents)
1036+
assert err == cuda.CUresult.CUDA_SUCCESS
1037+
1038+
for ed in edge_data + dep_edge_data:
1039+
assert ed.from_port == 0
1040+
assert ed.to_port == 0
1041+
assert int(ed.type) == 0
1042+
assert ed.reserved == b"\x00" * 5
1043+
finally:
1044+
(err,) = cuda.cuGraphDestroy(graph)
1045+
assert err == cuda.CUresult.CUDA_SUCCESS
1046+
1047+
9671048
@pytest.mark.skipif(
9681049
driverVersionLessThan(13010) or not supportsCudaAPI("cuGraphNodeGetLocalId"),
9691050
reason="Requires CUDA 13.1+",

0 commit comments

Comments
 (0)