Skip to content

Commit 73ab82f

Browse files
authored
Fix bug 5811173: update cufile tests and configuration (#1821)
* Fix bug 5811173: update cufile tests and configuration
  - test_cufile.py: skip the compat bool params in set_get_parameter_bool. Avoid setting allow_compat_mode/force_compat_mode before driver_open; pending values can be applied on the first open and interact badly with cufile.json when nvidia-fs is not loaded (DRIVER_NOT_INITIALIZED). Compat behavior remains covered elsewhere.
  - cufile.json: set allow_compat_mode to true.
* cufile tests: snapshot parameter baselines after driver_open. Open the driver once to read the size_t/bool/string originals, then close it before the set/get/restore round-trips so that pending values do not restore invalid pre-open values (e.g. per-buffer cache 0). Aligns with review feedback.
* test(cufile): skip PROFILE_NVTX in bool param snapshot on cuFile >= 1.16
* test(cufile): factor driver open/close into _cufile_driver_session context manager
* test(cufile): use cufile_env_json for parameter round-trip tests. Attach the cufile_env_json fixture to the size_t, bool, and string parameter tests so that compat-related bools can be exercised before the first driver_open without relying on skipping allow_compat_mode/force_compat_mode on systems without nvidia-fs (Ralf's patch).
1 parent 41555d7 commit 73ab82f

2 files changed

Lines changed: 43 additions & 16 deletions

File tree

cuda_bindings/tests/cufile.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
// e.g : export CUFILE_ENV_PATH_JSON="/home/<xxx>/cufile.json"
44

55

6+
"properties" : {
7+
"allow_compat_mode" : true
8+
},
9+
610
"execution" : {
711
// max number of workitems in the queue;
812
"max_io_queue_depth": 128,

cuda_bindings/tests/test_cufile.py

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import platform
1010
import subprocess
1111
import tempfile
12-
from contextlib import suppress
12+
from contextlib import contextmanager, suppress
1313
from functools import cache
1414

1515
import pytest
@@ -28,6 +28,16 @@
2828
cufile = pytest.importorskip("cuda.bindings.cufile", reason="skipping tests on Windows")
2929

3030

31+
@contextmanager
32+
def _cufile_driver_session():
33+
"""Open the cuFile driver for a block; always close in a finally (mirrors try/finally)."""
34+
cufile.driver_open()
35+
try:
36+
yield
37+
finally:
38+
cufile.driver_close()
39+
40+
3141
@pytest.fixture
3242
def cufile_env_json(monkeypatch):
3343
"""Set CUFILE_ENV_PATH_JSON environment variable for async tests."""
@@ -1422,7 +1432,7 @@ def test_batch_io_large_operations():
14221432
@pytest.mark.skipif(
14231433
cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later"
14241434
)
1425-
@pytest.mark.usefixtures("ctx")
1435+
@pytest.mark.usefixtures("ctx", "cufile_env_json")
14261436
def test_set_get_parameter_size_t():
14271437
"""Test setting and getting size_t parameters with cuFile validation."""
14281438
param_val_pairs = (
@@ -1439,8 +1449,13 @@ def test_set_get_parameter_size_t():
14391449
(cufile.SizeTConfigParameter.EXECUTION_MAX_REQUEST_PARALLELISM, 4), # Max 4 parallel requests
14401450
)
14411451

1452+
# Snapshot baselines after driver_open so getters reflect merged config (defaults + JSON),
1453+
# not pre-open pending state that could restore invalid values (e.g. 0 for per-buffer cache).
1454+
with _cufile_driver_session():
1455+
originals = {param: cufile.get_parameter_size_t(param) for param, _ in param_val_pairs}
1456+
14421457
def test_param(param, val):
1443-
orig_val = cufile.get_parameter_size_t(param)
1458+
orig_val = originals[param]
14441459
cufile.set_parameter_size_t(param, val)
14451460
retrieved_val = cufile.get_parameter_size_t(param)
14461461
assert retrieved_val == val
@@ -1454,9 +1469,11 @@ def test_param(param, val):
14541469
@pytest.mark.skipif(
14551470
cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later"
14561471
)
1457-
@pytest.mark.usefixtures("ctx")
1472+
@pytest.mark.usefixtures("ctx", "cufile_env_json")
14581473
def test_set_get_parameter_bool():
14591474
"""Test setting and getting boolean parameters with cuFile validation."""
1475+
# Load the compat-enabled test config before the first driver_open so the compat
1476+
# bool params can still be round-tripped on systems without nvidia-fs.
14601477
param_val_pairs = (
14611478
(cufile.BoolConfigParameter.PROPERTIES_USE_POLL_MODE, True),
14621479
(cufile.BoolConfigParameter.PROPERTIES_ALLOW_COMPAT_MODE, False),
@@ -1471,28 +1488,29 @@ def test_set_get_parameter_bool():
14711488
(cufile.BoolConfigParameter.SKIP_TOPOLOGY_DETECTION, False),
14721489
(cufile.BoolConfigParameter.STREAM_MEMOPS_BYPASS, True),
14731490
)
1491+
# PROFILE_NVTX is deprecated (CTK 13.1.0+); cuFile >= 1.16 rejects bool getters for it.
1492+
if cufile.get_version() >= 1160:
1493+
param_val_pairs = tuple((p, v) for p, v in param_val_pairs if p is not cufile.BoolConfigParameter.PROFILE_NVTX)
1494+
1495+
with _cufile_driver_session():
1496+
originals = {param: cufile.get_parameter_bool(param) for param, _ in param_val_pairs}
14741497

14751498
def test_param(param, val):
1476-
orig_val = cufile.get_parameter_bool(param)
1499+
orig_val = originals[param]
14771500
cufile.set_parameter_bool(param, val)
14781501
retrieved_val = cufile.get_parameter_bool(param)
14791502
assert retrieved_val is val
14801503
cufile.set_parameter_bool(param, orig_val)
14811504

1482-
try:
1483-
# Test setting and getting various boolean parameters
1484-
for param, val in param_val_pairs:
1485-
test_param(param, val)
1486-
except cufile.cuFileError:
1487-
if cufile.get_version() < 1160:
1488-
raise
1489-
assert param is cufile.BoolConfigParameter.PROFILE_NVTX # Deprecated in CTK 13.1.0
1505+
# Test setting and getting various boolean parameters
1506+
for param, val in param_val_pairs:
1507+
test_param(param, val)
14901508

14911509

14921510
@pytest.mark.skipif(
14931511
cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later"
14941512
)
1495-
@pytest.mark.usefixtures("ctx")
1513+
@pytest.mark.usefixtures("ctx", "cufile_env_json")
14961514
def test_set_get_parameter_string(tmp_path):
14971515
"""Test setting and getting string parameters with cuFile validation."""
14981516
temp_dir = tempfile.gettempdir()
@@ -1513,8 +1531,11 @@ def test_set_get_parameter_string(tmp_path):
15131531
), # Test log directory
15141532
)
15151533

1534+
with _cufile_driver_session():
1535+
originals = {param: cufile.get_parameter_string(param, 256) for param, _, _ in param_val_pairs}
1536+
15161537
def test_param(param, val, default_val):
1517-
orig_val = cufile.get_parameter_string(param, 256)
1538+
orig_val = originals[param]
15181539

15191540
val_b = val.encode("utf-8")
15201541
val_buf = ctypes.create_string_buffer(val_b)
@@ -1951,7 +1972,9 @@ def test_set_parameter_posix_pool_slab_array(slab_sizes, slab_counts, driver_con
19511972
retrieved_sizes_addr = ctypes.addressof(retrieved_sizes)
19521973
retrieved_counts_addr = ctypes.addressof(retrieved_counts)
19531974

1954-
cufile.get_parameter_posix_pool_slab_array(retrieved_sizes_addr, retrieved_counts_addr, n_slab_sizes)
1975+
# Open cuFile driver AFTER setting parameters
1976+
with _cufile_driver_session():
1977+
cufile.get_parameter_posix_pool_slab_array(retrieved_sizes_addr, retrieved_counts_addr, n_slab_sizes)
19551978

19561979
# Verify they match what we set
19571980
assert list(retrieved_sizes) == slab_sizes

0 commit comments

Comments
 (0)