Skip to content

Commit e1eabf8

Browse files
committed
fix tests that have side effects & bugs; remove skip
1 parent 12f4ad0 commit e1eabf8

File tree

1 file changed

+99
-211
lines changed

1 file changed

+99
-211
lines changed

cuda_bindings/tests/test_cufile.py

Lines changed: 99 additions & 211 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,14 @@ def cufile_env_json():
5050
logging.info(f"Using cuFile config: {config_path}")
5151
assert os.path.isfile(config_path)
5252
os.environ["CUFILE_ENV_PATH_JSON"] = config_path
53+
5354
yield
5455

5556
# Restore original value or remove if it wasn't set
5657
if original_value is not None:
5758
os.environ["CUFILE_ENV_PATH_JSON"] = original_value
5859
else:
59-
os.environ.pop("CUFILE_ENV_PATH_JSON", None)
60+
del os.environ["CUFILE_ENV_PATH_JSON"]
6061

6162

6263
@cache
@@ -1643,94 +1644,31 @@ def test_set_get_parameter_size_t():
16431644
(err,) = cuda.cuCtxSetCurrent(ctx)
16441645
assert err == cuda.CUresult.CUDA_SUCCESS
16451646

1646-
try:
1647-
# Test setting and getting various size_t parameters
1648-
1649-
# Test poll threshold size (in KB)
1650-
poll_threshold_kb = 64 # 64KB threshold
1651-
cufile.set_parameter_size_t(cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB, poll_threshold_kb)
1652-
retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB)
1653-
assert retrieved_value == poll_threshold_kb, (
1654-
f"Poll threshold mismatch: set {poll_threshold_kb}, got {retrieved_value}"
1655-
)
1656-
1657-
# Test max direct IO size (in KB)
1658-
max_direct_io_kb = 1024 # 1MB max direct IO size
1659-
cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DIRECT_IO_SIZE_KB, max_direct_io_kb)
1660-
retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DIRECT_IO_SIZE_KB)
1661-
assert retrieved_value == max_direct_io_kb, (
1662-
f"Max direct IO size mismatch: set {max_direct_io_kb}, got {retrieved_value}"
1663-
)
1664-
1665-
# Test max device cache size (in KB)
1666-
max_cache_kb = 512 # 512KB max cache size
1667-
cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB, max_cache_kb)
1668-
retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB)
1669-
assert retrieved_value == max_cache_kb, f"Max cache size mismatch: set {max_cache_kb}, got {retrieved_value}"
1670-
1671-
# Test per buffer cache size (in KB)
1672-
per_buffer_cache_kb = 128 # 128KB per buffer cache
1673-
cufile.set_parameter_size_t(
1674-
cufile.SizeTConfigParameter.PROPERTIES_PER_BUFFER_CACHE_SIZE_KB, per_buffer_cache_kb
1675-
)
1676-
retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_PER_BUFFER_CACHE_SIZE_KB)
1677-
assert retrieved_value == per_buffer_cache_kb, (
1678-
f"Per buffer cache size mismatch: set {per_buffer_cache_kb}, got {retrieved_value}"
1679-
)
1680-
1681-
# Test max device pinned memory size (in KB)
1682-
max_pinned_kb = 2048 # 2MB max pinned memory
1683-
cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB, max_pinned_kb)
1684-
retrieved_value = cufile.get_parameter_size_t(
1685-
cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB
1686-
)
1687-
assert retrieved_value == max_pinned_kb, (
1688-
f"Max pinned memory size mismatch: set {max_pinned_kb}, got {retrieved_value}"
1689-
)
1690-
1691-
# Test IO batch size
1692-
batch_size = 16 # 16 operations per batch
1693-
cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_IO_BATCHSIZE, batch_size)
1694-
retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_IO_BATCHSIZE)
1695-
assert retrieved_value == batch_size, f"IO batch size mismatch: set {batch_size}, got {retrieved_value}"
1696-
1697-
# Test batch IO timeout (in milliseconds)
1698-
timeout_ms = 5000 # 5 second timeout
1699-
cufile.set_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_BATCH_IO_TIMEOUT_MS, timeout_ms)
1700-
retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.PROPERTIES_BATCH_IO_TIMEOUT_MS)
1701-
assert retrieved_value == timeout_ms, f"Batch IO timeout mismatch: set {timeout_ms}, got {retrieved_value}"
1702-
1703-
# Test execution parameters
1704-
max_io_queue_depth = 32 # Max 32 operations in queue
1705-
cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_QUEUE_DEPTH, max_io_queue_depth)
1706-
retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_QUEUE_DEPTH)
1707-
assert retrieved_value == max_io_queue_depth, (
1708-
f"Max IO queue depth mismatch: set {max_io_queue_depth}, got {retrieved_value}"
1709-
)
1710-
1711-
max_io_threads = 8 # Max 8 IO threads
1712-
cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_THREADS, max_io_threads)
1713-
retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_THREADS)
1714-
assert retrieved_value == max_io_threads, (
1715-
f"Max IO threads mismatch: set {max_io_threads}, got {retrieved_value}"
1716-
)
1717-
1718-
min_io_threshold_kb = 4 # 4KB minimum IO threshold
1719-
cufile.set_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MIN_IO_THRESHOLD_SIZE_KB, min_io_threshold_kb)
1720-
retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MIN_IO_THRESHOLD_SIZE_KB)
1721-
assert retrieved_value == min_io_threshold_kb, (
1722-
f"Min IO threshold mismatch: set {min_io_threshold_kb}, got {retrieved_value}"
1723-
)
1647+
param_val_pairs = (
1648+
(cufile.SizeTConfigParameter.POLLTHRESHOLD_SIZE_KB, 64), # 64KB threshold
1649+
(cufile.SizeTConfigParameter.PROPERTIES_MAX_DIRECT_IO_SIZE_KB, 1024), # 1MB max direct IO size
1650+
(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_CACHE_SIZE_KB, 512), # 512KB max cache size
1651+
(cufile.SizeTConfigParameter.PROPERTIES_PER_BUFFER_CACHE_SIZE_KB, 128), # 128KB per buffer cache
1652+
(cufile.SizeTConfigParameter.PROPERTIES_MAX_DEVICE_PINNED_MEM_SIZE_KB, 2048), # 2MB max pinned memory
1653+
(cufile.SizeTConfigParameter.PROPERTIES_IO_BATCHSIZE, 16), # 16 operations per batch
1654+
(cufile.SizeTConfigParameter.PROPERTIES_BATCH_IO_TIMEOUT_MS, 5000), # 5 second timeout
1655+
(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_QUEUE_DEPTH, 32), # Max 32 operations in queue
1656+
(cufile.SizeTConfigParameter.EXECUTION_MAX_IO_THREADS, 8), # Max 8 IO threads
1657+
(cufile.SizeTConfigParameter.EXECUTION_MIN_IO_THRESHOLD_SIZE_KB, 4), # 4KB minimum IO threshold
1658+
(cufile.SizeTConfigParameter.EXECUTION_MAX_REQUEST_PARALLELISM, 4), # Max 4 parallel requests
1659+
)
17241660

1725-
max_request_parallelism = 4 # Max 4 parallel requests
1726-
cufile.set_parameter_size_t(
1727-
cufile.SizeTConfigParameter.EXECUTION_MAX_REQUEST_PARALLELISM, max_request_parallelism
1728-
)
1729-
retrieved_value = cufile.get_parameter_size_t(cufile.SizeTConfigParameter.EXECUTION_MAX_REQUEST_PARALLELISM)
1730-
assert retrieved_value == max_request_parallelism, (
1731-
f"Max request parallelism mismatch: set {max_request_parallelism}, got {retrieved_value}"
1732-
)
1661+
def test_param(param, val):
1662+
orig_val = cufile.get_parameter_size_t(param)
1663+
cufile.set_parameter_size_t(param, val)
1664+
retrieved_val = cufile.get_parameter_size_t(param)
1665+
assert retrieved_val == val
1666+
cufile.set_parameter_size_t(param, orig_val)
17331667

1668+
try:
1669+
# Test setting and getting various size_t parameters
1670+
for param, val in param_val_pairs:
1671+
test_param(param, val)
17341672
finally:
17351673
cuda.cuDevicePrimaryCtxRelease(device)
17361674

@@ -1753,77 +1691,40 @@ def test_set_get_parameter_bool():
17531691
(err,) = cuda.cuCtxSetCurrent(ctx)
17541692
assert err == cuda.CUresult.CUDA_SUCCESS
17551693

1756-
try:
1757-
# Test setting and getting various boolean parameters
1694+
param_val_pairs = (
1695+
(cufile.BoolConfigParameter.PROPERTIES_USE_POLL_MODE, True),
1696+
(cufile.BoolConfigParameter.PROPERTIES_ALLOW_COMPAT_MODE, False),
1697+
(cufile.BoolConfigParameter.FORCE_COMPAT_MODE, False),
1698+
(cufile.BoolConfigParameter.FS_MISC_API_CHECK_AGGRESSIVE, True),
1699+
(cufile.BoolConfigParameter.EXECUTION_PARALLEL_IO, True),
1700+
(cufile.BoolConfigParameter.PROFILE_NVTX, False),
1701+
(cufile.BoolConfigParameter.PROPERTIES_ALLOW_SYSTEM_MEMORY, True),
1702+
(cufile.BoolConfigParameter.USE_PCIP2PDMA, True),
1703+
(cufile.BoolConfigParameter.PREFER_IO_URING, False),
1704+
(cufile.BoolConfigParameter.FORCE_ODIRECT_MODE, True),
1705+
(cufile.BoolConfigParameter.SKIP_TOPOLOGY_DETECTION, False),
1706+
(cufile.BoolConfigParameter.STREAM_MEMOPS_BYPASS, True),
1707+
)
17581708

1759-
# Test poll mode
1760-
cufile.set_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_USE_POLL_MODE, True)
1761-
retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_USE_POLL_MODE)
1762-
assert retrieved_value is True, f"Poll mode mismatch: set True, got {retrieved_value}"
1763-
1764-
# Test compatibility mode
1765-
cufile.set_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_COMPAT_MODE, False)
1766-
retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_COMPAT_MODE)
1767-
assert retrieved_value is False, f"Compatibility mode mismatch: set False, got {retrieved_value}"
1768-
1769-
# Test force compatibility mode
1770-
cufile.set_parameter_bool(cufile.BoolConfigParameter.FORCE_COMPAT_MODE, False)
1771-
retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.FORCE_COMPAT_MODE)
1772-
assert retrieved_value is False, f"Force compatibility mode mismatch: set False, got {retrieved_value}"
1773-
1774-
# Test aggressive API check
1775-
cufile.set_parameter_bool(cufile.BoolConfigParameter.FS_MISC_API_CHECK_AGGRESSIVE, True)
1776-
retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.FS_MISC_API_CHECK_AGGRESSIVE)
1777-
assert retrieved_value is True, f"Aggressive API check mismatch: set True, got {retrieved_value}"
1778-
1779-
# Test parallel IO
1780-
cufile.set_parameter_bool(cufile.BoolConfigParameter.EXECUTION_PARALLEL_IO, True)
1781-
retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.EXECUTION_PARALLEL_IO)
1782-
assert retrieved_value is True, f"Parallel IO mismatch: set True, got {retrieved_value}"
1783-
1784-
# Test NVTX profiling
1785-
cufile.set_parameter_bool(cufile.BoolConfigParameter.PROFILE_NVTX, False)
1786-
retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PROFILE_NVTX)
1787-
assert retrieved_value is False, f"NVTX profiling mismatch: set False, got {retrieved_value}"
1788-
1789-
# Test system memory allowance
1790-
cufile.set_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_SYSTEM_MEMORY, True)
1791-
retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PROPERTIES_ALLOW_SYSTEM_MEMORY)
1792-
assert retrieved_value is True, f"System memory allowance mismatch: set True, got {retrieved_value}"
1793-
1794-
# Test PCI P2P DMA
1795-
cufile.set_parameter_bool(cufile.BoolConfigParameter.USE_PCIP2PDMA, True)
1796-
retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.USE_PCIP2PDMA)
1797-
assert retrieved_value is True, f"PCI P2P DMA mismatch: set True, got {retrieved_value}"
1798-
1799-
# Test IO uring preference
1800-
cufile.set_parameter_bool(cufile.BoolConfigParameter.PREFER_IO_URING, False)
1801-
retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.PREFER_IO_URING)
1802-
assert retrieved_value is False, f"IO uring preference mismatch: set False, got {retrieved_value}"
1803-
1804-
# Test force O_DIRECT mode
1805-
cufile.set_parameter_bool(cufile.BoolConfigParameter.FORCE_ODIRECT_MODE, True)
1806-
retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.FORCE_ODIRECT_MODE)
1807-
assert retrieved_value is True, f"Force O_DIRECT mode mismatch: set True, got {retrieved_value}"
1808-
1809-
# Test topology detection skip
1810-
cufile.set_parameter_bool(cufile.BoolConfigParameter.SKIP_TOPOLOGY_DETECTION, False)
1811-
retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.SKIP_TOPOLOGY_DETECTION)
1812-
assert retrieved_value is False, f"Topology detection skip mismatch: set False, got {retrieved_value}"
1813-
1814-
# Test stream memops bypass
1815-
cufile.set_parameter_bool(cufile.BoolConfigParameter.STREAM_MEMOPS_BYPASS, True)
1816-
retrieved_value = cufile.get_parameter_bool(cufile.BoolConfigParameter.STREAM_MEMOPS_BYPASS)
1817-
assert retrieved_value is True, f"Stream memops bypass mismatch: set True, got {retrieved_value}"
1709+
def test_param(param, val):
1710+
orig_val = cufile.get_parameter_bool(param)
1711+
cufile.set_parameter_bool(param, val)
1712+
retrieved_val = cufile.get_parameter_bool(param)
1713+
assert retrieved_val is val
1714+
cufile.set_parameter_bool(param, orig_val)
18181715

1716+
try:
1717+
# Test setting and getting various boolean parameters
1718+
for param, val in param_val_pairs:
1719+
test_param(param, val)
18191720
finally:
18201721
cuda.cuDevicePrimaryCtxRelease(device)
18211722

18221723

18231724
@pytest.mark.skipif(
18241725
cufileVersionLessThan(1140), reason="cuFile parameter APIs require cuFile library version 1.14.0 or later"
18251726
)
1826-
def test_set_get_parameter_string():
1727+
def test_set_get_parameter_string(tmp_path):
18271728
"""Test setting and getting string parameters with cuFile validation."""
18281729

18291730
# Initialize CUDA
@@ -1838,75 +1739,62 @@ def test_set_get_parameter_string():
18381739
(err,) = cuda.cuCtxSetCurrent(ctx)
18391740
assert err == cuda.CUresult.CUDA_SUCCESS
18401741

1841-
try:
1842-
# Test setting and getting various string parameters
1843-
# Note: String parameter tests may have issues with the current implementation
1844-
1845-
# Test logging level
1846-
logging_level = "INFO"
1847-
try:
1848-
# Convert Python string to null-terminated C string
1849-
logging_level_bytes = logging_level.encode("utf-8") + b"\x00"
1850-
logging_level_buffer = ctypes.create_string_buffer(logging_level_bytes)
1851-
cufile.set_parameter_string(
1852-
cufile.StringConfigParameter.LOGGING_LEVEL, int(ctypes.addressof(logging_level_buffer))
1853-
)
1854-
retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.LOGGING_LEVEL, 256)
1855-
# Use safe_decode_string to handle null terminators and padding
1856-
retrieved_value = safe_decode_string(retrieved_value_raw.encode("utf-8"))
1857-
logging.info(f"Logging level test: set {logging_level}, got {retrieved_value}")
1858-
# The retrieved value should be a string, so we can compare directly
1859-
assert retrieved_value == logging_level, (
1860-
f"Logging level mismatch: set {logging_level}, got {retrieved_value}"
1861-
)
1862-
except Exception as e:
1863-
logging.error(f"Logging level test failed: {e}")
1864-
# Re-raise the exception to make the test fail
1865-
raise
1866-
1867-
# Test environment log file path
1868-
logfile_path = tempfile.gettempdir() + "/cufile.log"
1869-
try:
1870-
# Convert Python string to null-terminated C string
1871-
logfile_path_bytes = logfile_path.encode("utf-8") + b"\x00"
1872-
logfile_buffer = ctypes.create_string_buffer(logfile_path_bytes)
1873-
cufile.set_parameter_string(
1874-
cufile.StringConfigParameter.ENV_LOGFILE_PATH, int(ctypes.addressof(logfile_buffer))
1875-
)
1876-
retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.ENV_LOGFILE_PATH, 256)
1877-
# Use safe_decode_string to handle null terminators and padding
1878-
retrieved_value = safe_decode_string(retrieved_value_raw.encode("utf-8"))
1879-
logging.info(f"Log file path test: set {logfile_path}, got {retrieved_value}")
1880-
# The retrieved value should be a string, so we can compare directly
1881-
assert retrieved_value == logfile_path, f"Log file path mismatch: set {logfile_path}, got {retrieved_value}"
1882-
except Exception as e:
1883-
logging.error(f"Log file path test failed: {e}")
1884-
# Re-raise the exception to make the test fail
1885-
raise
1742+
temp_dir = tempfile.gettempdir()
1743+
# must be set to avoid getter error when testing ENV_LOGFILE_PATH...
1744+
os.environ["CUFILE_LOGFILE_PATH"] = ""
1745+
1746+
param_val_pairs = (
1747+
(cufile.StringConfigParameter.LOGGING_LEVEL, "INFO", "DEBUG"), # Test logging level
1748+
(
1749+
cufile.StringConfigParameter.ENV_LOGFILE_PATH,
1750+
os.path.join(temp_dir, "cufile.log"),
1751+
str(tmp_path / "cufile.log"),
1752+
), # Test environment log file path
1753+
(
1754+
cufile.StringConfigParameter.LOG_DIR,
1755+
os.path.join(temp_dir, "cufile_logs"),
1756+
str(tmp_path),
1757+
), # Test log directory
1758+
)
18861759

1887-
# Test log directory
1888-
log_dir = tempfile.gettempdir() + "/cufile_logs"
1760+
def test_param(param, val, default_val):
1761+
orig_val = cufile.get_parameter_string(param, 256)
1762+
# Use safe_decode_string to handle null terminators and padding
1763+
orig_val = safe_decode_string(orig_val.encode("utf-8"))
1764+
1765+
val_b = val.encode("utf-8")
1766+
val_buf = ctypes.create_string_buffer(val_b)
1767+
default_val_b = default_val.encode("utf-8")
1768+
defualt_val_buf = ctypes.create_string_buffer(default_val_b)
1769+
orig_val_b = orig_val.encode("utf-8")
1770+
orig_val_buf = ctypes.create_string_buffer(orig_val_b)
1771+
1772+
# Round-trip test
1773+
cufile.set_parameter_string(param, int(ctypes.addressof(val_buf)))
1774+
retrieved_val = cufile.get_parameter_string(param, 256)
1775+
retrieved_val = safe_decode_string(retrieved_val.encode("utf-8"))
1776+
assert retrieved_val == val
1777+
1778+
# Restore
18891779
try:
1890-
# Convert Python string to null-terminated C string
1891-
log_dir_bytes = log_dir.encode("utf-8") + b"\x00"
1892-
log_dir_buffer = ctypes.create_string_buffer(log_dir_bytes)
1893-
cufile.set_parameter_string(cufile.StringConfigParameter.LOG_DIR, int(ctypes.addressof(log_dir_buffer)))
1894-
retrieved_value_raw = cufile.get_parameter_string(cufile.StringConfigParameter.LOG_DIR, 256)
1895-
# Use safe_decode_string to handle null terminators and padding
1896-
retrieved_value = safe_decode_string(retrieved_value_raw.encode("utf-8"))
1897-
logging.info(f"Log directory test: set {log_dir}, got {retrieved_value}")
1898-
# The retrieved value should be a string, so we can compare directly
1899-
assert retrieved_value == log_dir, f"Log directory mismatch: set {log_dir}, got {retrieved_value}"
1900-
except Exception as e:
1901-
logging.error(f"Log directory test failed: {e}")
1902-
# Re-raise the exception to make the test fail
1903-
raise
1780+
# Currently this line will raise, see below.
1781+
cufile.set_parameter_string(param, int(ctypes.addressof(orig_val_buf)))
1782+
except:
1783+
# This block will always be reached because cuFILE could start with garbage default (empty string)
1784+
# that cannot be restored. In other words, cuFILE does not honor the common-sense expectation that getter/setter
1785+
# should be round-trippable.
1786+
cufile.set_parameter_string(param, int(ctypes.addressof(defualt_val_buf)))
19041787

1788+
try:
1789+
# Test setting and getting various string parameters
1790+
# Note: String parameter tests may have issues with the current implementation
1791+
for param, val, default_val in param_val_pairs:
1792+
test_param(param, val, default_val)
19051793
finally:
1794+
del os.environ["CUFILE_LOGFILE_PATH"]
19061795
cuda.cuDevicePrimaryCtxRelease(device)
19071796

19081797

1909-
@pytest.mark.skip("TODO: Failing in CI for an unknown reason. See #1147")
19101798
@pytest.mark.skipif(
19111799
cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later"
19121800
)

0 commit comments

Comments
 (0)