Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions mssql_python/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1198,21 +1198,21 @@ def getinfo(self, info_type: int) -> Union[str, int, bool, None]:
# Make sure we use the correct amount of data based on length
actual_data = data[:length]

# Now decode the string data
try:
return actual_data.decode("utf-8").rstrip("\0")
except UnicodeDecodeError:
# SQLGetInfoW returns UTF-16LE encoded strings
# Try encodings in order: UTF-16LE (Windows), UTF-8, Latin-1
for encoding in ("utf-16-le", "utf-8", "latin1"):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why latin1? :) Asking out of curiosity. IIUC, utf-16-le and utf-8 might be sufficient. If you have other reasons to add latin1, it would be good to know.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right. I had included this in the code as a kind of fallback strategy; however, the expected encoding is UTF-16LE, and UTF-8 is a reasonable fallback for edge cases where data might come pre-decoded.
It's better to remove latin1. I'll make this change. Thanks for the suggestion!

try:
return actual_data.decode("latin1").rstrip("\0")
except Exception as e:
logger.debug(
"error",
"Failed to decode string in getinfo: %s. "
"Returning None to avoid silent corruption.",
e,
)
# Explicitly return None to signal decoding failure
return None
return actual_data.decode(encoding).rstrip("\0")
except UnicodeDecodeError:
continue

# All decodings failed
logger.debug(
"error",
"Failed to decode string in getinfo with any supported encoding. "
"Returning None to avoid silent corruption.",
)
return None
else:
# If it's not bytes, return as is
return data
Expand Down
16 changes: 0 additions & 16 deletions mssql_python/pybind/build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -157,22 +157,6 @@ if exist "%OUTPUT_DIR%\%PYD_NAME%" (
echo [WARNING] PDB file !PDB_NAME! not found in output directory.
)

setlocal enabledelayedexpansion
for %%I in ("%SOURCE_DIR%..") do (
set PARENT_DIR=%%~fI
)
echo [DIAGNOSTIC] Parent is: !PARENT_DIR!

set VCREDIST_DLL_PATH=!PARENT_DIR!\libs\windows\!ARCH!\vcredist\msvcp140.dll
echo [DIAGNOSTIC] Looking for msvcp140.dll at "!VCREDIST_DLL_PATH!"

if exist "!VCREDIST_DLL_PATH!" (
copy /Y "!VCREDIST_DLL_PATH!" "%SOURCE_DIR%\.."
echo [SUCCESS] Copied msvcp140.dll from !VCREDIST_DLL_PATH! to "%SOURCE_DIR%\.."
) else (
echo [ERROR] Could not find msvcp140.dll at "!VCREDIST_DLL_PATH!"
exit /b 1
)
) else (
echo [ERROR] Could not find built .pyd file: %PYD_NAME%
REM Exit with an error code here if the .pyd file is not found
Expand Down
27 changes: 27 additions & 0 deletions tests/test_003_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -5471,6 +5471,33 @@ def test_getinfo_basic_driver_info(db_connection):
pytest.fail(f"getinfo failed for basic driver info: {e}")


def test_getinfo_string_encoding_utf16(db_connection):
    """Check that string results from getinfo come back as cleanly decoded text.

    For each string-valued info type listed below, the value returned by
    ``db_connection.getinfo`` is inspected. A ``None`` result is skipped;
    anything else must be a non-empty ``str`` with no embedded NUL characters.
    """
    # Info types whose values are checked, keyed by a readable label that is
    # used in the assertion messages.
    labelled_info_types = {
        "SQL_DRIVER_VER": sql_const.SQL_DRIVER_VER.value,
        "SQL_DRIVER_NAME": sql_const.SQL_DRIVER_NAME.value,
        "SQL_DRIVER_ODBC_VER": sql_const.SQL_DRIVER_ODBC_VER.value,
        "SQL_SERVER_NAME": sql_const.SQL_SERVER_NAME.value,
    }

    for name, info_type in labelled_info_types.items():
        result = db_connection.getinfo(info_type)

        # A None result is not checked any further.
        if result is None:
            continue

        # The value must be text, not raw bytes or a number.
        assert isinstance(
            result, str
        ), f"{name}: Expected str, got {type(result).__name__}"

        # Embedded NULs would point at UTF-16 data decoded as UTF-8.
        assert (
            '\x00' not in result
        ), f"{name} contains null bytes, likely UTF-16/UTF-8 encoding mismatch: {repr(result)}"

        # Sanity check: the value should not be empty.
        assert len(result) > 0, f"{name} returned empty string"


def test_getinfo_sql_support(db_connection):
"""Test SQL support and conformance info types."""

Expand Down
Loading