Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
e3c9c4f
Final Linting for all cpp and python files with Workflow
jahnvi480 Nov 17, 2025
c2e32cb
Pushing changes in confest
jahnvi480 Nov 17, 2025
9847708
Final
jahnvi480 Nov 17, 2025
83b011e
Add VS Code extension recommendations for development setup
jahnvi480 Nov 18, 2025
ade9a05
Merge branch 'main' of https://github.com/microsoft/mssql-python into…
jahnvi480 Nov 19, 2025
10744c8
FIX: Encoding Decoding
jahnvi480 Nov 24, 2025
27310da
Python linting issue
jahnvi480 Nov 24, 2025
b1bbd7d
Resolving conflicts
jahnvi480 Nov 25, 2025
6583708
Resolving comments
jahnvi480 Nov 28, 2025
4ad47ca
Resolving comments
jahnvi480 Nov 28, 2025
79706c7
Merge branch 'main' into jahnvi/250_encoding_decoding
jahnvi480 Nov 28, 2025
8f44b1a
resolving conflicy
jahnvi480 Dec 1, 2025
4c01d60
Resolving conflicts
jahnvi480 Dec 1, 2025
fdec610
Changing testcases for linux and mac
jahnvi480 Dec 1, 2025
9439d1e
Resolving conflicts
jahnvi480 Dec 1, 2025
2ed2b8c
Resolving conflicts
jahnvi480 Dec 1, 2025
ca40f5e
Resolving issue on Ubuntu
jahnvi480 Dec 4, 2025
a906837
Merge branch 'main' into jahnvi/250_encoding_decoding
jahnvi480 Dec 4, 2025
c75e672
Resolving issue on Ubuntu
jahnvi480 Dec 4, 2025
e145104
Improving code coverage
jahnvi480 Dec 4, 2025
1d5981b
Resolving comments
jahnvi480 Dec 4, 2025
5b18aa9
Increasing code coverage
jahnvi480 Dec 4, 2025
3b55036
Increasing code coverage
jahnvi480 Dec 4, 2025
2af1138
Increasing code coverage
jahnvi480 Dec 4, 2025
5a4f6b1
Increasing code coverage
jahnvi480 Dec 4, 2025
0368be9
Increasing code coverage
jahnvi480 Dec 5, 2025
f7fd125
Increasing code coverage
jahnvi480 Dec 5, 2025
1eaed2c
Increasing code coverage
jahnvi480 Dec 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 136 additions & 9 deletions mssql_python/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,13 @@
INFO_TYPE_STRING_THRESHOLD: int = 10000

# UTF-16 encoding variants that should use SQL_WCHAR by default
UTF16_ENCODINGS: frozenset[str] = frozenset(["utf-16", "utf-16le", "utf-16be"])
# Note: "utf-16" with BOM is NOT included as it's problematic for SQL_WCHAR
UTF16_ENCODINGS: frozenset[str] = frozenset(["utf-16le", "utf-16be"])

# Valid encoding characters (alphanumeric, dash, underscore only)
import string

VALID_ENCODING_CHARS: frozenset[str] = frozenset(string.ascii_letters + string.digits + "-_")


def _validate_encoding(encoding: str) -> bool:
Expand All @@ -70,7 +76,17 @@ def _validate_encoding(encoding: str) -> bool:
Note:
Uses LRU cache to avoid repeated expensive codecs.lookup() calls.
Cache size is limited to 128 entries which should cover most use cases.
Also validates that encoding name only contains safe characters.
"""
# First check for dangerous characters (security validation)
if not all(c in VALID_ENCODING_CHARS for c in encoding):
return False

# Check length limit (guards against denial-of-service via oversized encoding names)
if len(encoding) > 100:
return False

# Then check if it's a valid Python codec
try:
codecs.lookup(encoding)
return True
Expand Down Expand Up @@ -227,6 +243,11 @@ def __init__(
self._output_converters = {}
self._converters_lock = threading.Lock()

# Initialize encoding/decoding settings lock for thread safety
# This lock protects both _encoding_settings and _decoding_settings dictionaries
# to prevent race conditions when multiple threads are reading/writing encoding settings
self._encoding_lock = threading.RLock() # RLock allows recursive locking

# Initialize search escape character
self._searchescape = None

Expand Down Expand Up @@ -430,6 +451,20 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
encoding = encoding.casefold()
logger.debug("setencoding: Encoding normalized to %s", encoding)

# Reject 'utf-16' with BOM for SQL_WCHAR (ambiguous byte order)
if encoding == "utf-16" and ctype == ConstantsDDBC.SQL_WCHAR.value:
logger.debug(
"warning",
"utf-16 with BOM rejected for SQL_WCHAR",
)
raise ProgrammingError(
driver_error="UTF-16 with Byte Order Mark not supported for SQL_WCHAR",
ddbc_error=(
"Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
"Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
),
)

# Set default ctype based on encoding if not provided
if ctype is None:
if encoding in UTF16_ENCODINGS:
Expand All @@ -456,8 +491,33 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
),
)

# Store the encoding settings
self._encoding_settings = {"encoding": encoding, "ctype": ctype}
# The SQL_WCHAR ctype is only valid with utf-16le/utf-16be (BOM-based 'utf-16' is rejected)
if ctype == ConstantsDDBC.SQL_WCHAR.value:
if encoding == "utf-16":
raise ProgrammingError(
driver_error="UTF-16 with Byte Order Mark not supported for SQL_WCHAR",
ddbc_error=(
"Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
"Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
),
)
elif encoding not in UTF16_ENCODINGS:
logger.debug(
"warning",
"Non-UTF-16 encoding %s attempted with SQL_WCHAR ctype",
sanitize_user_input(encoding),
)
raise ProgrammingError(
driver_error=f"SQL_WCHAR only supports UTF-16 encodings",
ddbc_error=(
f"Cannot use encoding '{encoding}' with SQL_WCHAR. "
f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
),
)

# Store the encoding settings (thread-safe with lock)
with self._encoding_lock:
self._encoding_settings = {"encoding": encoding, "ctype": ctype}

# Log with sanitized values for security
logger.debug(
Expand All @@ -469,7 +529,7 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non

def getencoding(self) -> Dict[str, Union[str, int]]:
"""
Gets the current text encoding settings.
Gets the current text encoding settings (thread-safe).

Returns:
dict: A dictionary containing 'encoding' and 'ctype' keys.
Expand All @@ -481,14 +541,19 @@ def getencoding(self) -> Dict[str, Union[str, int]]:
settings = cnxn.getencoding()
print(f"Current encoding: {settings['encoding']}")
print(f"Current ctype: {settings['ctype']}")

Note:
This method is thread-safe and can be called from multiple threads concurrently.
"""
if self._closed:
raise InterfaceError(
driver_error="Connection is closed",
ddbc_error="Connection is closed",
)

return self._encoding_settings.copy()
# Thread-safe read with lock to prevent race conditions
with self._encoding_lock:
return self._encoding_settings.copy()

def setdecoding(
self, sqltype: int, encoding: Optional[str] = None, ctype: Optional[int] = None
Expand Down Expand Up @@ -575,6 +640,38 @@ def setdecoding(
# Normalize encoding to lowercase for consistency
encoding = encoding.lower()

# Reject 'utf-16' with BOM for SQL_WCHAR (ambiguous byte order)
if sqltype == ConstantsDDBC.SQL_WCHAR.value and encoding == "utf-16":
logger.debug(
"warning",
"utf-16 with BOM rejected for SQL_WCHAR",
)
raise ProgrammingError(
driver_error="UTF-16 with Byte Order Mark not supported for SQL_WCHAR",
ddbc_error=(
"Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
"Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
),
)

# Validate SQL_WCHAR only supports UTF-16 encodings (SQL_WMETADATA is more flexible)
if sqltype == ConstantsDDBC.SQL_WCHAR.value and encoding not in UTF16_ENCODINGS:
logger.debug(
"warning",
"Non-UTF-16 encoding %s attempted with SQL_WCHAR sqltype",
sanitize_user_input(encoding),
)
raise ProgrammingError(
driver_error=f"SQL_WCHAR only supports UTF-16 encodings",
ddbc_error=(
f"Cannot use encoding '{encoding}' with SQL_WCHAR. "
f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
),
)

# SQL_WMETADATA can use any valid encoding (UTF-8, UTF-16, etc.)
# No restriction needed here - let users configure as needed

# Set default ctype based on encoding if not provided
if ctype is None:
if encoding in UTF16_ENCODINGS:
Expand All @@ -598,8 +695,33 @@ def setdecoding(
),
)

# Store the decoding settings for the specified sqltype
self._decoding_settings[sqltype] = {"encoding": encoding, "ctype": ctype}
# The SQL_WCHAR ctype is only valid with utf-16le/utf-16be (BOM-based 'utf-16' is rejected)
if ctype == ConstantsDDBC.SQL_WCHAR.value:
if encoding == "utf-16":
raise ProgrammingError(
driver_error="UTF-16 with Byte Order Mark not supported for SQL_WCHAR",
ddbc_error=(
"Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
"Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
),
)
elif encoding not in UTF16_ENCODINGS:
logger.debug(
"warning",
"Non-UTF-16 encoding %s attempted with SQL_WCHAR ctype",
sanitize_user_input(encoding),
)
raise ProgrammingError(
driver_error=f"SQL_WCHAR ctype only supports UTF-16 encodings",
ddbc_error=(
f"Cannot use encoding '{encoding}' with SQL_WCHAR ctype. "
f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
),
)

# Store the decoding settings for the specified sqltype (thread-safe with lock)
with self._encoding_lock:
self._decoding_settings[sqltype] = {"encoding": encoding, "ctype": ctype}

# Log with sanitized values for security
sqltype_name = {
Expand All @@ -618,7 +740,7 @@ def setdecoding(

def getdecoding(self, sqltype: int) -> Dict[str, Union[str, int]]:
"""
Gets the current text decoding settings for the specified SQL type.
Gets the current text decoding settings for the specified SQL type (thread-safe).

Args:
sqltype (int): The SQL type to get settings for: SQL_CHAR, SQL_WCHAR, or SQL_WMETADATA.
Expand All @@ -634,6 +756,9 @@ def getdecoding(self, sqltype: int) -> Dict[str, Union[str, int]]:
settings = cnxn.getdecoding(mssql_python.SQL_CHAR)
print(f"SQL_CHAR encoding: {settings['encoding']}")
print(f"SQL_CHAR ctype: {settings['ctype']}")

Note:
This method is thread-safe and can be called from multiple threads concurrently.
"""
if self._closed:
raise InterfaceError(
Expand All @@ -657,7 +782,9 @@ def getdecoding(self, sqltype: int) -> Dict[str, Union[str, int]]:
),
)

return self._decoding_settings[sqltype].copy()
# Thread-safe read with lock to prevent race conditions
with self._encoding_lock:
return self._decoding_settings[sqltype].copy()

def set_attr(self, attribute: int, value: Union[int, str, bytes, bytearray]) -> None:
"""
Expand Down
Loading
Loading