Skip to content

Commit ac56363

Browse files
author
subrata-ms
committed
unicode char fix for windows
1 parent 0eecf67 commit ac56363

File tree

1 file changed

+66
-27
lines changed

1 file changed

+66
-27
lines changed

tests/test_002_types.py

Lines changed: 66 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -808,8 +808,21 @@ def test_utf8_2byte_sequence_complete_coverage():
808808
3. Lines 486-487: Overlong encoding rejection
809809
"""
810810
import mssql_python
811+
import sys
811812

812-
print("\n=== Testing 2-byte UTF-8 Sequence Handler (lines 473-488) ===\n")
813+
# Helper to safely print on Windows console
814+
def safe_print(msg):
    """Print ``msg``, escaping characters stdout's encoding cannot represent.

    The message is first printed unchanged. If that raises
    ``UnicodeEncodeError``, it is re-encoded with the ``backslashreplace``
    error handler using ``sys.stdout.encoding`` (or ``"ascii"`` when that
    is empty/``None``), decoded back to text, and printed again.
    """
    try:
        print(msg)
    except UnicodeEncodeError:
        # Fallback for Windows console encoding issues: round-trip through
        # the console codec so unencodable characters become \x/\u/\U escapes.
        console_codec = sys.stdout.encoding or "ascii"
        escaped_bytes = msg.encode(console_codec, errors="backslashreplace")
        printable_text = escaped_bytes.decode(sys.stdout.encoding or "ascii")
        print(printable_text)
824+
825+
safe_print("\n=== Testing 2-byte UTF-8 Sequence Handler (lines 473-488) ===\n")
813826

814827
# TEST 1: Lines 475-478 - Invalid continuation byte detection
815828
# Condition: (data[i + 1] & 0xC0) != 0x80
@@ -825,7 +838,7 @@ def test_utf8_2byte_sequence_complete_coverage():
825838

826839
for test_bytes, binary, desc in invalid_continuation:
827840
result = test_bytes.decode("utf-8", errors="replace")
828-
print(f" {test_bytes.hex()}: {binary} ({desc}) -> {repr(result)}")
841+
safe_print(f" {test_bytes.hex()}: {binary} ({desc}) -> {repr(result)}")
829842
assert "\ufffd" in result, f"Should produce U+FFFD for {desc}"
830843

831844
print(" ✓ All invalid continuation bytes correctly rejected\n")
@@ -843,7 +856,7 @@ def test_utf8_2byte_sequence_complete_coverage():
843856
for test_bytes, expected_char, codepoint, desc in valid_2byte:
844857
# Test decoding
845858
result = test_bytes.decode("utf-8")
846-
print(f" {test_bytes.hex()}: U+{codepoint:04X} -> {repr(result)} ({desc})")
859+
safe_print(f" {test_bytes.hex()}: U+{codepoint:04X} -> {repr(result)} ({desc})")
847860
assert result == expected_char, f"Should decode to {expected_char!r}"
848861
assert "\ufffd" not in result, f"Should NOT contain U+FFFD for valid sequence"
849862

@@ -867,7 +880,7 @@ def test_utf8_2byte_sequence_complete_coverage():
867880

868881
for test_bytes, codepoint, desc in overlong_2byte:
869882
result = test_bytes.decode("utf-8", errors="replace")
870-
print(
883+
safe_print(
871884
f" {test_bytes.hex()}: Overlong encoding of U+{codepoint:04X} ({desc}) -> {repr(result)}"
872885
)
873886
# Should be rejected and produce U+FFFD
@@ -943,8 +956,21 @@ def test_utf8_3byte_sequence_complete_coverage():
943956
4. Lines 504-505: Overlong encoding rejection
944957
"""
945958
import mssql_python
959+
import sys
960+
961+
# Helper to safely print on Windows console
962+
def safe_print(msg):
    """Write ``msg`` to stdout without failing on unencodable characters.

    A plain ``print`` is attempted first. On ``UnicodeEncodeError`` the text
    is encoded with ``errors="backslashreplace"`` in the stdout encoding
    (``"ascii"`` if ``sys.stdout.encoding`` is falsy), decoded again, and
    that escaped form is printed instead.
    """
    try:
        print(msg)
    except UnicodeEncodeError:
        # Fallback for Windows console encoding issues.
        as_bytes = msg.encode(
            sys.stdout.encoding or "ascii",
            errors="backslashreplace",
        )
        as_text = as_bytes.decode(sys.stdout.encoding or "ascii")
        print(as_text)
946972

947-
print("\n=== Testing 3-byte UTF-8 Sequence Handler (lines 490-506) ===\n")
973+
safe_print("\n=== Testing 3-byte UTF-8 Sequence Handler (lines 490-506) ===\n")
948974

949975
# TEST 1: Lines 492-495 - Invalid continuation bytes
950976
# Condition: (data[i + 1] & 0xC0) != 0x80 || (data[i + 2] & 0xC0) != 0x80
@@ -958,10 +984,10 @@ def test_utf8_3byte_sequence_complete_coverage():
958984
(b"\xe4\xb8\xff", "Second byte 11111111"),
959985
]
960986

961-
print(" Invalid second continuation byte:")
987+
safe_print(" Invalid second continuation byte:")
962988
for test_bytes, desc in invalid_second_byte:
963989
result = test_bytes.decode("utf-8", errors="replace")
964-
print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
990+
safe_print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
965991
assert "\ufffd" in result, f"Should produce U+FFFD for {desc}"
966992

967993
# Third byte invalid
@@ -972,10 +998,10 @@ def test_utf8_3byte_sequence_complete_coverage():
972998
(b"\xe4\xb8\xff", "Third byte 11111111"),
973999
]
9741000

975-
print(" Invalid third continuation byte:")
1001+
safe_print(" Invalid third continuation byte:")
9761002
for test_bytes, desc in invalid_third_byte:
9771003
result = test_bytes.decode("utf-8", errors="replace")
978-
print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
1004+
safe_print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
9791005
assert "\ufffd" in result, f"Should produce U+FFFD for {desc}"
9801006

9811007
# Both bytes invalid
@@ -985,10 +1011,10 @@ def test_utf8_3byte_sequence_complete_coverage():
9851011
(b"\xe0\xc0\xc0", "Both continuation bytes 11xxxxxx"),
9861012
]
9871013

988-
print(" Both continuation bytes invalid:")
1014+
safe_print(" Both continuation bytes invalid:")
9891015
for test_bytes, desc in both_invalid:
9901016
result = test_bytes.decode("utf-8", errors="replace")
991-
print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
1017+
safe_print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
9921018
assert "\ufffd" in result, f"Should produce U+FFFD for {desc}"
9931019

9941020
print(" ✓ All invalid continuation bytes correctly rejected\n")
@@ -1009,7 +1035,7 @@ def test_utf8_3byte_sequence_complete_coverage():
10091035
for test_bytes, expected_char, codepoint, desc in valid_3byte:
10101036
# Test decoding
10111037
result = test_bytes.decode("utf-8")
1012-
print(f" {test_bytes.hex()}: U+{codepoint:04X} -> {repr(result)} ({desc})")
1038+
safe_print(f" {test_bytes.hex()}: U+{codepoint:04X} -> {repr(result)} ({desc})")
10131039
assert result == expected_char, f"Should decode to {expected_char!r}"
10141040
assert "\ufffd" not in result, f"Should NOT contain U+FFFD for valid sequence"
10151041

@@ -1036,7 +1062,7 @@ def test_utf8_3byte_sequence_complete_coverage():
10361062

10371063
for test_bytes, codepoint, desc in surrogate_encodings:
10381064
result = test_bytes.decode("utf-8", errors="replace")
1039-
print(f" {test_bytes.hex()}: {desc} (0x{codepoint:04X}) -> {repr(result)}")
1065+
safe_print(f" {test_bytes.hex()}: {desc} (0x{codepoint:04X}) -> {repr(result)}")
10401066
# Should be rejected and produce U+FFFD
10411067
assert "\ufffd" in result, f"Surrogate U+{codepoint:04X} should be rejected"
10421068
# Verify the actual surrogate character is not in the output
@@ -1062,7 +1088,7 @@ def test_utf8_3byte_sequence_complete_coverage():
10621088

10631089
for test_bytes, codepoint, desc in overlong_3byte:
10641090
result = test_bytes.decode("utf-8", errors="replace")
1065-
print(
1091+
safe_print(
10661092
f" {test_bytes.hex()}: Overlong encoding of U+{codepoint:04X} ({desc}) -> {repr(result)}"
10671093
)
10681094
# Should be rejected and produce U+FFFD
@@ -1159,8 +1185,21 @@ def test_utf8_4byte_sequence_complete_coverage():
11591185
5. Lines 528-529: Invalid sequence fallback
11601186
"""
11611187
import mssql_python
1188+
import sys
1189+
1190+
# Helper to safely print on Windows console
1191+
def safe_print(msg):
    """Print ``msg``; fall back to a backslash-escaped form if encoding fails.

    When the direct ``print`` raises ``UnicodeEncodeError``, the message is
    encoded with the ``backslashreplace`` handler in ``sys.stdout.encoding``
    (defaulting to ``"ascii"`` when that value is falsy) and the decoded
    result is printed.
    """
    try:
        print(msg)
    except UnicodeEncodeError:
        # Fallback for Windows console encoding issues: characters outside
        # the console code page are emitted as backslash escapes.
        stdout_codec = sys.stdout.encoding or "ascii"
        fallback = msg.encode(stdout_codec, errors="backslashreplace")
        print(fallback.decode(sys.stdout.encoding or "ascii"))
11621201

1163-
print("\n=== Testing 4-byte UTF-8 Sequence Handler (lines 508-530) ===\n")
1202+
safe_print("\n=== Testing 4-byte UTF-8 Sequence Handler (lines 508-530) ===\n")
11641203

11651204
# TEST 1: Lines 512-514 - Invalid continuation bytes
11661205
# Condition: (data[i+1] & 0xC0) != 0x80 || (data[i+2] & 0xC0) != 0x80 || (data[i+3] & 0xC0) != 0x80
@@ -1174,10 +1213,10 @@ def test_utf8_4byte_sequence_complete_coverage():
11741213
(b"\xf0\xff\x80\x80", "Byte 1: 11111111"),
11751214
]
11761215

1177-
print(" Invalid second continuation byte (byte 1):")
1216+
safe_print(" Invalid second continuation byte (byte 1):")
11781217
for test_bytes, desc in invalid_byte1:
11791218
result = test_bytes.decode("utf-8", errors="replace")
1180-
print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
1219+
safe_print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
11811220
assert "\ufffd" in result, f"Should produce U+FFFD for {desc}"
11821221

11831222
# Third byte invalid (byte 2)
@@ -1188,10 +1227,10 @@ def test_utf8_4byte_sequence_complete_coverage():
11881227
(b"\xf0\x90\xff\x80", "Byte 2: 11111111"),
11891228
]
11901229

1191-
print(" Invalid third continuation byte (byte 2):")
1230+
safe_print(" Invalid third continuation byte (byte 2):")
11921231
for test_bytes, desc in invalid_byte2:
11931232
result = test_bytes.decode("utf-8", errors="replace")
1194-
print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
1233+
safe_print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
11951234
assert "\ufffd" in result, f"Should produce U+FFFD for {desc}"
11961235

11971236
# Fourth byte invalid (byte 3)
@@ -1202,10 +1241,10 @@ def test_utf8_4byte_sequence_complete_coverage():
12021241
(b"\xf0\x90\x80\xff", "Byte 3: 11111111"),
12031242
]
12041243

1205-
print(" Invalid fourth continuation byte (byte 3):")
1244+
safe_print(" Invalid fourth continuation byte (byte 3):")
12061245
for test_bytes, desc in invalid_byte3:
12071246
result = test_bytes.decode("utf-8", errors="replace")
1208-
print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
1247+
safe_print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
12091248
assert "\ufffd" in result, f"Should produce U+FFFD for {desc}"
12101249

12111250
# Multiple bytes invalid
@@ -1216,10 +1255,10 @@ def test_utf8_4byte_sequence_complete_coverage():
12161255
(b"\xf0\x00\x00\x00", "All continuation bytes invalid"),
12171256
]
12181257

1219-
print(" Multiple continuation bytes invalid:")
1258+
safe_print(" Multiple continuation bytes invalid:")
12201259
for test_bytes, desc in multiple_invalid:
12211260
result = test_bytes.decode("utf-8", errors="replace")
1222-
print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
1261+
safe_print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
12231262
assert "\ufffd" in result, f"Should produce U+FFFD for {desc}"
12241263

12251264
print(" ✓ All invalid continuation bytes correctly rejected\n")
@@ -1240,7 +1279,7 @@ def test_utf8_4byte_sequence_complete_coverage():
12401279
for test_bytes, expected_char, codepoint, desc in valid_4byte:
12411280
# Test decoding
12421281
result = test_bytes.decode("utf-8")
1243-
print(f" {test_bytes.hex()}: U+{codepoint:06X} -> {repr(result)} ({desc})")
1282+
safe_print(f" {test_bytes.hex()}: U+{codepoint:06X} -> {repr(result)} ({desc})")
12441283
assert result == expected_char, f"Should decode to {expected_char!r}"
12451284
assert "\ufffd" not in result, f"Should NOT contain U+FFFD for valid sequence"
12461285

@@ -1265,7 +1304,7 @@ def test_utf8_4byte_sequence_complete_coverage():
12651304

12661305
for test_bytes, codepoint, desc in overlong_4byte:
12671306
result = test_bytes.decode("utf-8", errors="replace")
1268-
print(
1307+
safe_print(
12691308
f" {test_bytes.hex()}: Overlong encoding of U+{codepoint:04X} ({desc}) -> {repr(result)}"
12701309
)
12711310
# Should be rejected and produce U+FFFD
@@ -1292,7 +1331,7 @@ def test_utf8_4byte_sequence_complete_coverage():
12921331

12931332
for test_bytes, codepoint, desc in out_of_range:
12941333
result = test_bytes.decode("utf-8", errors="replace")
1295-
print(f" {test_bytes.hex()}: {desc} (0x{codepoint:06X}) -> {repr(result)}")
1334+
safe_print(f" {test_bytes.hex()}: {desc} (0x{codepoint:06X}) -> {repr(result)}")
12961335
# Should be rejected and produce U+FFFD
12971336
assert (
12981337
"\ufffd" in result
@@ -1313,7 +1352,7 @@ def test_utf8_4byte_sequence_complete_coverage():
13131352

13141353
for test_bytes, desc in invalid_sequences:
13151354
result = test_bytes.decode("utf-8", errors="replace")
1316-
print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
1355+
safe_print(f" {test_bytes.hex()}: {desc} -> {repr(result)}")
13171356
assert "\ufffd" in result, f"Invalid sequence should produce U+FFFD"
13181357

13191358
print(" ✓ Invalid sequences correctly handled\n")

0 commit comments

Comments
 (0)