@@ -826,11 +826,15 @@ def test_utf8_2byte_sequence_complete_coverage():
826826 for test_bytes , binary , desc in invalid_continuation :
827827 try :
828828 result = test_bytes .decode ("utf-8" , errors = "replace" )
829- print (f" { test_bytes .hex ()} : { binary } ({ desc } ) -> { repr (result )} " )
829+ try :
830+ print (f" { test_bytes .hex ()} : { binary } ({ desc } ) -> { repr (result )} " )
831+ except UnicodeEncodeError :
832+ print (f" { test_bytes .hex ()} : { binary } ({ desc } ) -> <decoded>" )
830833 # Check that invalid sequences are handled (may produce replacement chars or split)
831834 assert len (result ) > 0 , f"Should produce some output for { desc } "
832835 except Exception as e :
833- print (f" { test_bytes .hex ()} : { binary } ({ desc } ) -> Exception: { e } " )
836+ # Print without the exception message to avoid encoding errors
837+ print (f" { test_bytes .hex ()} : { binary } ({ desc } ) -> Exception occurred" )
834838 # Any error handling is acceptable for invalid sequences
835839
836840 print (" ✓ All invalid continuation bytes handled\n " )
@@ -873,14 +877,17 @@ def test_utf8_2byte_sequence_complete_coverage():
873877 for test_bytes , codepoint , desc in overlong_2byte :
874878 try :
875879 result = test_bytes .decode ("utf-8" , errors = "replace" )
876- print (
877- f" { test_bytes .hex ()} : Overlong encoding of U+{ codepoint :04X} ({ desc } ) -> { repr (result )} "
878- )
880+ try :
881+ print (
882+ f" { test_bytes .hex ()} : Overlong encoding of U+{ codepoint :04X} ({ desc } ) -> { repr (result )} "
883+ )
884+ except UnicodeEncodeError :
885+ print (f" { test_bytes .hex ()} : Overlong encoding of U+{ codepoint :04X} ({ desc } ) -> <decoded>" )
879886 # Check that overlong sequences are handled (behavior may vary by platform)
880887 assert len (result ) > 0 , f"Should produce some output for overlong U+{ codepoint :04X} "
881888 except Exception as e :
882889 print (
883- f" { test_bytes .hex ()} : Overlong encoding of U+{ codepoint :04X} ({ desc } ) -> Exception: { e } "
890+ f" { test_bytes .hex ()} : Overlong encoding of U+{ codepoint :04X} ({ desc } ) -> Exception occurred "
884891 )
885892
886893 print (" ✓ All overlong 2-byte encodings handled\n " )
@@ -965,11 +972,14 @@ def test_utf8_3byte_sequence_complete_coverage():
965972 for test_bytes , desc in invalid_second_byte :
966973 try :
967974 result = test_bytes .decode ("utf-8" , errors = "replace" )
968- print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
975+ try :
976+ print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
977+ except UnicodeEncodeError :
978+ print (f" { test_bytes .hex ()} : { desc } -> <decoded>" )
969979 # Check that invalid sequences are handled (may produce replacement chars or split)
970980 assert len (result ) > 0 , f"Should produce some output for { desc } "
971981 except Exception as e :
972- print (f" { test_bytes .hex ()} : { desc } -> Exception: { e } " )
982+ print (f" { test_bytes .hex ()} : { desc } -> Exception occurred " )
973983
974984 # Third byte invalid (second byte must be valid to isolate third byte error)
975985 invalid_third_byte = [
@@ -983,11 +993,14 @@ def test_utf8_3byte_sequence_complete_coverage():
983993 for test_bytes , desc in invalid_third_byte :
984994 try :
985995 result = test_bytes .decode ("utf-8" , errors = "replace" )
986- print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
996+ try :
997+ print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
998+ except UnicodeEncodeError :
999+ print (f" { test_bytes .hex ()} : { desc } -> <decoded>" )
9871000 # Check that invalid sequences are handled (may produce replacement chars or split)
9881001 assert len (result ) > 0 , f"Should produce some output for { desc } "
9891002 except Exception as e :
990- print (f" { test_bytes .hex ()} : { desc } -> Exception: { e } " )
1003+ print (f" { test_bytes .hex ()} : { desc } -> Exception occurred " )
9911004
9921005 # Both bytes invalid
9931006 both_invalid = [
@@ -1000,11 +1013,14 @@ def test_utf8_3byte_sequence_complete_coverage():
10001013 for test_bytes , desc in both_invalid :
10011014 try :
10021015 result = test_bytes .decode ("utf-8" , errors = "replace" )
1003- print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
1016+ try :
1017+ print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
1018+ except UnicodeEncodeError :
1019+ print (f" { test_bytes .hex ()} : { desc } -> <decoded>" )
10041020 # Check that invalid sequences are handled (may produce replacement chars or split)
10051021 assert len (result ) > 0 , f"Should produce some output for { desc } "
10061022 except Exception as e :
1007- print (f" { test_bytes .hex ()} : { desc } -> Exception: { e } " )
1023+ print (f" { test_bytes .hex ()} : { desc } -> Exception occurred " )
10081024
10091025 print (" ✓ All invalid continuation bytes handled\n " )
10101026
@@ -1052,11 +1068,14 @@ def test_utf8_3byte_sequence_complete_coverage():
10521068 for test_bytes , codepoint , desc in surrogate_encodings :
10531069 try :
10541070 result = test_bytes .decode ("utf-8" , errors = "replace" )
1055- print (f" { test_bytes .hex ()} : { desc } (0x{ codepoint :04X} ) -> { repr (result )} " )
1071+ try :
1072+ print (f" { test_bytes .hex ()} : { desc } (0x{ codepoint :04X} ) -> { repr (result )} " )
1073+ except UnicodeEncodeError :
1074+ print (f" { test_bytes .hex ()} : { desc } (0x{ codepoint :04X} ) -> <decoded>" )
10561075 # Check that surrogate sequences are handled (behavior may vary by platform)
10571076 assert len (result ) > 0 , f"Should produce some output for surrogate U+{ codepoint :04X} "
10581077 except Exception as e :
1059- print (f" { test_bytes .hex ()} : { desc } (0x{ codepoint :04X} ) -> Exception: { e } " )
1078+ print (f" { test_bytes .hex ()} : { desc } (0x{ codepoint :04X} ) -> Exception occurred " )
10601079 except ValueError :
10611080 # Python may not allow creating surrogate characters directly
10621081 pass
@@ -1077,14 +1096,17 @@ def test_utf8_3byte_sequence_complete_coverage():
10771096 for test_bytes , codepoint , desc in overlong_3byte :
10781097 try :
10791098 result = test_bytes .decode ("utf-8" , errors = "replace" )
1080- print (
1081- f" { test_bytes .hex ()} : Overlong encoding of U+{ codepoint :04X} ({ desc } ) -> { repr (result )} "
1082- )
1099+ try :
1100+ print (
1101+ f" { test_bytes .hex ()} : Overlong encoding of U+{ codepoint :04X} ({ desc } ) -> { repr (result )} "
1102+ )
1103+ except UnicodeEncodeError :
1104+ print (f" { test_bytes .hex ()} : Overlong encoding of U+{ codepoint :04X} ({ desc } ) -> <decoded>" )
10831105 # Check that overlong sequences are handled (behavior may vary by platform)
10841106 assert len (result ) > 0 , f"Should produce some output for overlong U+{ codepoint :04X} "
10851107 except Exception as e :
10861108 print (
1087- f" { test_bytes .hex ()} : Overlong encoding of U+{ codepoint :04X} ({ desc } ) -> Exception: { e } "
1109+ f" { test_bytes .hex ()} : Overlong encoding of U+{ codepoint :04X} ({ desc } ) -> Exception occurred "
10881110 )
10891111
10901112 print (" ✓ All overlong 3-byte encodings handled\n " )
@@ -1190,11 +1212,14 @@ def test_utf8_4byte_sequence_complete_coverage():
11901212 for test_bytes , desc in invalid_byte1 :
11911213 try :
11921214 result = test_bytes .decode ("utf-8" , errors = "replace" )
1193- print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
1215+ try :
1216+ print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
1217+ except UnicodeEncodeError :
1218+ print (f" { test_bytes .hex ()} : { desc } -> <decoded>" )
11941219 # Check that invalid sequences are handled (may produce replacement chars or split)
11951220 assert len (result ) > 0 , f"Should produce some output for { desc } "
11961221 except Exception as e :
1197- print (f" { test_bytes .hex ()} : { desc } -> Exception: { e } " )
1222+ print (f" { test_bytes .hex ()} : { desc } -> Exception occurred " )
11981223
11991224 # Third byte invalid (byte 2)
12001225 invalid_byte2 = [
@@ -1208,11 +1233,14 @@ def test_utf8_4byte_sequence_complete_coverage():
12081233 for test_bytes , desc in invalid_byte2 :
12091234 try :
12101235 result = test_bytes .decode ("utf-8" , errors = "replace" )
1211- print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
1236+ try :
1237+ print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
1238+ except UnicodeEncodeError :
1239+ print (f" { test_bytes .hex ()} : { desc } -> <decoded>" )
12121240 # Check that invalid sequences are handled (may produce replacement chars or split)
12131241 assert len (result ) > 0 , f"Should produce some output for { desc } "
12141242 except Exception as e :
1215- print (f" { test_bytes .hex ()} : { desc } -> Exception: { e } " )
1243+ print (f" { test_bytes .hex ()} : { desc } -> Exception occurred " )
12161244
12171245 # Fourth byte invalid (byte 3)
12181246 invalid_byte3 = [
@@ -1226,11 +1254,14 @@ def test_utf8_4byte_sequence_complete_coverage():
12261254 for test_bytes , desc in invalid_byte3 :
12271255 try :
12281256 result = test_bytes .decode ("utf-8" , errors = "replace" )
1229- print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
1257+ try :
1258+ print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
1259+ except UnicodeEncodeError :
1260+ print (f" { test_bytes .hex ()} : { desc } -> <decoded>" )
12301261 # Check that invalid sequences are handled (may produce replacement chars or split)
12311262 assert len (result ) > 0 , f"Should produce some output for { desc } "
12321263 except Exception as e :
1233- print (f" { test_bytes .hex ()} : { desc } -> Exception: { e } " )
1264+ print (f" { test_bytes .hex ()} : { desc } -> Exception occurred " )
12341265
12351266 # Multiple bytes invalid
12361267 multiple_invalid = [
@@ -1244,11 +1275,14 @@ def test_utf8_4byte_sequence_complete_coverage():
12441275 for test_bytes , desc in multiple_invalid :
12451276 try :
12461277 result = test_bytes .decode ("utf-8" , errors = "replace" )
1247- print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
1278+ try :
1279+ print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
1280+ except UnicodeEncodeError :
1281+ print (f" { test_bytes .hex ()} : { desc } -> <decoded>" )
12481282 # Check that invalid sequences are handled (may produce replacement chars or split)
12491283 assert len (result ) > 0 , f"Should produce some output for { desc } "
12501284 except Exception as e :
1251- print (f" { test_bytes .hex ()} : { desc } -> Exception: { e } " )
1285+ print (f" { test_bytes .hex ()} : { desc } -> Exception occurred " )
12521286
12531287 print (" ✓ All invalid continuation bytes handled\n " )
12541288
@@ -1294,14 +1328,17 @@ def test_utf8_4byte_sequence_complete_coverage():
12941328 for test_bytes , codepoint , desc in overlong_4byte :
12951329 try :
12961330 result = test_bytes .decode ("utf-8" , errors = "replace" )
1297- print (
1298- f" { test_bytes .hex ()} : Overlong encoding of U+{ codepoint :04X} ({ desc } ) -> { repr (result )} "
1299- )
1331+ try :
1332+ print (
1333+ f" { test_bytes .hex ()} : Overlong encoding of U+{ codepoint :04X} ({ desc } ) -> { repr (result )} "
1334+ )
1335+ except UnicodeEncodeError :
1336+ print (f" { test_bytes .hex ()} : Overlong encoding of U+{ codepoint :04X} ({ desc } ) -> <decoded>" )
13001337 # Check that overlong sequences are handled (behavior may vary by platform)
13011338 assert len (result ) > 0 , f"Should produce some output for overlong U+{ codepoint :04X} "
13021339 except Exception as e :
13031340 print (
1304- f" { test_bytes .hex ()} : Overlong encoding of U+{ codepoint :04X} ({ desc } ) -> Exception: { e } "
1341+ f" { test_bytes .hex ()} : Overlong encoding of U+{ codepoint :04X} ({ desc } ) -> Exception occurred "
13051342 )
13061343
13071344 print (" ✓ All overlong 4-byte encodings handled\n " )
@@ -1338,11 +1375,14 @@ def test_utf8_4byte_sequence_complete_coverage():
13381375 for test_bytes , desc in invalid_sequences :
13391376 try :
13401377 result = test_bytes .decode ("utf-8" , errors = "replace" )
1341- print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
1378+ try :
1379+ print (f" { test_bytes .hex ()} : { desc } -> { repr (result )} " )
1380+ except UnicodeEncodeError :
1381+ print (f" { test_bytes .hex ()} : { desc } -> <decoded>" )
13421382 # Check that invalid sequences are handled
13431383 assert len (result ) > 0 , f"Should produce some output for invalid sequence"
13441384 except Exception as e :
1345- print (f" { test_bytes .hex ()} : { desc } -> Exception: { e } " )
1385+ print (f" { test_bytes .hex ()} : { desc } -> Exception occurred " )
13461386
13471387 print (" ✓ Invalid sequences handled\n " )
13481388
0 commit comments