From 4e10fa993a8bbc4d6e77910cb5fd1bc28473e8ec Mon Sep 17 00:00:00 2001 From: AN Long Date: Sat, 24 Jan 2026 18:35:32 +0900 Subject: [PATCH 1/7] gh-144007: Eliminate redundant refcounting in the JIT for BINARY_OP (GH-144011) --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 6 ++--- Lib/test/test_capi/test_opt.py | 23 ++++++++++++++++ ...-01-19-01-56-44.gh-issue-144007.1xjdBf.rst | 1 + Modules/_testinternalcapi/test_cases.c.h | 27 +++++++++++++------ Python/bytecodes.c | 9 ++++--- Python/executor_cases.c.h | 26 +++++++----------- Python/generated_cases.c.h | 27 +++++++++++++------ Python/optimizer_bytecodes.c | 4 ++- Python/optimizer_cases.c.h | 26 +++++++++++++----- 11 files changed, 105 insertions(+), 48 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-56-44.gh-issue-144007.1xjdBf.rst diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 80c11b753be7e6..95dee5d352670d 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1343,7 +1343,7 @@ extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256]; #ifdef NEED_OPCODE_METADATA const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = { - [BINARY_OP] = { .nuops = 1, .uops = { { _BINARY_OP, OPARG_SIMPLE, 4 } } }, + [BINARY_OP] = { .nuops = 3, .uops = { { _BINARY_OP, OPARG_SIMPLE, 4 }, { _POP_TOP, OPARG_SIMPLE, 4 }, { _POP_TOP, OPARG_SIMPLE, 4 } } }, [BINARY_OP_ADD_FLOAT] = { .nuops = 5, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_UNICODE] = { .nuops = 5, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index ccdcc27f903cb9..8aa42933ec83fa 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -379,7 +379,7 @@ extern "C" { #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE #define MAX_UOP_ID 578 -#define _BINARY_OP_r21 579 +#define _BINARY_OP_r23 579 #define _BINARY_OP_ADD_FLOAT_r03 580 #define _BINARY_OP_ADD_FLOAT_r13 581 #define _BINARY_OP_ADD_FLOAT_r23 582 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index d51ed9c697ab8c..f9f22ba048194f 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -2931,7 +2931,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { .entries = { { -1, -1, -1 }, { -1, -1, -1 }, - { 1, 2, _BINARY_OP_r21 }, + { 3, 2, _BINARY_OP_r23 }, { -1, -1, -1 }, }, }, @@ -4031,7 +4031,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_COPY_3_r23] = _COPY_3, [_COPY_3_r33] = _COPY_3, [_COPY_r01] = _COPY, - [_BINARY_OP_r21] = _BINARY_OP, + [_BINARY_OP_r23] = _BINARY_OP, [_SWAP_2_r02] = _SWAP_2, [_SWAP_2_r12] = _SWAP_2, [_SWAP_2_r22] = _SWAP_2, @@ -4225,7 +4225,7 @@ const uint16_t _PyUop_SpillsAndReloads[4][4] = { const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_BINARY_OP] = "_BINARY_OP", - [_BINARY_OP_r21] = "_BINARY_OP_r21", + [_BINARY_OP_r23] = "_BINARY_OP_r23", [_BINARY_OP_ADD_FLOAT] = "_BINARY_OP_ADD_FLOAT", [_BINARY_OP_ADD_FLOAT_r03] = "_BINARY_OP_ADD_FLOAT_r03", [_BINARY_OP_ADD_FLOAT_r13] = "_BINARY_OP_ADD_FLOAT_r13", diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 5a0a379e31a962..da0d4078c9f5ed 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2897,6 +2897,29 @@ def testfunc(n): self.assertIn("_POP_TOP_NOP", uops) self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2) + def test_binary_op_refcount_elimination(self): + class CustomAdder: + def __init__(self, val): + self.val = val + def __add__(self, other): + return CustomAdder(self.val + other.val) + + def testfunc(n): + a = CustomAdder(1) + b = CustomAdder(2) + res = None + for _ in range(n): + res = a + b + return res.val if res else 0 + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 3) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_OP", uops) + self.assertIn("_POP_TOP_NOP", uops) + self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2) + def test_binary_op_extend_float_long_add_refcount_elimination(self): def testfunc(n): a = 1.5 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-56-44.gh-issue-144007.1xjdBf.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-56-44.gh-issue-144007.1xjdBf.rst new file mode 100644 index 00000000000000..26db86fae6bf25 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-56-44.gh-issue-144007.1xjdBf.rst @@ -0,0 +1 @@ +Eliminate redundant refcounting in the JIT for ``BINARY_OP``. diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index a7d589dbe7b274..3c9eb9194366e8 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -32,6 +32,9 @@ _PyStackRef lhs; _PyStackRef rhs; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + _PyStackRef value; // _SPECIALIZE_BINARY_OP { rhs = stack_pointer[-1]; @@ -65,18 +68,26 @@ JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); + l = lhs; + r = rhs; + } + // _POP_TOP + { + value = r; + stack_pointer[-2] = res; + stack_pointer[-1] = l; _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = lhs; - lhs = res; - stack_pointer[-2] = lhs; - PyStackRef_CLOSE(tmp); - tmp = rhs; - rhs = PyStackRef_NULL; - stack_pointer[-1] = rhs; - PyStackRef_CLOSE(tmp); + PyStackRef_XCLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = l; stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); } DISPATCH(); } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 36df991e84dc2c..bec03119993d79 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -5119,7 +5119,7 @@ dummy_func( assert(oparg <= NB_OPARG_LAST); } - op(_BINARY_OP, (lhs, rhs -- res)) { + op(_BINARY_OP, (lhs, rhs -- res, l, r)) { PyObject *lhs_o = PyStackRef_AsPyObjectBorrow(lhs); PyObject *rhs_o = PyStackRef_AsPyObjectBorrow(rhs); @@ -5129,10 +5129,13 @@ dummy_func( ERROR_NO_POP(); } res = PyStackRef_FromPyObjectSteal(res_o); - DECREF_INPUTS(); + l = lhs; + r = rhs; + DEAD(lhs); + DEAD(rhs); } - macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP; + macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP + POP_TOP + POP_TOP; pure replicate(2:4) inst(SWAP, (bottom, unused[oparg-2], top -- bottom, unused[oparg-2], top)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 9098bd219ed506..f51466abea9266 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -16594,12 +16594,14 @@ break; } - case _BINARY_OP_r21: { + case _BINARY_OP_r23: { CHECK_CURRENT_CACHED_VALUES(2); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); _PyStackRef rhs; _PyStackRef lhs; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; oparg = CURRENT_OPARG(); @@ -16620,23 +16622,13 @@ JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = lhs; - lhs = res; - stack_pointer[-2] = lhs; - PyStackRef_CLOSE(tmp); - tmp = rhs; - rhs = PyStackRef_NULL; - stack_pointer[-1] = rhs; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -1; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + l = lhs; + r = rhs; + _tos_cache2 = r; + _tos_cache1 = l; _tos_cache0 = res; - _tos_cache1 = PyStackRef_ZERO_BITS; - _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(1); - stack_pointer += -1; + SET_CURRENT_CACHED_VALUES(3); + stack_pointer += -2; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); break; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 9df6b2f70f96df..ae9e1e5421b87b 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -32,6 +32,9 @@ _PyStackRef lhs; _PyStackRef rhs; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + _PyStackRef value; // _SPECIALIZE_BINARY_OP { rhs = stack_pointer[-1]; @@ -65,18 +68,26 @@ JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); + l = lhs; + r = rhs; + } + // _POP_TOP + { + value = r; + stack_pointer[-2] = res; + stack_pointer[-1] = l; _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = lhs; - lhs = res; - stack_pointer[-2] = lhs; - PyStackRef_CLOSE(tmp); - tmp = rhs; - rhs = PyStackRef_NULL; - stack_pointer[-1] = rhs; - PyStackRef_CLOSE(tmp); + PyStackRef_XCLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = l; stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); } DISPATCH(); } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index de7a2313f9220a..27b974f372df99 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -211,7 +211,9 @@ dummy_func(void) { sym_set_type(left, &PyFloat_Type); } - op(_BINARY_OP, (lhs, rhs -- res)) { + op(_BINARY_OP, (lhs, rhs -- res, l, r)) { + l = lhs; + r = rhs; REPLACE_OPCODE_IF_EVALUATES_PURE(lhs, rhs, res); bool lhs_int = sym_matches_type(lhs, &PyLong_Type); bool rhs_int = sym_matches_type(rhs, &PyLong_Type); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 5fca69462669b5..3b25533e07f743 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -3616,8 +3616,12 @@ JitOptRef rhs; JitOptRef lhs; JitOptRef res; + JitOptRef l; + JitOptRef r; rhs = stack_pointer[-1]; lhs = stack_pointer[-2]; + l = lhs; + r = rhs; if ( sym_is_safe_const(ctx, lhs) && sym_is_safe_const(ctx, rhs) @@ -3627,6 +3631,8 @@ _PyStackRef lhs = sym_get_const_as_stackref(ctx, lhs_sym); _PyStackRef rhs = sym_get_const_as_stackref(ctx, rhs_sym); _PyStackRef res_stackref; + _PyStackRef l_stackref; + _PyStackRef r_stackref; /* Start of uop copied from bytecodes for constant evaluation */ PyObject *lhs_o = PyStackRef_AsPyObjectBorrow(lhs); PyObject *rhs_o = PyStackRef_AsPyObjectBorrow(rhs); @@ -3636,18 +3642,24 @@ JUMP_TO_LABEL(error); } res_stackref = PyStackRef_FromPyObjectSteal(res_o); + l_stackref = lhs; + r_stackref = rhs; /* End of uop copied from bytecodes for constant evaluation */ + (void)l_stackref; + (void)r_stackref; res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); if (sym_is_const(ctx, res)) { PyObject *result = sym_get_const(ctx, res); if (_Py_IsImmortal(result)) { - // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result - ADD_OP(_POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + // Replace with _INSERT_2_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + ADD_OP(_INSERT_2_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } - CHECK_STACK_BOUNDS(-1); + CHECK_STACK_BOUNDS(1); stack_pointer[-2] = res; - stack_pointer += -1; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3684,9 +3696,11 @@ else { res = sym_new_type(ctx, &PyFloat_Type); } - CHECK_STACK_BOUNDS(-1); + CHECK_STACK_BOUNDS(1); stack_pointer[-2] = res; - stack_pointer += -1; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } From ca99bfdefb7093d9287353b7d1db97222a414b0e Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Sat, 24 Jan 2026 17:36:40 +0800 Subject: [PATCH 2/7] gh-144016: Fix bad stack assert in the JIT optimizer (GH-144019) --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index c6a1ae60a317fa..6c381ab184fd85 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -507,7 +507,7 @@ optimize_uops( *(ctx->out_buffer.next++) = *this_instr; } assert(ctx->frame != NULL); - if (!CURRENT_FRAME_IS_INIT_SHIM()) { + if (!CURRENT_FRAME_IS_INIT_SHIM() && !ctx->done) { DPRINTF(3, " stack_level %d\n", STACK_LEVEL()); ctx->frame->stack_pointer = stack_pointer; assert(STACK_LEVEL() >= 0); From 6f579147e35f85d74c262719c6cac7277f96b124 Mon Sep 17 00:00:00 2001 From: Yongtao Huang Date: Sat, 24 Jan 2026 17:37:45 +0800 Subject: [PATCH 3/7] Misc: remove duplicate `instr_frame` assignment in `_PyJit_TryInitializeTracing` (GH-144155) Remove duplicate instr_frame assignment in optimizer --- Python/optimizer.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index f25242972efeb1..b8208c7d888e38 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1050,7 +1050,6 @@ _PyJit_TryInitializeTracing( tracer->initial_state.exit = exit; tracer->initial_state.stack_depth = curr_stackdepth; tracer->initial_state.chain_depth = chain_depth; - tracer->prev_state.instr_frame = frame; tracer->prev_state.dependencies_still_valid = true; tracer->prev_state.instr_code = (PyCodeObject *)Py_NewRef(_PyFrame_GetCode(frame)); tracer->prev_state.instr = curr_instr; From 29840247ff6da36808cd04115421ff59cdaea455 Mon Sep 17 00:00:00 2001 From: Hai Zhu Date: Sat, 24 Jan 2026 17:43:01 +0800 Subject: [PATCH 4/7] gh-144068: fix JIT tracer memory leak when daemon thread exits (GH-144077) --- Lib/test/test_capi/test_opt.py | 23 +++++++++++++++++++ ...-01-21-02-30-06.gh-issue-144068.9TTu7v.rst | 1 + Python/pystate.c | 4 ++++ 3 files changed, 28 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-01-21-02-30-06.gh-issue-144068.9TTu7v.rst diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index da0d4078c9f5ed..f224984777500a 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -3889,6 +3889,29 @@ def __next__(self): """), PYTHON_JIT="1", PYTHON_JIT_STRESS="1") self.assertEqual(result[0].rc, 0, result) + def test_144068_daemon_thread_jit_cleanup(self): + result = script_helper.run_python_until_end('-c', textwrap.dedent(""" + import threading + import time + + def hot_loop(): + end = time.time() + 5.0 + while time.time() < end: + pass + + # Create a daemon thread that will be abandoned at shutdown + t = threading.Thread(target=hot_loop, daemon=True) + t.start() + + time.sleep(0.1) + """), PYTHON_JIT="1", ASAN_OPTIONS="detect_leaks=1") + self.assertEqual(result[0].rc, 0, result) + stderr = result[0].err.decode('utf-8', errors='replace') + self.assertNotIn('LeakSanitizer', stderr, + f"Memory leak detected by ASan:\n{stderr}") + self.assertNotIn('_PyJit_TryInitializeTracing', stderr, + f"JIT tracer memory leak detected:\n{stderr}") + def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-01-21-02-30-06.gh-issue-144068.9TTu7v.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-21-02-30-06.gh-issue-144068.9TTu7v.rst new file mode 100644 index 00000000000000..b3e5db64a368b3 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-21-02-30-06.gh-issue-144068.9TTu7v.rst @@ -0,0 +1 @@ +Fix JIT tracer memory leak, ensure the JIT tracer state is freed when daemon threads are cleaned up during interpreter shutdown. diff --git a/Python/pystate.c b/Python/pystate.c index 19f1245d60a2f8..a8f37bedc81247 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1843,6 +1843,10 @@ PyThreadState_Clear(PyThreadState *tstate) _PyThreadState_ClearMimallocHeaps(tstate); +#ifdef _Py_TIER2 + _PyJit_TracerFree((_PyThreadStateImpl *)tstate); +#endif + tstate->_status.cleared = 1; // XXX Call _PyThreadStateSwap(runtime, NULL) here if "current". From 6d972e0104097e476118686ff7c84ea238ecafc3 Mon Sep 17 00:00:00 2001 From: reiden <65756407+reidenong@users.noreply.github.com> Date: Sat, 24 Jan 2026 18:02:08 +0800 Subject: [PATCH 5/7] gh-130415: Narrow types to constants in branches involving specialized comparisons with a constant (GH-144150) --- Include/internal/pycore_optimizer_types.h | 2 + Lib/test/test_capi/test_opt.py | 132 ++++++++++++++++++ Python/optimizer_analysis.c | 5 + Python/optimizer_bytecodes.c | 36 ++++- Python/optimizer_cases.c.h | 33 ++++- Python/optimizer_symbols.c | 160 +++++++++++++++++++++- 6 files changed, 360 insertions(+), 8 deletions(-) diff --git a/Include/internal/pycore_optimizer_types.h b/Include/internal/pycore_optimizer_types.h index a879ca26ce7b63..b4b93e8353812a 100644 --- a/Include/internal/pycore_optimizer_types.h +++ b/Include/internal/pycore_optimizer_types.h @@ -76,6 +76,8 @@ typedef struct { typedef enum { JIT_PRED_IS, JIT_PRED_IS_NOT, + JIT_PRED_EQ, + JIT_PRED_NE, } JitOptPredicateKind; typedef struct { diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index f224984777500a..ff3adb1d4542e0 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -890,6 +890,138 @@ def testfunc(n): self.assertLessEqual(len(guard_nos_unicode_count), 1) self.assertIn("_COMPARE_OP_STR", uops) + def test_compare_int_eq_narrows_to_constant(self): + def f(n): + def return_1(): + return 1 + + hits = 0 + v = return_1() + for _ in range(n): + if v == 1: + if v == 1: + hits += 1 + return hits + + res, ex = self._run_with_optimizer(f, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + # Constant narrowing allows constant folding for second comparison + self.assertLessEqual(count_ops(ex, "_COMPARE_OP_INT"), 1) + + def test_compare_int_ne_narrows_to_constant(self): + def f(n): + def return_1(): + return 1 + + hits = 0 + v = return_1() + for _ in range(n): + if v != 1: + hits += 1000 + else: + if v == 1: + hits += v + 1 + return hits + + res, ex = self._run_with_optimizer(f, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD * 2) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + # Constant narrowing allows constant folding for second comparison + self.assertLessEqual(count_ops(ex, "_COMPARE_OP_INT"), 1) + + def test_compare_float_eq_narrows_to_constant(self): + def f(n): + def return_tenth(): + return 0.1 + + hits = 0 + v = return_tenth() + for _ in range(n): + if v == 0.1: + if v == 0.1: + hits += 1 + return hits + + res, ex = self._run_with_optimizer(f, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + # Constant narrowing allows constant folding for second comparison + self.assertLessEqual(count_ops(ex, "_COMPARE_OP_FLOAT"), 1) + + def test_compare_float_ne_narrows_to_constant(self): + def f(n): + def return_tenth(): + return 0.1 + + hits = 0 + v = return_tenth() + for _ in range(n): + if v != 0.1: + hits += 1000 + else: + if v == 0.1: + hits += 1 + return hits + + res, ex = self._run_with_optimizer(f, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + # Constant narrowing allows constant folding for second comparison + self.assertLessEqual(count_ops(ex, "_COMPARE_OP_FLOAT"), 1) + + def test_compare_str_eq_narrows_to_constant(self): + def f(n): + def return_hello(): + return "hello" + + hits = 0 + v = return_hello() + for _ in range(n): + if v == "hello": + if v == "hello": + hits += 1 + return hits + + res, ex = self._run_with_optimizer(f, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + # Constant narrowing allows constant folding for second comparison + self.assertLessEqual(count_ops(ex, "_COMPARE_OP_STR"), 1) + + def test_compare_str_ne_narrows_to_constant(self): + def f(n): + def return_hello(): + return "hello" + + hits = 0 + v = return_hello() + for _ in range(n): + if v != "hello": + hits += 1000 + else: + if v == "hello": + hits += 1 + return hits + + res, ex = self._run_with_optimizer(f, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + # Constant narrowing allows constant folding for second comparison + self.assertLessEqual(count_ops(ex, "_COMPARE_OP_STR"), 1) + @unittest.skip("gh-139109 WIP") def test_combine_stack_space_checks_sequential(self): def dummy12(x): diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 6c381ab184fd85..65c9239f1ff427 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -250,6 +250,11 @@ add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr, #define sym_new_predicate _Py_uop_sym_new_predicate #define sym_apply_predicate_narrowing _Py_uop_sym_apply_predicate_narrowing +/* Comparison oparg masks */ +#define COMPARE_LT_MASK 2 +#define COMPARE_GT_MASK 4 +#define COMPARE_EQ_MASK 8 + #define JUMP_TO_LABEL(label) goto label; static int diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 27b974f372df99..38cd088d9fb030 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -521,21 +521,51 @@ dummy_func(void) { } op(_COMPARE_OP_INT, (left, right -- res, l, r)) { - res = sym_new_type(ctx, &PyBool_Type); + int cmp_mask = oparg & (COMPARE_LT_MASK | COMPARE_GT_MASK | COMPARE_EQ_MASK); + + if (cmp_mask == COMPARE_EQ_MASK) { + res = sym_new_predicate(ctx, left, right, JIT_PRED_EQ); + } + else if (cmp_mask == (COMPARE_LT_MASK | COMPARE_GT_MASK)) { + res = sym_new_predicate(ctx, left, right, JIT_PRED_NE); + } + else { + res = sym_new_type(ctx, &PyBool_Type); + } l = left; r = right; REPLACE_OPCODE_IF_EVALUATES_PURE(left, right, res); } op(_COMPARE_OP_FLOAT, (left, right -- res, l, r)) { - res = sym_new_type(ctx, &PyBool_Type); + int cmp_mask = oparg & (COMPARE_LT_MASK | COMPARE_GT_MASK | COMPARE_EQ_MASK); + + if (cmp_mask == COMPARE_EQ_MASK) { + res = sym_new_predicate(ctx, left, right, JIT_PRED_EQ); + } + else if (cmp_mask == (COMPARE_LT_MASK | COMPARE_GT_MASK)) { + res = sym_new_predicate(ctx, left, right, JIT_PRED_NE); + } + else { + res = sym_new_type(ctx, &PyBool_Type); + } l = left; r = right; REPLACE_OPCODE_IF_EVALUATES_PURE(left, right, res); } op(_COMPARE_OP_STR, (left, right -- res, l, r)) { - res = sym_new_type(ctx, &PyBool_Type); + int cmp_mask = oparg & (COMPARE_LT_MASK | COMPARE_GT_MASK | COMPARE_EQ_MASK); + + if (cmp_mask == COMPARE_EQ_MASK) { + res = sym_new_predicate(ctx, left, right, JIT_PRED_EQ); + } + else if (cmp_mask == (COMPARE_LT_MASK | COMPARE_GT_MASK)) { + res = sym_new_predicate(ctx, left, right, JIT_PRED_NE); + } + else { + res = sym_new_type(ctx, &PyBool_Type); + } l = left; r = right; REPLACE_OPCODE_IF_EVALUATES_PURE(left, right, res); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 3b25533e07f743..e9405473fe2e0d 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2118,7 +2118,16 @@ JitOptRef r; right = stack_pointer[-1]; left = stack_pointer[-2]; - res = sym_new_type(ctx, &PyBool_Type); + int cmp_mask = oparg & (COMPARE_LT_MASK | COMPARE_GT_MASK | COMPARE_EQ_MASK); + if (cmp_mask == COMPARE_EQ_MASK) { + res = sym_new_predicate(ctx, left, right, JIT_PRED_EQ); + } + else if (cmp_mask == (COMPARE_LT_MASK | COMPARE_GT_MASK)) { + res = sym_new_predicate(ctx, left, right, JIT_PRED_NE); + } + else { + res = sym_new_type(ctx, &PyBool_Type); + } l = left; r = right; if ( @@ -2178,7 +2187,16 @@ JitOptRef r; right = stack_pointer[-1]; left = stack_pointer[-2]; - res = sym_new_type(ctx, &PyBool_Type); + int cmp_mask = oparg & (COMPARE_LT_MASK | COMPARE_GT_MASK | COMPARE_EQ_MASK); + if (cmp_mask == COMPARE_EQ_MASK) { + res = sym_new_predicate(ctx, left, right, JIT_PRED_EQ); + } + else if (cmp_mask == (COMPARE_LT_MASK | COMPARE_GT_MASK)) { + res = sym_new_predicate(ctx, left, right, JIT_PRED_NE); + } + else { + res = sym_new_type(ctx, &PyBool_Type); + } l = left; r = right; if ( @@ -2242,7 +2260,16 @@ JitOptRef r; right = stack_pointer[-1]; left = stack_pointer[-2]; - res = sym_new_type(ctx, &PyBool_Type); + int cmp_mask = oparg & (COMPARE_LT_MASK | COMPARE_GT_MASK | COMPARE_EQ_MASK); + if (cmp_mask == COMPARE_EQ_MASK) { + res = sym_new_predicate(ctx, left, right, JIT_PRED_EQ); + } + else if (cmp_mask == (COMPARE_LT_MASK | COMPARE_GT_MASK)) { + res = sym_new_predicate(ctx, left, right, JIT_PRED_NE); + } + else { + res = sym_new_type(ctx, &PyBool_Type); + } l = left; r = right; if ( diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index a9640aaa5072c5..51cf6e189f0f49 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -875,9 +875,11 @@ _Py_uop_sym_apply_predicate_narrowing(JitOptContext *ctx, JitOptRef ref, bool br bool narrow = false; switch(pred.kind) { + case JIT_PRED_EQ: case JIT_PRED_IS: narrow = branch_is_true; break; + case JIT_PRED_NE: case JIT_PRED_IS_NOT: narrow = !branch_is_true; break; @@ -1300,11 +1302,11 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) TEST_PREDICATE(_Py_uop_sym_is_const(ctx, subject), "predicate narrowing did not const-narrow subject (None)"); TEST_PREDICATE(_Py_uop_sym_get_const(ctx, subject) == Py_None, "predicate narrowing did not narrow subject to None"); - // Test narrowing subject to numerical constant + // Test narrowing subject to numerical constant from is comparison subject = _Py_uop_sym_new_unknown(ctx); PyObject *one_obj = PyLong_FromLong(1); JitOptRef const_one = _Py_uop_sym_new_const(ctx, one_obj); - if (PyJitRef_IsNull(subject) || PyJitRef_IsNull(const_one)) { + if (PyJitRef_IsNull(subject) || one_obj == NULL || PyJitRef_IsNull(const_one)) { goto fail; } ref = _Py_uop_sym_new_predicate(ctx, subject, const_one, JIT_PRED_IS); @@ -1315,6 +1317,160 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) TEST_PREDICATE(_Py_uop_sym_is_const(ctx, subject), "predicate narrowing did not const-narrow subject (1)"); TEST_PREDICATE(_Py_uop_sym_get_const(ctx, subject) == one_obj, "predicate narrowing did not narrow subject to 1"); + // Test narrowing subject to constant from EQ predicate for int + subject = _Py_uop_sym_new_unknown(ctx); + if (PyJitRef_IsNull(subject)) { + goto fail; + } + ref = _Py_uop_sym_new_predicate(ctx, subject, const_one, JIT_PRED_EQ); + if (PyJitRef_IsNull(ref)) { + goto fail; + } + _Py_uop_sym_apply_predicate_narrowing(ctx, ref, true); + TEST_PREDICATE(_Py_uop_sym_is_const(ctx, subject), "predicate narrowing did not const-narrow subject (1)"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, subject) == one_obj, "predicate narrowing did not narrow subject to 1"); + + // Resolving EQ predicate to False should not narrow subject for int + subject = _Py_uop_sym_new_unknown(ctx); + if (PyJitRef_IsNull(subject)) { + goto fail; + } + ref = _Py_uop_sym_new_predicate(ctx, subject, const_one, JIT_PRED_EQ); + if (PyJitRef_IsNull(ref)) { + goto fail; + } + _Py_uop_sym_apply_predicate_narrowing(ctx, ref, false); + TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, subject), "predicate narrowing incorrectly narrowed subject (inverted/true)"); + + // Test narrowing subject to constant from NE predicate for int + subject = _Py_uop_sym_new_unknown(ctx); + if (PyJitRef_IsNull(subject)) { + goto fail; + } + ref = _Py_uop_sym_new_predicate(ctx, subject, const_one, JIT_PRED_NE); + if (PyJitRef_IsNull(ref)) { + goto fail; + } + _Py_uop_sym_apply_predicate_narrowing(ctx, ref, false); + TEST_PREDICATE(_Py_uop_sym_is_const(ctx, subject), "predicate narrowing did not const-narrow subject (1)"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, subject) == one_obj, "predicate narrowing did not narrow subject to 1"); + + // Resolving NE predicate to true should not narrow subject for int + subject = _Py_uop_sym_new_unknown(ctx); + if (PyJitRef_IsNull(subject)) { + goto fail; + } + ref = _Py_uop_sym_new_predicate(ctx, subject, const_one, JIT_PRED_NE); + if (PyJitRef_IsNull(ref)) { + goto fail; + } + _Py_uop_sym_apply_predicate_narrowing(ctx, ref, true); + TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, subject), "predicate narrowing incorrectly narrowed subject (inverted/true)"); + + // Test narrowing subject to constant from EQ predicate for float + subject = _Py_uop_sym_new_unknown(ctx); + PyObject *float_tenth_obj = PyFloat_FromDouble(0.1); + JitOptRef const_float_tenth = _Py_uop_sym_new_const(ctx, float_tenth_obj); + if (PyJitRef_IsNull(subject) || float_tenth_obj == NULL || PyJitRef_IsNull(const_float_tenth)) { + goto fail; + } + ref = _Py_uop_sym_new_predicate(ctx, subject, const_float_tenth, JIT_PRED_EQ); + if (PyJitRef_IsNull(ref)) { + goto fail; + } + _Py_uop_sym_apply_predicate_narrowing(ctx, ref, true); + TEST_PREDICATE(_Py_uop_sym_is_const(ctx, subject), "predicate narrowing did not const-narrow subject (float)"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, subject) == float_tenth_obj, "predicate narrowing did not narrow subject to 0.1"); + + // Resolving EQ predicate to False should not narrow subject for float + subject = _Py_uop_sym_new_unknown(ctx); + if (PyJitRef_IsNull(subject)) { + goto fail; + } + ref = _Py_uop_sym_new_predicate(ctx, subject, const_float_tenth, JIT_PRED_EQ); + if (PyJitRef_IsNull(ref)) { + goto fail; + } + _Py_uop_sym_apply_predicate_narrowing(ctx, ref, false); + TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, subject), "predicate narrowing incorrectly narrowed subject (inverted/true)"); + + // Test narrowing subject to constant from NE predicate for float + subject = _Py_uop_sym_new_unknown(ctx); + if (PyJitRef_IsNull(subject)) { + goto fail; + } + ref = _Py_uop_sym_new_predicate(ctx, subject, const_float_tenth, JIT_PRED_NE); + if (PyJitRef_IsNull(ref)) { + goto fail; + } + _Py_uop_sym_apply_predicate_narrowing(ctx, ref, false); + TEST_PREDICATE(_Py_uop_sym_is_const(ctx, subject), "predicate narrowing did not const-narrow subject (float)"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, subject) == float_tenth_obj, "predicate narrowing did not narrow subject to 0.1"); + + // Resolving NE predicate to true should not narrow subject for float + subject = _Py_uop_sym_new_unknown(ctx); + if (PyJitRef_IsNull(subject)) { + goto fail; + } + ref = _Py_uop_sym_new_predicate(ctx, subject, const_float_tenth, JIT_PRED_NE); + if (PyJitRef_IsNull(ref)) { + goto fail; + } + _Py_uop_sym_apply_predicate_narrowing(ctx, ref, true); + TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, subject), "predicate narrowing incorrectly narrowed subject (inverted/true)"); + + // Test narrowing subject to constant from EQ predicate for str + subject = _Py_uop_sym_new_unknown(ctx); + PyObject *str_hello_obj = PyUnicode_FromString("hello"); + JitOptRef const_str_hello = _Py_uop_sym_new_const(ctx, str_hello_obj); + if (PyJitRef_IsNull(subject) || str_hello_obj == NULL || PyJitRef_IsNull(const_str_hello)) { + goto fail; + } + ref = _Py_uop_sym_new_predicate(ctx, subject, const_str_hello, JIT_PRED_EQ); + if (PyJitRef_IsNull(ref)) { + goto fail; + } + _Py_uop_sym_apply_predicate_narrowing(ctx, ref, true); + TEST_PREDICATE(_Py_uop_sym_is_const(ctx, subject), "predicate narrowing did not const-narrow subject (str)"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, subject) == str_hello_obj, "predicate narrowing did not narrow subject to hello"); + + // Resolving EQ predicate to False should not narrow subject for str + subject = _Py_uop_sym_new_unknown(ctx); + if (PyJitRef_IsNull(subject)) { + goto fail; + } + ref = _Py_uop_sym_new_predicate(ctx, subject, const_str_hello, JIT_PRED_EQ); + if (PyJitRef_IsNull(ref)) { + goto fail; + } + _Py_uop_sym_apply_predicate_narrowing(ctx, ref, false); + TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, subject), "predicate narrowing incorrectly narrowed subject (inverted/true)"); + + // Test narrowing subject to constant from NE predicate for str + subject = _Py_uop_sym_new_unknown(ctx); + if (PyJitRef_IsNull(subject)) { + goto fail; + } + ref = _Py_uop_sym_new_predicate(ctx, subject, const_str_hello, JIT_PRED_NE); + if (PyJitRef_IsNull(ref)) { + goto fail; + } + _Py_uop_sym_apply_predicate_narrowing(ctx, ref, false); + TEST_PREDICATE(_Py_uop_sym_is_const(ctx, subject), "predicate narrowing did not const-narrow subject (str)"); + TEST_PREDICATE(_Py_uop_sym_get_const(ctx, subject) == str_hello_obj, "predicate narrowing did not narrow subject to hello"); + + // Resolving NE predicate to true should not narrow subject for str + subject = _Py_uop_sym_new_unknown(ctx); + if (PyJitRef_IsNull(subject)) { + goto fail; + } + ref = _Py_uop_sym_new_predicate(ctx, subject, const_str_hello, JIT_PRED_NE); + if (PyJitRef_IsNull(ref)) { + goto fail; + } + _Py_uop_sym_apply_predicate_narrowing(ctx, ref, true); + TEST_PREDICATE(!_Py_uop_sym_is_const(ctx, subject), "predicate narrowing incorrectly narrowed subject (inverted/true)"); + val_big = PyNumber_Lshift(_PyLong_GetOne(), PyLong_FromLong(66)); if (val_big == NULL) { goto fail; From 5f736a0432c2e43de8f35d9a75aa814e4407e637 Mon Sep 17 00:00:00 2001 From: Marc Mueller <30130371+cdce8p@users.noreply.github.com> Date: Sat, 24 Jan 2026 11:19:21 +0100 Subject: [PATCH 6/7] gh-142913: Add generated test files to gitattributes (GH-144209) --- .gitattributes | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitattributes b/.gitattributes index e88d6ea13e2a5e..14c1eced0cfed4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -94,6 +94,8 @@ Lib/test/test_stable_abi_ctypes.py generated Lib/test/test_zoneinfo/data/*.json generated Lib/token.py generated Misc/sbom.spdx.json generated +Modules/_testinternalcapi/test_cases.c.h generated +Modules/_testinternalcapi/test_targets.h generated Objects/typeslots.inc generated PC/python3dll.c generated Parser/parser.c generated From 012c498035a9adfe9fd218907db1550ecb057f77 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 24 Jan 2026 13:13:50 +0200 Subject: [PATCH 7/7] gh-142037: Improve error messages for printf-style formatting (GH-142081) This affects string formatting as well as bytes and bytearray formatting. * For errors in the format string, always include the position of the start of the format unit. * For errors related to the formatted arguments, always include the number or the name of the formatted argument. * Suggest more probable causes of errors in the format string (stray %, unsupported format, unexpected character). * Provide more information when the number of arguments does not match the number of format units. * Raise more specific errors when access of arguments by name is mixed with sequential access and when * is used with a mapping. * Add tests for some uncovered cases. --- Lib/test/test_bytes.py | 22 +- Lib/test/test_format.py | 239 ++++++++++++++--- Lib/test/test_peepholer.py | 19 +- Lib/test/test_str.py | 45 +++- ...-11-29-10-06-06.gh-issue-142037.OpIGzK.rst | 7 + Objects/bytesobject.c | 221 +++++++++++----- Objects/unicode_format.c | 245 +++++++++++++----- 7 files changed, 591 insertions(+), 207 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-11-29-10-06-06.gh-issue-142037.OpIGzK.rst diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index c42c0d4f5e9bc2..742bad21a3346b 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -792,16 +792,16 @@ def __int__(self): pi = PseudoFloat(3.1415) exceptions_params = [ - ('%x format: an integer is required, not float', b'%x', 3.14), - ('%X format: an integer is required, not float', b'%X', 2.11), - ('%o format: an integer is required, not float', b'%o', 1.79), - ('%x format: an integer is required, not PseudoFloat', b'%x', pi), - ('%x format: an integer is required, not complex', b'%x', 3j), - ('%X format: an integer is required, not complex', b'%X', 2j), - ('%o format: an integer is required, not complex', b'%o', 1j), - ('%u format: a real number is required, not complex', b'%u', 3j), - ('%i format: a real number is required, not complex', b'%i', 2j), - ('%d format: a real number is required, not complex', b'%d', 2j), + ('%x requires an integer, not float', b'%x', 3.14), + ('%X requires an integer, not float', b'%X', 2.11), + ('%o requires an integer, not float', b'%o', 1.79), + (r'%x requires an integer, not .*\.PseudoFloat', b'%x', pi), + ('%x requires an integer, not complex', b'%x', 3j), + ('%X requires an integer, not complex', b'%X', 2j), + ('%o requires an integer, not complex', b'%o', 1j), + ('%u requires a real number, not complex', b'%u', 3j), + ('%i requires a real number, not complex', b'%i', 2j), + ('%d requires a real number, not complex', b'%d', 2j), ( r'%c requires an integer in range\(256\)' r' or a single byte, not .*\.PseudoFloat', @@ -810,7 +810,7 @@ def __int__(self): ] for msg, format_bytes, value in exceptions_params: - with self.assertRaisesRegex(TypeError, msg): + with self.assertRaisesRegex(TypeError, 'format argument: ' + msg): operator.mod(format_bytes, value) def test_memory_leak_gh_140939(self): diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index 1f626d87fa6c7a..00f1ab44b0a8fa 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -57,10 +57,6 @@ def testcommon(formatstr, args, output=None, limit=None, overflowok=False): else: b_format = formatstr ba_format = bytearray(b_format) - b_args = [] - if not isinstance(args, tuple): - args = (args, ) - b_args = tuple(args) if output is None: b_output = ba_output = None else: @@ -69,8 +65,8 @@ def testcommon(formatstr, args, output=None, limit=None, overflowok=False): else: b_output = output ba_output = bytearray(b_output) - testformat(b_format, b_args, b_output, limit, overflowok) - testformat(ba_format, b_args, ba_output, limit, overflowok) + testformat(b_format, args, b_output, limit, overflowok) + testformat(ba_format, args, ba_output, limit, overflowok) def test_exc(formatstr, args, exception, excmsg): try: @@ -82,6 +78,7 @@ def test_exc(formatstr, args, exception, excmsg): else: if verbose: print('no') print('Unexpected ', exception, ':', repr(str(exc))) + raise except: if verbose: print('no') print('Unexpected exception') @@ -92,6 +89,8 @@ def test_exc(formatstr, args, exception, excmsg): def test_exc_common(formatstr, args, exception, excmsg): # test str and bytes test_exc(formatstr, args, exception, excmsg) + if isinstance(args, dict): + args = {k.encode('ascii'): v for k, v in args.items()} test_exc(formatstr.encode('ascii'), args, exception, excmsg) class FormatTest(unittest.TestCase): @@ -272,45 +271,154 @@ def test_common_format(self): if verbose: print('Testing exceptions') - test_exc_common('%', (), ValueError, "incomplete format") - test_exc_common('% %s', 1, ValueError, - "unsupported format character '%' (0x25) at index 2") + test_exc_common('abc %', (), ValueError, "stray % at position 4") + test_exc_common('abc % %s', 1, ValueError, + "stray % at position 4 or unexpected format character '%' at position 6") + test_exc_common('abc %z', 1, ValueError, + "unsupported format %z at position 4") + test_exc_common("abc %Id", 1, ValueError, + "unsupported format %I at position 4") + test_exc_common("abc %'d", 1, ValueError, + "stray % at position 4 or unexpected format character \"'\" at position 5") + test_exc_common("abc %1 d", 1, ValueError, + "stray % at position 4 or unexpected format character ' ' at position 6") + test_exc_common('abc % (x)r', {}, ValueError, + "stray % at position 4 or unexpected format character '(' at position 6") + test_exc_common('abc %((x)r', {}, ValueError, + "stray % or incomplete format key at position 4") + test_exc_common('%r %r', 1, TypeError, + "not enough arguments for format string (got 1)") + test_exc_common('%r %r', (1,), TypeError, + "not enough arguments for format string (got 1)") + test_exc_common('%r', (), TypeError, + "not enough arguments for format string (got 0)") + test_exc_common('abc %' + '9'*50 + 'r', 1, ValueError, + "width too big at position 4") + test_exc_common('abc %.' + '9'*50 + 'r', 1, ValueError, + "precision too big at position 4") + test_exc_common('%r %*r', 1, TypeError, + "not enough arguments for format string (got 1)") + test_exc_common('%r %*r', (1,), TypeError, + "not enough arguments for format string (got 1)") + test_exc_common('%*r', (1,), TypeError, + "not enough arguments for format string (got 1)") + test_exc_common('%*r', (), TypeError, + "not enough arguments for format string (got 0)") + test_exc_common('%r %.*r', 1, TypeError, + "not enough arguments for format string (got 1)") + test_exc_common('%r %.*r', (1,), TypeError, + "not enough arguments for format string (got 1)") + test_exc_common('%.*r', (1,), TypeError, + "not enough arguments for format string (got 1)") + test_exc_common('%.*r', (), TypeError, + "not enough arguments for format string (got 0)") + test_exc_common('%(x)r', 1, TypeError, + "format requires a mapping, not int") + test_exc_common('%*r', 1, TypeError, + "not enough arguments for format string (got 1)") + test_exc_common('%*r', 3.14, TypeError, + "not enough arguments for format string (got 1)") + test_exc_common('%*r', (3.14, 1), TypeError, + "format argument 1: * requires int, not float") + test_exc_common('%.*r', 1, TypeError, + "not enough arguments for format string (got 1)") + test_exc_common('%.*r', 3.14, TypeError, + "not enough arguments for format string (got 1)") + test_exc_common('%.*r', (3.14, 1), TypeError, + "format argument 1: * requires int, not float") + test_exc_common('%*r', (2**1000, 1), OverflowError, + "format argument 1: too big for width") + test_exc_common('%*r', (-2**1000, 1), OverflowError, + "format argument 1: too big for width") + test_exc_common('%.*r', (2**1000, 1), OverflowError, + "format argument 1: too big for precision") + test_exc_common('%.*r', (-2**1000, 1), OverflowError, + "format argument 1: too big for precision") test_exc_common('%d', '1', TypeError, - "%d format: a real number is required, not str") + "format argument: %d requires a real number, not str") test_exc_common('%d', b'1', TypeError, - "%d format: a real number is required, not bytes") + "format argument: %d requires a real number, not bytes") + test_exc_common('%d', ('1',), TypeError, + "format argument 1: %d requires a real number, not str") test_exc_common('%x', '1', TypeError, - "%x format: an integer is required, not str") + "format argument: %x requires an integer, not str") test_exc_common('%x', 3.14, TypeError, - "%x format: an integer is required, not float") + "format argument: %x requires an integer, not float") + test_exc_common('%x', ('1',), TypeError, + "format argument 1: %x requires an integer, not str") test_exc_common('%i', '1', TypeError, - "%i format: a real number is required, not str") + "format argument: %i requires a real number, not str") test_exc_common('%i', b'1', TypeError, - "%i format: a real number is required, not bytes") + "format argument: %i requires a real number, not bytes") + test_exc_common('%g', '1', TypeError, + "format argument: %g requires a real number, not str") + test_exc_common('%g', ('1',), TypeError, + "format argument 1: %g requires a real number, not str") def test_str_format(self): testformat("%r", "\u0378", "'\\u0378'") # non printable testformat("%a", "\u0378", "'\\u0378'") # non printable testformat("%r", "\u0374", "'\u0374'") # printable testformat("%a", "\u0374", "'\\u0374'") # printable + testformat('%(x)r', {'x': 1}, '1') # Test exception for unknown format characters, etc. if verbose: print('Testing exceptions') test_exc('abc %b', 1, ValueError, - "unsupported format character 'b' (0x62) at index 5") - #test_exc(unicode('abc %\u3000','raw-unicode-escape'), 1, ValueError, - # "unsupported format character '?' (0x3000) at index 5") - test_exc('%g', '1', TypeError, "must be real number, not str") + "unsupported format %b at position 4") + test_exc("abc %\nd", 1, ValueError, + "stray % at position 4 or unexpected format character U+000A at position 5") + test_exc("abc %\x1fd", 1, ValueError, + "stray % at position 4 or unexpected format character U+001F at position 5") + test_exc("abc %\x7fd", 1, ValueError, + "stray % at position 4 or unexpected format character U+007F at position 5") + test_exc("abc %\x80d", 1, ValueError, + "stray % at position 4 or unexpected format character U+0080 at position 5") + test_exc('abc %äd', 1, ValueError, + "stray % at position 4 or unexpected format character 'ä' (U+00E4) at position 5") + test_exc('abc %€d', 1, ValueError, + "stray % at position 4 or unexpected format character '€' (U+20AC) at position 5") test_exc('no format', '1', TypeError, - "not all arguments converted during string formatting") - test_exc('%c', -1, OverflowError, "%c arg not in range(0x110000)") + "not all arguments converted during string formatting (required 0, got 1)") + test_exc('%r', (1, 2), TypeError, + "not all arguments converted during string formatting (required 1, got 2)") + test_exc('%(x)r %r', {'x': 1}, ValueError, + "format requires a parenthesised mapping key at position 6") + test_exc('%(x)*r', {'x': 1}, ValueError, + "* cannot be used with a parenthesised mapping key at position 0") + test_exc('%(x).*r', {'x': 1}, ValueError, + "* cannot be used with a parenthesised mapping key at position 0") + test_exc('%(x)d', {'x': '1'}, TypeError, + "format argument 'x': %d requires a real number, not str") + test_exc('%(x)x', {'x': '1'}, TypeError, + "format argument 'x': %x requires an integer, not str") + test_exc('%(x)g', {'x': '1'}, TypeError, + "format argument 'x': %g requires a real number, not str") + test_exc('%c', -1, OverflowError, + "format argument: %c argument not in range(0x110000)") + test_exc('%c', (-1,), OverflowError, + "format argument 1: %c argument not in range(0x110000)") + test_exc('%(x)c', {'x': -1}, OverflowError, + "format argument 'x': %c argument not in range(0x110000)") test_exc('%c', sys.maxunicode+1, OverflowError, - "%c arg not in range(0x110000)") - #test_exc('%c', 2**128, OverflowError, "%c arg not in range(0x110000)") - test_exc('%c', 3.14, TypeError, "%c requires an int or a unicode character, not float") - test_exc('%c', 'ab', TypeError, "%c requires an int or a unicode character, not a string of length 2") - test_exc('%c', b'x', TypeError, "%c requires an int or a unicode character, not bytes") + "format argument: %c argument not in range(0x110000)") + test_exc('%c', 2**128, OverflowError, + "format argument: %c argument not in range(0x110000)") + test_exc('%c', 3.14, TypeError, + "format argument: %c requires an integer or a unicode character, not float") + test_exc('%c', (3.14,), TypeError, + "format argument 1: %c requires an integer or a unicode character, not float") + test_exc('%(x)c', {'x': 3.14}, TypeError, + "format argument 'x': %c requires an integer or a unicode character, not float") + test_exc('%c', 'ab', TypeError, + "format argument: %c requires an integer or a unicode character, not a string of length 2") + test_exc('%c', ('ab',), TypeError, + "format argument 1: %c requires an integer or a unicode character, not a string of length 2") + test_exc('%(x)c', {'x': 'ab'}, TypeError, + "format argument 'x': %c requires an integer or a unicode character, not a string of length 2") + test_exc('%c', b'x', TypeError, + "format argument: %c requires an integer or a unicode character, not bytes") if maxsize == 2**31-1: # crashes 2.2.1 and earlier: @@ -355,36 +463,83 @@ def __bytes__(self): testcommon(b"%r", b"ghi", b"b'ghi'") testcommon(b"%r", "jkl", b"'jkl'") testcommon(b"%r", "\u0544", b"'\\u0544'") + testcommon(b'%(x)r', {b'x': 1}, b'1') # Test exception for unknown format characters, etc. if verbose: print('Testing exceptions') - test_exc(b'%g', '1', TypeError, "float argument required, not str") - test_exc(b'%g', b'1', TypeError, "float argument required, not bytes") + test_exc(b"abc %\nd", 1, ValueError, + "stray % at position 4 or unexpected format character with code 0x0a at position 5") + test_exc(b"abc %'d", 1, ValueError, + "stray % at position 4 or unexpected format character \"'\" at position 5") + test_exc(b"abc %\x1fd", 1, ValueError, + "stray % at position 4 or unexpected format character with code 0x1f at position 5") + test_exc(b"abc %\x7fd", 1, ValueError, + "stray % at position 4 or unexpected format character with code 0x7f at position 5") + test_exc(b"abc %\x80d", 1, ValueError, + "stray % at position 4 or unexpected format character with code 0x80 at position 5") test_exc(b'no format', 7, TypeError, - "not all arguments converted during bytes formatting") + "not all arguments converted during bytes formatting (required 0, got 1)") test_exc(b'no format', b'1', TypeError, - "not all arguments converted during bytes formatting") + "not all arguments converted during bytes formatting (required 0, got 1)") test_exc(b'no format', bytearray(b'1'), TypeError, - "not all arguments converted during bytes formatting") + "not all arguments converted during bytes formatting (required 0, got 1)") + test_exc(b'%r', (1, 2), TypeError, + "not all arguments converted during bytes formatting (required 1, got 2)") + test_exc(b'%(x)r %r', {b'x': 1}, ValueError, + "format requires a parenthesised mapping key at position 6") + test_exc(b'%(x)*r', {b'x': 1}, ValueError, + "* cannot be used with a parenthesised mapping key at position 0") + test_exc(b'%(x).*r', {b'x': 1}, ValueError, + "* cannot be used with a parenthesised mapping key at position 0") + test_exc(b'%(x)d', {b'x': '1'}, TypeError, + "format argument b'x': %d requires a real number, not str") + test_exc(b'%(x)x', {b'x': '1'}, TypeError, + "format argument b'x': %x requires an integer, not str") + test_exc(b'%(x)g', {b'x': '1'}, TypeError, + "format argument b'x': %g requires a real number, not str") test_exc(b"%c", -1, OverflowError, - "%c arg not in range(256)") + "format argument: %c argument not in range(256)") + test_exc(b"%c", (-1,), OverflowError, + "format argument 1: %c argument not in range(256)") + test_exc(b"%(x)c", {b'x': -1}, OverflowError, + "format argument b'x': %c argument not in range(256)") test_exc(b"%c", 256, OverflowError, - "%c arg not in range(256)") + "format argument: %c argument not in range(256)") test_exc(b"%c", 2**128, OverflowError, - "%c arg not in range(256)") + "format argument: %c argument not in range(256)") test_exc(b"%c", b"Za", TypeError, - "%c requires an integer in range(256) or a single byte, not a bytes object of length 2") + "format argument: %c requires an integer in range(256) or a single byte, not a bytes object of length 2") + test_exc(b"%c", (b"Za",), TypeError, + "format argument 1: %c requires an integer in range(256) or a single byte, not a bytes object of length 2") + test_exc(b"%(x)c", {b'x': b"Za"}, TypeError, + "format argument b'x': %c requires an integer in range(256) or a single byte, not a bytes object of length 2") + test_exc(b"%c", bytearray(b"Za"), TypeError, + "format argument: %c requires an integer in range(256) or a single byte, not a bytearray object of length 2") + test_exc(b"%c", (bytearray(b"Za"),), TypeError, + "format argument 1: %c requires an integer in range(256) or a single byte, not a bytearray object of length 2") + test_exc(b"%(x)c", {b'x': bytearray(b"Za")}, TypeError, + "format argument b'x': %c requires an integer in range(256) or a single byte, not a bytearray object of length 2") test_exc(b"%c", "Y", TypeError, - "%c requires an integer in range(256) or a single byte, not str") + "format argument: %c requires an integer in range(256) or a single byte, not str") test_exc(b"%c", 3.14, TypeError, - "%c requires an integer in range(256) or a single byte, not float") + "format argument: %c requires an integer in range(256) or a single byte, not float") + test_exc(b"%c", (3.14,), TypeError, + "format argument 1: %c requires an integer in range(256) or a single byte, not float") + test_exc(b"%(x)c", {b'x': 3.14}, TypeError, + "format argument b'x': %c requires an integer in range(256) or a single byte, not float") test_exc(b"%b", "Xc", TypeError, - "%b requires a bytes-like object, " - "or an object that implements __bytes__, not 'str'") + "format argument: %b requires a bytes-like object, " + "or an object that implements __bytes__, not str") + test_exc(b"%b", ("Xc",), TypeError, + "format argument 1: %b requires a bytes-like object, " + "or an object that implements __bytes__, not str") + test_exc(b"%(x)b", {b'x': "Xc"}, TypeError, + "format argument b'x': %b requires a bytes-like object, " + "or an object that implements __bytes__, not str") test_exc(b"%s", "Wd", TypeError, - "%b requires a bytes-like object, " - "or an object that implements __bytes__, not 'str'") + "format argument: %b requires a bytes-like object, " + "or an object that implements __bytes__, not str") if maxsize == 2**31-1: # crashes 2.2.1 and earlier: @@ -626,7 +781,7 @@ def test_specifier_z_error(self): with self.assertRaisesRegex(ValueError, error_msg): f"{'x':zs}" # can't apply to string - error_msg = re.escape("unsupported format character 'z'") + error_msg = re.escape("unsupported format %z at position 0") with self.assertRaisesRegex(ValueError, error_msg): "%z.1f" % 0 # not allowed in old style string interpolation with self.assertRaisesRegex(ValueError, error_msg): diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 1caf6de4a14e39..88d20bbb028d6f 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -733,22 +733,27 @@ def test_format_errors(self): with self.assertRaisesRegex(TypeError, 'not all arguments converted during string formatting'): eval("'%s' % (x, y)", {'x': 1, 'y': 2}) - with self.assertRaisesRegex(ValueError, 'incomplete format'): + with self.assertRaisesRegex(ValueError, 'stray % at position 2'): eval("'%s%' % (x,)", {'x': 1234}) - with self.assertRaisesRegex(ValueError, 'incomplete format'): + with self.assertRaisesRegex(ValueError, 'stray % at position 4'): eval("'%s%%%' % (x,)", {'x': 1234}) with self.assertRaisesRegex(TypeError, 'not enough arguments for format string'): eval("'%s%z' % (x,)", {'x': 1234}) - with self.assertRaisesRegex(ValueError, 'unsupported format character'): + with self.assertRaisesRegex(ValueError, + 'unsupported format %z at position 2'): eval("'%s%z' % (x, 5)", {'x': 1234}) - with self.assertRaisesRegex(TypeError, 'a real number is required, not str'): + with self.assertRaisesRegex(TypeError, + 'format argument 1: %d requires a real number, not str'): eval("'%d' % (x,)", {'x': '1234'}) - with self.assertRaisesRegex(TypeError, 'an integer is required, not float'): + with self.assertRaisesRegex(TypeError, + 'format argument 1: %x requires an integer, not float'): eval("'%x' % (x,)", {'x': 1234.56}) - with self.assertRaisesRegex(TypeError, 'an integer is required, not str'): + with self.assertRaisesRegex(TypeError, + 'format argument 1: %x requires an integer, not str'): eval("'%x' % (x,)", {'x': '1234'}) - with self.assertRaisesRegex(TypeError, 'must be real number, not str'): + with self.assertRaisesRegex(TypeError, + 'format argument 1: %f requires a real number, not str'): eval("'%f' % (x,)", {'x': '1234'}) with self.assertRaisesRegex(TypeError, 'not enough arguments for format string'): diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index 2584fbf72d3fa6..0a8dddb026f6c8 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -1578,17 +1578,40 @@ def __int__(self): self.assertEqual('%X' % letter_m, '6D') self.assertEqual('%o' % letter_m, '155') self.assertEqual('%c' % letter_m, 'm') - self.assertRaisesRegex(TypeError, '%x format: an integer is required, not float', operator.mod, '%x', 3.14) - self.assertRaisesRegex(TypeError, '%X format: an integer is required, not float', operator.mod, '%X', 2.11) - self.assertRaisesRegex(TypeError, '%o format: an integer is required, not float', operator.mod, '%o', 1.79) - self.assertRaisesRegex(TypeError, '%x format: an integer is required, not PseudoFloat', operator.mod, '%x', pi) - self.assertRaisesRegex(TypeError, '%x format: an integer is required, not complex', operator.mod, '%x', 3j) - self.assertRaisesRegex(TypeError, '%X format: an integer is required, not complex', operator.mod, '%X', 2j) - self.assertRaisesRegex(TypeError, '%o format: an integer is required, not complex', operator.mod, '%o', 1j) - self.assertRaisesRegex(TypeError, '%u format: a real number is required, not complex', operator.mod, '%u', 3j) - self.assertRaisesRegex(TypeError, '%i format: a real number is required, not complex', operator.mod, '%i', 2j) - self.assertRaisesRegex(TypeError, '%d format: a real number is required, not complex', operator.mod, '%d', 1j) - self.assertRaisesRegex(TypeError, r'%c requires an int or a unicode character, not .*\.PseudoFloat', operator.mod, '%c', pi) + with self.assertRaisesRegex(TypeError, + 'format argument: %x requires an integer, not float'): + '%x' % 3.14 + with self.assertRaisesRegex(TypeError, + 'format argument: %X requires an integer, not float'): + '%X' % 2.11 + with self.assertRaisesRegex(TypeError, + 'format argument: %o requires an integer, not float'): + '%o' % 1.79 + with self.assertRaisesRegex(TypeError, + r'format argument: %x requires an integer, not .*\.PseudoFloat'): + '%x' % pi + with self.assertRaisesRegex(TypeError, + 'format argument: %x requires an integer, not complex'): + '%x' % 3j + with self.assertRaisesRegex(TypeError, + 'format argument: %X requires an integer, not complex'): + '%X' % 2j + with self.assertRaisesRegex(TypeError, + 'format argument: %o requires an integer, not complex'): + '%o' % 1j + with self.assertRaisesRegex(TypeError, + 'format argument: %u requires a real number, not complex'): + '%u' % 3j + with self.assertRaisesRegex(TypeError, + 'format argument: %i requires a real number, not complex'): + '%i' % 2j + with self.assertRaisesRegex(TypeError, + 'format argument: %d requires a real number, not complex'): + '%d' % 1j + with self.assertRaisesRegex(TypeError, + r'format argument: %c requires an integer or a unicode character, ' + r'not .*\.PseudoFloat'): + '%c' % pi class RaisingNumber: def __int__(self): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-29-10-06-06.gh-issue-142037.OpIGzK.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-29-10-06-06.gh-issue-142037.OpIGzK.rst new file mode 100644 index 00000000000000..6a59be7726f254 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-29-10-06-06.gh-issue-142037.OpIGzK.rst @@ -0,0 +1,7 @@ +Improve error messages for printf-style formatting. +For errors in the format string, always include the position of the +start of the format unit. +For errors related to the formatted arguments, always include the number +or the name of the argument. +Raise more specific errors and include more information (type and number +of arguments, most probable causes of error). diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 56de99bde11682..b85cbfe43e11e0 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -404,26 +404,44 @@ PyBytes_FromFormat(const char *format, ...) /* Helpers for formatstring */ +#define FORMAT_ERROR(EXC, FMT, ...) do { \ + if (key != NULL) { \ + PyErr_Format((EXC), "format argument %R: " FMT, \ + key, __VA_ARGS__); \ + } \ + else if (argidx >= 0) { \ + PyErr_Format((EXC), "format argument %zd: " FMT, \ + argidx, __VA_ARGS__); \ + } \ + else { \ + PyErr_Format((EXC), "format argument: " FMT, __VA_ARGS__); \ + } \ +} while (0) + Py_LOCAL_INLINE(PyObject *) -getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) +getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx, int allowone) { Py_ssize_t argidx = *p_argidx; if (argidx < arglen) { (*p_argidx)++; - if (arglen < 0) - return args; - else + if (arglen >= 0) { return PyTuple_GetItem(args, argidx); + } + else if (allowone) { + return args; + } } - PyErr_SetString(PyExc_TypeError, - "not enough arguments for format string"); + PyErr_Format(PyExc_TypeError, + "not enough arguments for format string (got %zd)", + arglen < 0 ? 1 : arglen); return NULL; } /* Returns a new reference to a PyBytes object, or NULL on failure. */ static char* -formatfloat(PyObject *v, int flags, int prec, int type, +formatfloat(PyObject *v, Py_ssize_t argidx, PyObject *key, + int flags, int prec, int type, PyObject **p_result, PyBytesWriter *writer, char *str) { char *p; @@ -434,8 +452,11 @@ formatfloat(PyObject *v, int flags, int prec, int type, x = PyFloat_AsDouble(v); if (x == -1.0 && PyErr_Occurred()) { - PyErr_Format(PyExc_TypeError, "float argument required, " - "not %.200s", Py_TYPE(v)->tp_name); + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + FORMAT_ERROR(PyExc_TypeError, + "%%%c requires a real number, not %T", + type, v); + } return NULL; } @@ -470,7 +491,8 @@ formatfloat(PyObject *v, int flags, int prec, int type, } static PyObject * -formatlong(PyObject *v, int flags, int prec, int type) +formatlong(PyObject *v, Py_ssize_t argidx, PyObject *key, + int flags, int prec, int type) { PyObject *result, *iobj; if (PyLong_Check(v)) @@ -490,20 +512,21 @@ formatlong(PyObject *v, int flags, int prec, int type) if (!PyErr_ExceptionMatches(PyExc_TypeError)) return NULL; } - PyErr_Format(PyExc_TypeError, - "%%%c format: %s is required, not %.200s", type, - (type == 'o' || type == 'x' || type == 'X') ? "an integer" - : "a real number", - Py_TYPE(v)->tp_name); + FORMAT_ERROR(PyExc_TypeError, + "%%%c requires %s, not %T", + type, + (type == 'o' || type == 'x' || type == 'X') ? "an integer" + : "a real number", + v); return NULL; } static int -byte_converter(PyObject *arg, char *p) +byte_converter(PyObject *arg, Py_ssize_t argidx, PyObject *key, char *p) { if (PyBytes_Check(arg)) { if (PyBytes_GET_SIZE(arg) != 1) { - PyErr_Format(PyExc_TypeError, + FORMAT_ERROR(PyExc_TypeError, "%%c requires an integer in range(256) or " "a single byte, not a bytes object of length %zd", PyBytes_GET_SIZE(arg)); @@ -514,7 +537,7 @@ byte_converter(PyObject *arg, char *p) } else if (PyByteArray_Check(arg)) { if (PyByteArray_GET_SIZE(arg) != 1) { - PyErr_Format(PyExc_TypeError, + FORMAT_ERROR(PyExc_TypeError, "%%c requires an integer in range(256) or " "a single byte, not a bytearray object of length %zd", PyByteArray_GET_SIZE(arg)); @@ -531,23 +554,25 @@ byte_converter(PyObject *arg, char *p) } if (!(0 <= ival && ival <= 255)) { /* this includes an overflow in converting to C long */ - PyErr_SetString(PyExc_OverflowError, - "%c arg not in range(256)"); + FORMAT_ERROR(PyExc_OverflowError, + "%%c argument not in range(256)%s", ""); return 0; } *p = (char)ival; return 1; } - PyErr_Format(PyExc_TypeError, - "%%c requires an integer in range(256) or a single byte, not %T", - arg); + FORMAT_ERROR(PyExc_TypeError, + "%%c requires an integer in range(256) or " + "a single byte, not %T", + arg); return 0; } static PyObject *_PyBytes_FromBuffer(PyObject *x); static PyObject * -format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen) +format_obj(PyObject *v, Py_ssize_t argidx, PyObject *key, + const char **pbuf, Py_ssize_t *plen) { PyObject *func, *result; /* is it a bytes object? */ @@ -589,10 +614,10 @@ format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen) *plen = PyBytes_GET_SIZE(result); return result; } - PyErr_Format(PyExc_TypeError, + FORMAT_ERROR(PyExc_TypeError, "%%b requires a bytes-like object, " - "or an object that implements __bytes__, not '%.100s'", - Py_TYPE(v)->tp_name); + "or an object that implements __bytes__, not %T", + v); return NULL; } @@ -607,6 +632,7 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, Py_ssize_t fmtcnt; int args_owned = 0; PyObject *dict = NULL; + PyObject *key = NULL; if (args == NULL) { PyErr_BadInternalCall(); @@ -680,15 +706,17 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, fmtcnt--; continue; } + Py_CLEAR(key); + const char *fmtstart = fmt; if (*fmt == '(') { const char *keystart; Py_ssize_t keylen; - PyObject *key; int pcount = 1; if (dict == NULL) { - PyErr_SetString(PyExc_TypeError, - "format requires a mapping"); + PyErr_Format(PyExc_TypeError, + "format requires a mapping, not %T", + args); goto error; } ++fmt; @@ -704,8 +732,10 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, } keylen = fmt - keystart - 1; if (fmtcnt < 0 || pcount > 0) { - PyErr_SetString(PyExc_ValueError, - "incomplete format key"); + PyErr_Format(PyExc_ValueError, + "stray %% or incomplete format key " + "at position %zd", + (Py_ssize_t)(fmtstart - format - 1)); goto error; } key = PyBytes_FromStringAndSize(keystart, @@ -717,13 +747,21 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, args_owned = 0; } args = PyObject_GetItem(dict, key); - Py_DECREF(key); if (args == NULL) { goto error; } args_owned = 1; - arglen = -1; - argidx = -2; + arglen = -3; + argidx = -4; + } + else { + if (arglen < -1) { + PyErr_Format(PyExc_ValueError, + "format requires a parenthesised mapping key " + "at position %zd", + (Py_ssize_t)(fmtstart - format - 1)); + goto error; + } } /* Parse flags. Example: "%+i" => flags=F_SIGN. */ @@ -740,17 +778,28 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, /* Parse width. Example: "%10s" => width=10 */ if (c == '*') { - v = getnextarg(args, arglen, &argidx); + if (arglen < -1) { + PyErr_Format(PyExc_ValueError, + "* cannot be used with a parenthesised mapping key " + "at position %zd", + (Py_ssize_t)(fmtstart - format - 1)); + goto error; + } + v = getnextarg(args, arglen, &argidx, 0); if (v == NULL) goto error; if (!PyLong_Check(v)) { - PyErr_SetString(PyExc_TypeError, - "* wants int"); + FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v); goto error; } width = PyLong_AsSsize_t(v); - if (width == -1 && PyErr_Occurred()) + if (width == -1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + FORMAT_ERROR(PyExc_OverflowError, + "too big for width%s", ""); + } goto error; + } if (width < 0) { flags |= F_LJUST; width = -width; @@ -765,9 +814,9 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, if (!Py_ISDIGIT(c)) break; if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) { - PyErr_SetString( - PyExc_ValueError, - "width too big"); + PyErr_Format(PyExc_ValueError, + "width too big at position %zd", + (Py_ssize_t)(fmtstart - format - 1)); goto error; } width = width*10 + (c - '0'); @@ -780,18 +829,29 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, if (--fmtcnt >= 0) c = *fmt++; if (c == '*') { - v = getnextarg(args, arglen, &argidx); + if (arglen < -1) { + PyErr_Format(PyExc_ValueError, + "* cannot be used with a parenthesised mapping key " + "at position %zd", + (Py_ssize_t)(fmtstart - format - 1)); + goto error; + } + v = getnextarg(args, arglen, &argidx, 0); if (v == NULL) goto error; if (!PyLong_Check(v)) { - PyErr_SetString( - PyExc_TypeError, - "* wants int"); + FORMAT_ERROR(PyExc_TypeError, + "* requires int, not %T", v); goto error; } prec = PyLong_AsInt(v); - if (prec == -1 && PyErr_Occurred()) + if (prec == -1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + FORMAT_ERROR(PyExc_OverflowError, + "too big for precision%s", ""); + } goto error; + } if (prec < 0) prec = 0; if (--fmtcnt >= 0) @@ -804,9 +864,9 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, if (!Py_ISDIGIT(c)) break; if (prec > (INT_MAX - ((int)c - '0')) / 10) { - PyErr_SetString( - PyExc_ValueError, - "prec too big"); + PyErr_Format(PyExc_ValueError, + "precision too big at position %zd", + (Py_ssize_t)(fmtstart - format - 1)); goto error; } prec = prec*10 + (c - '0'); @@ -820,11 +880,12 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, } } if (fmtcnt < 0) { - PyErr_SetString(PyExc_ValueError, - "incomplete format"); + PyErr_Format(PyExc_ValueError, + "stray %% at position %zd", + (Py_ssize_t)(fmtstart - format - 1)); goto error; } - v = getnextarg(args, arglen, &argidx); + v = getnextarg(args, arglen, &argidx, 1); if (v == NULL) goto error; @@ -852,7 +913,7 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, case 's': // %s is only for 2/3 code; 3 only code should use %b case 'b': - temp = format_obj(v, &pbuf, &len); + temp = format_obj(v, argidx, key, &pbuf, &len); if (temp == NULL) goto error; if (prec >= 0 && len > prec) @@ -900,7 +961,7 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, continue; } - temp = formatlong(v, flags, prec, c); + temp = formatlong(v, argidx, key, flags, prec, c); if (!temp) goto error; assert(PyUnicode_IS_ASCII(temp)); @@ -921,13 +982,13 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, && !(flags & (F_SIGN | F_BLANK))) { /* Fast path */ - res = formatfloat(v, flags, prec, c, NULL, writer, res); + res = formatfloat(v, argidx, key, flags, prec, c, NULL, writer, res); if (res == NULL) goto error; continue; } - if (!formatfloat(v, flags, prec, c, &temp, NULL, res)) + if (!formatfloat(v, argidx, key, flags, prec, c, &temp, NULL, res)) goto error; pbuf = PyBytes_AS_STRING(temp); len = PyBytes_GET_SIZE(temp); @@ -938,7 +999,7 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, case 'c': pbuf = &onechar; - len = byte_converter(v, &onechar); + len = byte_converter(v, argidx, key, &onechar); if (!len) goto error; if (width == -1) { @@ -949,11 +1010,36 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, break; default: - PyErr_Format(PyExc_ValueError, - "unsupported format character '%c' (0x%x) " - "at index %zd", - c, c, - (Py_ssize_t)(fmt - 1 - format)); + if (Py_ISALPHA(c)) { + PyErr_Format(PyExc_ValueError, + "unsupported format %%%c at position %zd", + c, (Py_ssize_t)(fmtstart - format - 1)); + } + else if (c == '\'') { + PyErr_Format(PyExc_ValueError, + "stray %% at position %zd or unexpected " + "format character \"'\" " + "at position %zd", + (Py_ssize_t)(fmtstart - format - 1), + (Py_ssize_t)(fmt - format - 1)); + } + else if (c >= 32 && c < 127 && c != '\'') { + PyErr_Format(PyExc_ValueError, + "stray %% at position %zd or unexpected " + "format character '%c' " + "at position %zd", + (Py_ssize_t)(fmtstart - format - 1), + c, (Py_ssize_t)(fmt - format - 1)); + } + else { + PyErr_Format(PyExc_ValueError, + "stray %% at position %zd or unexpected " + "format character with code 0x%02x " + "at position %zd", + (Py_ssize_t)(fmtstart - format - 1), + Py_CHARMASK(c), + (Py_ssize_t)(fmt - format - 1)); + } goto error; } @@ -1042,6 +1128,7 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, } if (dict && (argidx < arglen)) { + // XXX: Never happens? PyErr_SetString(PyExc_TypeError, "not all arguments converted during bytes formatting"); Py_XDECREF(temp); @@ -1061,8 +1148,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, } /* until end */ if (argidx < arglen && !dict) { - PyErr_SetString(PyExc_TypeError, - "not all arguments converted during bytes formatting"); + PyErr_Format(PyExc_TypeError, + "not all arguments converted during bytes formatting " + "(required %zd, got %zd)", + arglen < 0 ? 0 : argidx, + arglen < 0 ? 1 : arglen); goto error; } @@ -1072,6 +1162,7 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, return PyBytesWriter_FinishWithPointer(writer, res); error: + Py_XDECREF(key); PyBytesWriter_Discard(writer); if (args_owned) { Py_DECREF(args); diff --git a/Objects/unicode_format.c b/Objects/unicode_format.c index 26bdae55d8b931..e2790c8c1d4343 100644 --- a/Objects/unicode_format.c +++ b/Objects/unicode_format.c @@ -72,23 +72,44 @@ struct unicode_format_arg_t { Py_ssize_t width; int prec; int sign; + Py_ssize_t fmtstart; + PyObject *key; }; +// Use FORMAT_ERROR("...%s", "") when there is no arguments. +#define FORMAT_ERROR(EXC, FMT, ...) do { \ + if (arg->key != NULL) { \ + PyErr_Format((EXC), "format argument %R: " FMT, \ + arg->key, __VA_ARGS__); \ + } \ + else if (ctx->argidx >= 0) { \ + PyErr_Format((EXC), "format argument %zd: " FMT, \ + ctx->argidx, __VA_ARGS__); \ + } \ + else { \ + PyErr_Format((EXC), "format argument: " FMT, __VA_ARGS__); \ + } \ +} while (0) + + static PyObject * -unicode_format_getnextarg(struct unicode_formatter_t *ctx) +unicode_format_getnextarg(struct unicode_formatter_t *ctx, int allowone) { Py_ssize_t argidx = ctx->argidx; - if (argidx < ctx->arglen) { + if (argidx < ctx->arglen && (allowone || ctx->arglen >= 0)) { ctx->argidx++; - if (ctx->arglen < 0) - return ctx->args; - else + if (ctx->arglen >= 0) { return PyTuple_GetItem(ctx->args, argidx); + } + else if (allowone) { + return ctx->args; + } } - PyErr_SetString(PyExc_TypeError, - "not enough arguments for format string"); + PyErr_Format(PyExc_TypeError, + "not enough arguments for format string (got %zd)", + ctx->arglen < 0 ? 1 : ctx->arglen); return NULL; } @@ -100,7 +121,9 @@ unicode_format_getnextarg(struct unicode_formatter_t *ctx) Return 0 on success, raise an exception and return -1 on error. */ static int -formatfloat(PyObject *v, struct unicode_format_arg_t *arg, +formatfloat(PyObject *v, + struct unicode_formatter_t *ctx, + struct unicode_format_arg_t *arg, PyObject **p_output, _PyUnicodeWriter *writer) { @@ -111,8 +134,14 @@ formatfloat(PyObject *v, struct unicode_format_arg_t *arg, int dtoa_flags = 0; x = PyFloat_AsDouble(v); - if (x == -1.0 && PyErr_Occurred()) + if (x == -1.0 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + FORMAT_ERROR(PyExc_TypeError, + "%%%c requires a real number, not %T", + arg->ch, v); + } return -1; + } prec = arg->prec; if (prec < 0) @@ -287,6 +316,7 @@ _PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type) * -1 and raise an exception on error */ static int mainformatlong(PyObject *v, + struct unicode_formatter_t *ctx, struct unicode_format_arg_t *arg, PyObject **p_output, _PyUnicodeWriter *writer) @@ -364,16 +394,14 @@ mainformatlong(PyObject *v, case 'o': case 'x': case 'X': - PyErr_Format(PyExc_TypeError, - "%%%c format: an integer is required, " - "not %.200s", - type, Py_TYPE(v)->tp_name); + FORMAT_ERROR(PyExc_TypeError, + "%%%c requires an integer, not %T", + arg->ch, v); break; default: - PyErr_Format(PyExc_TypeError, - "%%%c format: a real number is required, " - "not %.200s", - type, Py_TYPE(v)->tp_name); + FORMAT_ERROR(PyExc_TypeError, + "%%%c requires a real number, not %T", + arg->ch, v); break; } return -1; @@ -381,15 +409,17 @@ mainformatlong(PyObject *v, static Py_UCS4 -formatchar(PyObject *v) +formatchar(PyObject *v, + struct unicode_formatter_t *ctx, + struct unicode_format_arg_t *arg) { /* presume that the buffer is at least 3 characters long */ if (PyUnicode_Check(v)) { if (PyUnicode_GET_LENGTH(v) == 1) { return PyUnicode_READ_CHAR(v, 0); } - PyErr_Format(PyExc_TypeError, - "%%c requires an int or a unicode character, " + FORMAT_ERROR(PyExc_TypeError, + "%%c requires an integer or a unicode character, " "not a string of length %zd", PyUnicode_GET_LENGTH(v)); return (Py_UCS4) -1; @@ -399,18 +429,18 @@ formatchar(PyObject *v) long x = PyLong_AsLongAndOverflow(v, &overflow); if (x == -1 && PyErr_Occurred()) { if (PyErr_ExceptionMatches(PyExc_TypeError)) { - PyErr_Format(PyExc_TypeError, - "%%c requires an int or a unicode character, not %T", + FORMAT_ERROR(PyExc_TypeError, + "%%c requires an integer or a unicode character, " + "not %T", v); - return (Py_UCS4) -1; } return (Py_UCS4) -1; } if (x < 0 || x > MAX_UNICODE) { /* this includes an overflow in converting to C long */ - PyErr_SetString(PyExc_OverflowError, - "%c arg not in range(0x110000)"); + FORMAT_ERROR(PyExc_OverflowError, + "%%c argument not in range(0x110000)%s", ""); return (Py_UCS4) -1; } @@ -438,12 +468,12 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, /* Get argument value from a dictionary. Example: "%(name)s". */ Py_ssize_t keystart; Py_ssize_t keylen; - PyObject *key; int pcount = 1; if (ctx->dict == NULL) { - PyErr_SetString(PyExc_TypeError, - "format requires a mapping"); + PyErr_Format(PyExc_TypeError, + "format requires a mapping, not %T", + ctx->args); return -1; } ++ctx->fmtpos; @@ -460,25 +490,34 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, } keylen = ctx->fmtpos - keystart - 1; if (ctx->fmtcnt < 0 || pcount > 0) { - PyErr_SetString(PyExc_ValueError, - "incomplete format key"); + PyErr_Format(PyExc_ValueError, + "stray %% or incomplete format key at position %zd", + arg->fmtstart); return -1; } - key = PyUnicode_Substring(ctx->fmtstr, - keystart, keystart + keylen); - if (key == NULL) + arg->key = PyUnicode_Substring(ctx->fmtstr, + keystart, keystart + keylen); + if (arg->key == NULL) return -1; if (ctx->args_owned) { ctx->args_owned = 0; Py_DECREF(ctx->args); } - ctx->args = PyObject_GetItem(ctx->dict, key); - Py_DECREF(key); + ctx->args = PyObject_GetItem(ctx->dict, arg->key); if (ctx->args == NULL) return -1; ctx->args_owned = 1; - ctx->arglen = -1; - ctx->argidx = -2; + ctx->arglen = -3; + ctx->argidx = -4; + } + else { + if (ctx->arglen < -1) { + PyErr_Format(PyExc_ValueError, + "format requires a parenthesised mapping key " + "at position %zd", + arg->fmtstart); + return -1; + } } /* Parse flags. Example: "%+i" => flags=F_SIGN. */ @@ -497,17 +536,28 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, /* Parse width. Example: "%10s" => width=10 */ if (arg->ch == '*') { - v = unicode_format_getnextarg(ctx); + if (ctx->arglen < -1) { + PyErr_Format(PyExc_ValueError, + "* cannot be used with a parenthesised mapping key " + "at position %zd", + arg->fmtstart); + return -1; + } + v = unicode_format_getnextarg(ctx, 0); if (v == NULL) return -1; if (!PyLong_Check(v)) { - PyErr_SetString(PyExc_TypeError, - "* wants int"); + FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v); return -1; } arg->width = PyLong_AsSsize_t(v); - if (arg->width == -1 && PyErr_Occurred()) + if (arg->width == -1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + FORMAT_ERROR(PyExc_OverflowError, + "too big for width%s", ""); + } return -1; + } if (arg->width < 0) { arg->flags |= F_LJUST; arg->width = -arg->width; @@ -528,8 +578,9 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, mixing signed and unsigned comparison. Since arg->ch is between '0' and '9', casting to int is safe. */ if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) { - PyErr_SetString(PyExc_ValueError, - "width too big"); + PyErr_Format(PyExc_ValueError, + "width too big at position %zd", + arg->fmtstart); return -1; } arg->width = arg->width*10 + (arg->ch - '0'); @@ -544,17 +595,28 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, ctx->fmtpos++; } if (arg->ch == '*') { - v = unicode_format_getnextarg(ctx); + if (ctx->arglen < -1) { + PyErr_Format(PyExc_ValueError, + "* cannot be used with a parenthesised mapping key " + "at position %zd", + arg->fmtstart); + return -1; + } + v = unicode_format_getnextarg(ctx, 0); if (v == NULL) return -1; if (!PyLong_Check(v)) { - PyErr_SetString(PyExc_TypeError, - "* wants int"); + FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v); return -1; } arg->prec = PyLong_AsInt(v); - if (arg->prec == -1 && PyErr_Occurred()) + if (arg->prec == -1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + FORMAT_ERROR(PyExc_OverflowError, + "too big for precision%s", ""); + } return -1; + } if (arg->prec < 0) arg->prec = 0; if (--ctx->fmtcnt >= 0) { @@ -570,8 +632,9 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, if (arg->ch < '0' || arg->ch > '9') break; if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) { - PyErr_SetString(PyExc_ValueError, - "precision too big"); + PyErr_Format(PyExc_ValueError, + "precision too big at position %zd", + arg->fmtstart); return -1; } arg->prec = arg->prec*10 + (arg->ch - '0'); @@ -589,8 +652,8 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, } } if (ctx->fmtcnt < 0) { - PyErr_SetString(PyExc_ValueError, - "incomplete format"); + PyErr_Format(PyExc_ValueError, + "stray %% at position %zd", arg->fmtstart); return -1; } return 0; @@ -624,7 +687,7 @@ unicode_format_arg_format(struct unicode_formatter_t *ctx, if (ctx->fmtcnt == 0) ctx->writer.overallocate = 0; - v = unicode_format_getnextarg(ctx); + v = unicode_format_getnextarg(ctx, 1); if (v == NULL) return -1; @@ -660,7 +723,7 @@ unicode_format_arg_format(struct unicode_formatter_t *ctx, case 'x': case 'X': { - int ret = mainformatlong(v, arg, p_str, writer); + int ret = mainformatlong(v, ctx, arg, p_str, writer); if (ret != 0) return ret; arg->sign = 1; @@ -677,19 +740,19 @@ unicode_format_arg_format(struct unicode_formatter_t *ctx, && !(arg->flags & (F_SIGN | F_BLANK))) { /* Fast path */ - if (formatfloat(v, arg, NULL, writer) == -1) + if (formatfloat(v, ctx, arg, NULL, writer) == -1) return -1; return 1; } arg->sign = 1; - if (formatfloat(v, arg, p_str, NULL) == -1) + if (formatfloat(v, ctx, arg, p_str, NULL) == -1) return -1; break; case 'c': { - Py_UCS4 ch = formatchar(v); + Py_UCS4 ch = formatchar(v, ctx, arg); if (ch == (Py_UCS4) -1) return -1; if (arg->width == -1 && arg->prec == -1) { @@ -703,12 +766,38 @@ unicode_format_arg_format(struct unicode_formatter_t *ctx, } default: - PyErr_Format(PyExc_ValueError, - "unsupported format character '%c' (0x%x) " - "at index %zd", - (31<=arg->ch && arg->ch<=126) ? (char)arg->ch : '?', - (int)arg->ch, - ctx->fmtpos - 1); + if (arg->ch < 128 && Py_ISALPHA(arg->ch)) { + PyErr_Format(PyExc_ValueError, + "unsupported format %%%c at position %zd", + (int)arg->ch, arg->fmtstart); + } + else if (arg->ch == '\'') { + PyErr_Format(PyExc_ValueError, + "stray %% at position %zd or unexpected " + "format character \"'\" at position %zd", + arg->fmtstart, + ctx->fmtpos - 1); + } + else if (arg->ch >= 32 && arg->ch < 127) { + PyErr_Format(PyExc_ValueError, + "stray %% at position %zd or unexpected " + "format character '%c' at position %zd", + arg->fmtstart, + (int)arg->ch, ctx->fmtpos - 1); + } + else if (Py_UNICODE_ISPRINTABLE(arg->ch)) { + PyErr_Format(PyExc_ValueError, + "stray %% at position %zd or unexpected " + "format character '%c' (U+%04X) at position %zd", + arg->fmtstart, + (int)arg->ch, (int)arg->ch, ctx->fmtpos - 1); + } + else { + PyErr_Format(PyExc_ValueError, + "stray %% at position %zd or unexpected " + "format character U+%04X at position %zd", + arg->fmtstart, (int)arg->ch, ctx->fmtpos - 1); + } return -1; } if (*p_str == NULL) @@ -892,29 +981,40 @@ unicode_format_arg(struct unicode_formatter_t *ctx) arg.width = -1; arg.prec = -1; arg.sign = 0; + arg.fmtstart = ctx->fmtpos - 1; + arg.key = NULL; str = NULL; ret = unicode_format_arg_parse(ctx, &arg); - if (ret == -1) - return -1; + if (ret == -1) { + goto onError; + } ret = unicode_format_arg_format(ctx, &arg, &str); - if (ret == -1) - return -1; + if (ret == -1) { + goto onError; + } if (ret != 1) { ret = unicode_format_arg_output(ctx, &arg, str); Py_DECREF(str); - if (ret == -1) - return -1; + if (ret == -1) { + goto onError; + } } if (ctx->dict && (ctx->argidx < ctx->arglen)) { + // XXX: Never happens? PyErr_SetString(PyExc_TypeError, "not all arguments converted during string formatting"); - return -1; + goto onError; } + Py_XDECREF(arg.key); return 0; + + onError: + Py_XDECREF(arg.key); + return -1; } @@ -983,8 +1083,11 @@ PyUnicode_Format(PyObject *format, PyObject *args) } if (ctx.argidx < ctx.arglen && !ctx.dict) { - PyErr_SetString(PyExc_TypeError, - "not all arguments converted during string formatting"); + PyErr_Format(PyExc_TypeError, + "not all arguments converted during string formatting " + "(required %zd, got %zd)", + ctx.arglen < 0 ? 0 : ctx.argidx, + ctx.arglen < 0 ? 1 : ctx.arglen); goto onError; }