Skip to content

Commit 02a472a

Browse files
authored
[mypyc] Optionally log a sampled operation trace to a file (#19457)
Logging executed ops is useful for performance analysis. For example, we can look for functions which perform many slow operations and try to optimize them. I've already used this successfully to implement several optimizations. A typical optimization that this helps with is replacing a generic Python function call operation with a native call. This has also helped me identify inefficient code generated by mypyc. Compile using `MYPYC_LOG_TRACE=1 mypyc ...` to enable trace logging. The log will be written to `mypyc_trace.txt`. Roughly 1/1000 of ops of certain kinds (e.g. primitive calls) are logged. This can also be enabled by passing `log_trace=True` to `mypycify`. Compared to profiling, this logging data is frequency-oriented instead of CPU time oriented, and it's mostly helpful for micro-optimizations. It also needs some understanding of mypyc internals to be useful. It's not generally possible to reconstruct call stacks from the event data (but we could improve this). However, there is very little noise in the data and even small improvements can be visible. Logging isn't impacted by C compiler optimizations, so for a faster iteration loop, optimizations can be disabled. In the future this could possibly be used for profile-guided optimizations, but we'd probably need to adjust the data collection a bit for this use case. This is currently not documented and mostly intended for mypy or mypyc maintainers for now. Also no tests yet, since this is not a user-evel feature and it's disabled by default. Random example of log entries from mypy self check: ``` mypy.typeops.TypeVarExtractor._merge:1146:call_c:CPyList_Extend mypy.semanal.SemanticAnalyzer.lookup::primitive_op:list_get_item_unsafe mypy.expandtype.ExpandTypeVisitor.visit_type_var__TypeVisitor_glue:239:call:mypy.expandtype.ExpandTypeVisitor.visit_type_var mypy.applytype.apply_generic_arguments:111:call_c:CPy_NoErrOccurred mypy.indirection.TypeIndirectionVisitor.visit_callable_type__TypeVisitor_glue:118:call:mypy.indirection.TypeIndirectionVisitor.visit_callable_type mypy.fastparse.ASTConverter.visit_Call::primitive_op:buf_init_item mypy.semanal.SemanticAnalyzer.is_func_scope::primitive_op:int_eq mypy.meet.is_overlapping_types:397:call:mypy.meet._is_subtype_is_overlapping_types_obj mypy.types.CallableType.serialize:2287:call_c:CPyList_SetItemUnsafe mypy.checkexpr.ExpressionChecker.check_argument_types:2576:call_c:CPyList_SetItemUnsafe ``` For example, let's look at this line: ``` mypy.typeops.TypeVarExtractor._merge:1146:call_c:CPyList_Extend ``` In method `TypeVarExtractor._merge`, on line 1146 of `mypy/typeops.py`, the C primitive CPyList_Extend was called (corresponds to `list.extend`). I'll later add some documentation to the wiki or other developer docs and give examples of using and analyzing the data.
1 parent eb07c06 commit 02a472a

File tree

8 files changed

+159
-3
lines changed

8 files changed

+159
-3
lines changed

mypyc/__main__.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,15 @@
2323
from setuptools import setup
2424
from mypyc.build import mypycify
2525
26-
setup(name='mypyc_output',
27-
ext_modules=mypycify({}, opt_level="{}", debug_level="{}", strict_dunder_typing={}),
26+
setup(
27+
name='mypyc_output',
28+
ext_modules=mypycify(
29+
{},
30+
opt_level="{}",
31+
debug_level="{}",
32+
strict_dunder_typing={},
33+
log_trace={},
34+
),
2835
)
2936
"""
3037

@@ -39,10 +46,17 @@ def main() -> None:
3946
opt_level = os.getenv("MYPYC_OPT_LEVEL", "3")
4047
debug_level = os.getenv("MYPYC_DEBUG_LEVEL", "1")
4148
strict_dunder_typing = bool(int(os.getenv("MYPYC_STRICT_DUNDER_TYPING", "0")))
49+
# If enabled, compiled code writes a sampled log of executed ops (or events) to
50+
# mypyc_trace.txt.
51+
log_trace = bool(int(os.getenv("MYPYC_LOG_TRACE", "0")))
4252

4353
setup_file = os.path.join(build_dir, "setup.py")
4454
with open(setup_file, "w") as f:
45-
f.write(setup_format.format(sys.argv[1:], opt_level, debug_level, strict_dunder_typing))
55+
f.write(
56+
setup_format.format(
57+
sys.argv[1:], opt_level, debug_level, strict_dunder_typing, log_trace
58+
)
59+
)
4660

4761
# We don't use run_setup (like we do in the test suite) because it throws
4862
# away the error code from distutils, and we don't care about the slight

mypyc/build.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,7 @@ def mypycify(
485485
include_runtime_files: bool | None = None,
486486
strict_dunder_typing: bool = False,
487487
group_name: str | None = None,
488+
log_trace: bool = False,
488489
) -> list[Extension]:
489490
"""Main entry point to building using mypyc.
490491
@@ -531,6 +532,10 @@ def mypycify(
531532
the hash of module names. This is used for the names of the
532533
output C files and the shared library. This is only supported
533534
if there is a single group. [Experimental]
535+
log_trace: If True, compiled code writes a trace log of events in
536+
mypyc_trace.txt (derived from executed operations). This is
537+
useful for performance analysis, such as analyzing which
538+
primitive ops are used the most and on which lines.
534539
"""
535540

536541
# Figure out our configuration
@@ -543,6 +548,7 @@ def mypycify(
543548
include_runtime_files=include_runtime_files,
544549
strict_dunder_typing=strict_dunder_typing,
545550
group_name=group_name,
551+
log_trace=log_trace,
546552
)
547553

548554
# Generate all the actual important C code
@@ -583,6 +589,8 @@ def mypycify(
583589
# See https://github.com/mypyc/mypyc/issues/956
584590
"-Wno-cpp",
585591
]
592+
if log_trace:
593+
cflags.append("-DMYPYC_LOG_TRACE")
586594
elif compiler.compiler_type == "msvc":
587595
# msvc doesn't have levels, '/O2' is full and '/Od' is disable
588596
if opt_level == "0":
@@ -607,6 +615,8 @@ def mypycify(
607615
# that we actually get the compilation speed and memory
608616
# use wins that multi-file mode is intended for.
609617
cflags += ["/GL-", "/wd9025"] # warning about overriding /GL
618+
if log_trace:
619+
cflags.append("/DMYPYC_LOG_TRACE")
610620

611621
# If configured to (defaults to yes in multi-file mode), copy the
612622
# runtime library in. Otherwise it just gets #included to save on

mypyc/codegen/emitmodule.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
from mypyc.transform.copy_propagation import do_copy_propagation
6666
from mypyc.transform.exceptions import insert_exception_handling
6767
from mypyc.transform.flag_elimination import do_flag_elimination
68+
from mypyc.transform.log_trace import insert_event_trace_logging
6869
from mypyc.transform.lower import lower_ir
6970
from mypyc.transform.refcount import insert_ref_count_opcodes
7071
from mypyc.transform.spill import insert_spills
@@ -253,6 +254,9 @@ def compile_scc_to_ir(
253254
if fn in env_user_functions:
254255
insert_spills(fn, env_user_functions[fn])
255256

257+
if compiler_options.log_trace:
258+
insert_event_trace_logging(fn, compiler_options)
259+
256260
# Switch to lower abstraction level IR.
257261
lower_ir(fn, compiler_options)
258262
# Perform optimizations.

mypyc/lib-rt/CPy.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -929,6 +929,7 @@ PyObject *CPySingledispatch_RegisterFunction(PyObject *singledispatch_func, PyOb
929929
PyObject *CPy_GetAIter(PyObject *obj);
930930
PyObject *CPy_GetANext(PyObject *aiter);
931931
void CPy_SetTypeAliasTypeComputeFunction(PyObject *alias, PyObject *compute_value);
932+
void CPyTrace_LogEvent(const char *location, const char *line, const char *op, const char *details);
932933

933934
#ifdef __cplusplus
934935
}

mypyc/lib-rt/misc_ops.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,6 +1030,34 @@ PyObject *CPy_GetANext(PyObject *aiter)
10301030
return NULL;
10311031
}
10321032

1033+
#ifdef MYPYC_LOG_TRACE
1034+
1035+
// This is only compiled in if trace logging is enabled by user
1036+
1037+
static int TraceCounter = 0;
1038+
static const int TRACE_EVERY_NTH = 1009; // Should be a prime number
1039+
#define TRACE_LOG_FILE_NAME "mypyc_trace.txt"
1040+
static FILE *TraceLogFile = NULL;
1041+
1042+
// Log a tracing event on every Nth call
1043+
void CPyTrace_LogEvent(const char *location, const char *line, const char *op, const char *details) {
1044+
if (TraceLogFile == NULL) {
1045+
if ((TraceLogFile = fopen(TRACE_LOG_FILE_NAME, "w")) == NULL) {
1046+
fprintf(stderr, "error: Could not open trace file %s\n", TRACE_LOG_FILE_NAME);
1047+
abort();
1048+
}
1049+
}
1050+
if (TraceCounter == 0) {
1051+
fprintf(TraceLogFile, "%s:%s:%s:%s\n", location, line, op, details);
1052+
}
1053+
TraceCounter++;
1054+
if (TraceCounter == TRACE_EVERY_NTH) {
1055+
TraceCounter = 0;
1056+
}
1057+
}
1058+
1059+
#endif
1060+
10331061
#ifdef CPY_3_12_FEATURES
10341062

10351063
// Copied from Python 3.12.3, since this struct is internal to CPython. It defines

mypyc/options.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ def __init__(
1616
python_version: tuple[int, int] | None = None,
1717
strict_dunder_typing: bool = False,
1818
group_name: str | None = None,
19+
log_trace: bool = False,
1920
) -> None:
2021
self.strip_asserts = strip_asserts
2122
self.multi_file = multi_file
@@ -45,3 +46,7 @@ def __init__(
4546
# library is generated (with shims). This can be used to make the output
4647
# file names more predictable.
4748
self.group_name = group_name
49+
# If enabled, write a trace log of events based on executed operations to
50+
# mypyc_trace.txt when compiled module is executed. This is useful for
51+
# performance analysis.
52+
self.log_trace = log_trace

mypyc/primitives/misc_ops.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
c_int_rprimitive,
1010
c_pointer_rprimitive,
1111
c_pyssize_t_rprimitive,
12+
cstring_rprimitive,
1213
dict_rprimitive,
1314
int_rprimitive,
1415
object_pointer_rprimitive,
@@ -300,3 +301,13 @@
300301
return_type=void_rtype,
301302
error_kind=ERR_NEVER,
302303
)
304+
305+
# Log an event to a trace log, which is written to a file during execution.
306+
log_trace_event = custom_primitive_op(
307+
name="log_trace_event",
308+
c_function_name="CPyTrace_LogEvent",
309+
# (fullname of function/location, line number, operation name, operation details)
310+
arg_types=[cstring_rprimitive, cstring_rprimitive, cstring_rprimitive, cstring_rprimitive],
311+
return_type=void_rtype,
312+
error_kind=ERR_NEVER,
313+
)

mypyc/transform/log_trace.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
"""This optional pass adds logging of various executed operations.
2+
3+
Some subset of the executed operations are logged to the mypyc_trace.txt file.
4+
5+
This is useful for performance analysis. For example, it's possible
6+
to identify how frequently various primitive functions are called,
7+
and in which code locations they are called.
8+
"""
9+
10+
from __future__ import annotations
11+
12+
from mypyc.ir.func_ir import FuncIR
13+
from mypyc.ir.ops import Call, CallC, CString, LoadLiteral, LoadStatic, Op, PrimitiveOp, Value
14+
from mypyc.irbuild.ll_builder import LowLevelIRBuilder
15+
from mypyc.options import CompilerOptions
16+
from mypyc.primitives.misc_ops import log_trace_event
17+
from mypyc.transform.ir_transform import IRTransform
18+
19+
20+
def insert_event_trace_logging(fn: FuncIR, options: CompilerOptions) -> None:
21+
builder = LowLevelIRBuilder(None, options)
22+
transform = LogTraceEventTransform(builder, fn.decl.fullname)
23+
transform.transform_blocks(fn.blocks)
24+
fn.blocks = builder.blocks
25+
26+
27+
def get_load_global_name(op: CallC) -> str | None:
28+
name = op.function_name
29+
if name == "CPyDict_GetItem":
30+
arg = op.args[0]
31+
if (
32+
isinstance(arg, LoadStatic)
33+
and arg.namespace == "static"
34+
and arg.identifier == "globals"
35+
and isinstance(op.args[1], LoadLiteral)
36+
):
37+
return str(op.args[1].value)
38+
return None
39+
40+
41+
class LogTraceEventTransform(IRTransform):
42+
def __init__(self, builder: LowLevelIRBuilder, fullname: str) -> None:
43+
super().__init__(builder)
44+
self.fullname = fullname.encode("utf-8")
45+
46+
def visit_call(self, op: Call) -> Value:
47+
# TODO: Use different op name when constructing an instance
48+
return self.log(op, "call", op.fn.fullname)
49+
50+
def visit_primitive_op(self, op: PrimitiveOp) -> Value:
51+
return self.log(op, "primitive_op", op.desc.name)
52+
53+
def visit_call_c(self, op: CallC) -> Value:
54+
if global_name := get_load_global_name(op):
55+
return self.log(op, "globals_dict_get_item", global_name)
56+
57+
func_name = op.function_name
58+
if func_name == "PyObject_Vectorcall" and isinstance(op.args[0], CallC):
59+
if global_name := get_load_global_name(op.args[0]):
60+
return self.log(op, "python_call_global", global_name)
61+
elif func_name == "CPyObject_GetAttr" and isinstance(op.args[1], LoadLiteral):
62+
return self.log(op, "python_get_attr", str(op.args[1].value))
63+
elif func_name == "PyObject_VectorcallMethod" and isinstance(op.args[0], LoadLiteral):
64+
return self.log(op, "python_call_method", str(op.args[0].value))
65+
66+
return self.log(op, "call_c", func_name)
67+
68+
def log(self, op: Op, name: str, details: str) -> Value:
69+
if op.line >= 0:
70+
line_str = str(op.line)
71+
else:
72+
line_str = ""
73+
self.builder.primitive_op(
74+
log_trace_event,
75+
[
76+
CString(self.fullname),
77+
CString(line_str.encode("ascii")),
78+
CString(name.encode("utf-8")),
79+
CString(details.encode("utf-8")),
80+
],
81+
op.line,
82+
)
83+
return self.add(op)

0 commit comments

Comments
 (0)